diff --git a/.mailmap b/.mailmap index 05877be16ce29..3c703fb4aaab9 100644 --- a/.mailmap +++ b/.mailmap @@ -28,9 +28,8 @@ -Jon Roelofs Jon Roelofs -Jon Roelofs Jonathan Roelofs -Jon Roelofs Jonathan Roelofs +Jon Roelofs +Jon Roelofs LLVM GN Syncbot Martin Storsjö -Saleem Abdulrasool +Saleem Abdulrasool diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeMemberInitCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeMemberInitCheck.cpp index 43812fe17a1c7..a191598415217 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeMemberInitCheck.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeMemberInitCheck.cpp @@ -433,17 +433,25 @@ void ProTypeMemberInitCheck::checkMissingMemberInitializer( [&](const FieldDecl *F) { OrderedFields.push_back(F); }); // Collect all the fields we need to initialize, including indirect fields. + // It only includes fields that have not been fixed SmallPtrSet AllFieldsToInit; - forEachField(ClassDecl, FieldsToInit, - [&](const FieldDecl *F) { AllFieldsToInit.insert(F); }); - if (AllFieldsToInit.empty()) + forEachField(ClassDecl, FieldsToInit, [&](const FieldDecl *F) { + if (!HasRecordClassMemberSet.contains(F)) { + AllFieldsToInit.insert(F); + HasRecordClassMemberSet.insert(F); + } + }); + if (FieldsToInit.empty()) return; DiagnosticBuilder Diag = diag(Ctor ? Ctor->getBeginLoc() : ClassDecl.getLocation(), "%select{|union }0constructor %select{does not|should}0 initialize " "%select{|one of }0these fields: %1") - << IsUnion << toCommaSeparatedString(OrderedFields, AllFieldsToInit); + << IsUnion << toCommaSeparatedString(OrderedFields, FieldsToInit); + + if (AllFieldsToInit.empty()) + return; // Do not propose fixes for constructors in macros since we cannot place them // correctly. diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeMemberInitCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeMemberInitCheck.h index 5b4144396eab4..af7b14ec68ad9 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeMemberInitCheck.h +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeMemberInitCheck.h @@ -10,6 +10,7 @@ #define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PRO_TYPE_MEMBER_INIT_H #include "../ClangTidyCheck.h" +#include "llvm/ADT/DenseSet.h" namespace clang { namespace tidy { @@ -72,6 +73,10 @@ class ProTypeMemberInitCheck : public ClangTidyCheck { // instead of brace initialization. Only effective in C++11 mode. Default is // false. bool UseAssignment; + + // Record the member variables that have been initialized to prevent repeated + // initialization. + llvm::DenseSet HasRecordClassMemberSet; }; } // namespace cppcoreguidelines diff --git a/clang-tools-extra/clang-tidy/readability/BracesAroundStatementsCheck.cpp b/clang-tools-extra/clang-tidy/readability/BracesAroundStatementsCheck.cpp index fe25f7a7ccbcc..7dc519c152828 100644 --- a/clang-tools-extra/clang-tidy/readability/BracesAroundStatementsCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/BracesAroundStatementsCheck.cpp @@ -186,6 +186,10 @@ BracesAroundStatementsCheck::findRParenLoc(const IfOrWhileStmt *S, bool BracesAroundStatementsCheck::checkStmt( const MatchFinder::MatchResult &Result, const Stmt *S, SourceLocation InitialLoc, SourceLocation EndLocHint) { + + while (const auto *AS = dyn_cast(S)) + S = AS->getSubStmt(); + // 1) If there's a corresponding "else" or "while", the check inserts "} " // right before that token. 
// 2) If there's a multi-line block comment starting on the same line after diff --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt index 476de70654cf9..056a3272ebd11 100644 --- a/clang-tools-extra/clangd/CMakeLists.txt +++ b/clang-tools-extra/clangd/CMakeLists.txt @@ -83,6 +83,7 @@ add_clang_library(clangDaemon HeaderSourceSwitch.cpp HeuristicResolver.cpp Hover.cpp + IncludeCleaner.cpp IncludeFixer.cpp InlayHints.cpp JSONTransport.cpp diff --git a/clang-tools-extra/clangd/CompileCommands.cpp b/clang-tools-extra/clangd/CompileCommands.cpp index 57933169f9119..f9e283beca64c 100644 --- a/clang-tools-extra/clangd/CompileCommands.cpp +++ b/clang-tools-extra/clangd/CompileCommands.cpp @@ -127,7 +127,6 @@ const llvm::Optional detectSysroot() { if (::getenv("SDKROOT")) return llvm::None; return queryXcrun({"xcrun", "--show-sdk-path"}); - return llvm::None; } std::string detectStandardResourceDir() { diff --git a/clang-tools-extra/clangd/IncludeCleaner.cpp b/clang-tools-extra/clangd/IncludeCleaner.cpp new file mode 100644 index 0000000000000..dbf19d4e08f95 --- /dev/null +++ b/clang-tools-extra/clangd/IncludeCleaner.cpp @@ -0,0 +1,112 @@ +//===--- IncludeCleaner.cpp - Unused/Missing Headers Analysis ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "IncludeCleaner.h" +#include "support/Logger.h" +#include "clang/AST/RecursiveASTVisitor.h" +#include "clang/Basic/SourceLocation.h" + +namespace clang { +namespace clangd { +namespace { + +/// Crawler traverses the AST and feeds in the locations of (sometimes +/// implicitly) used symbols into \p Result. +class ReferencedLocationCrawler + : public RecursiveASTVisitor { +public: + ReferencedLocationCrawler(ReferencedLocations &Result) : Result(Result) {} + + bool VisitDeclRefExpr(DeclRefExpr *DRE) { + add(DRE->getDecl()); + add(DRE->getFoundDecl()); + return true; + } + + bool VisitMemberExpr(MemberExpr *ME) { + add(ME->getMemberDecl()); + add(ME->getFoundDecl().getDecl()); + return true; + } + + bool VisitTagType(TagType *TT) { + add(TT->getDecl()); + return true; + } + + bool VisitCXXConstructExpr(CXXConstructExpr *CCE) { + add(CCE->getConstructor()); + return true; + } + + bool VisitTemplateSpecializationType(TemplateSpecializationType *TST) { + if (isNew(TST)) { + add(TST->getTemplateName().getAsTemplateDecl()); // Primary template. + add(TST->getAsCXXRecordDecl()); // Specialization + } + return true; + } + + bool VisitTypedefType(TypedefType *TT) { + add(TT->getDecl()); + return true; + } + + // Consider types of any subexpression used, even if the type is not named. + // This is helpful in getFoo().bar(), where Foo must be complete. + // FIXME(kirillbobyrev): Should we tweak this? It may not be desirable to + // consider types "used" when they are not directly spelled in code. 
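// Editorial illustration (not part of the patch): the implicit-use case the
// comment above describes. Neither line in main.cpp spells `Foo`, yet the
// header that defines `Foo` is still needed, because the member call requires
// a complete type; VisitExpr marks `Foo` used via the expression's type.
//
//   // foo.h
//   struct Foo { void bar(); };
//   Foo getFoo();
//
//   // main.cpp
//   void test() { getFoo().bar(); }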
+ bool VisitExpr(Expr *E) { + TraverseType(E->getType()); + return true; + } + + bool TraverseType(QualType T) { + if (isNew(T.getTypePtrOrNull())) { // don't care about quals + Base::TraverseType(T); + } + return true; + } + + bool VisitUsingDecl(UsingDecl *D) { + for (const auto *Shadow : D->shadows()) { + add(Shadow->getTargetDecl()); + } + return true; + } + +private: + using Base = RecursiveASTVisitor; + + void add(const Decl *D) { + if (!D || !isNew(D->getCanonicalDecl())) { + return; + } + for (const Decl *Redecl : D->redecls()) { + Result.insert(Redecl->getLocation()); + } + } + + bool isNew(const void *P) { return P && Visited.insert(P).second; } + + ReferencedLocations &Result; + llvm::DenseSet Visited; +}; + +} // namespace + +ReferencedLocations findReferencedLocations(ParsedAST &AST) { + ReferencedLocations Result; + ReferencedLocationCrawler Crawler(Result); + Crawler.TraverseAST(AST.getASTContext()); + // FIXME(kirillbobyrev): Handle macros. + return Result; +} + +} // namespace clangd +} // namespace clang diff --git a/clang-tools-extra/clangd/IncludeCleaner.h b/clang-tools-extra/clangd/IncludeCleaner.h new file mode 100644 index 0000000000000..ca9f79c0f3b03 --- /dev/null +++ b/clang-tools-extra/clangd/IncludeCleaner.h @@ -0,0 +1,52 @@ +//===--- IncludeCleaner.h - Unused/Missing Headers Analysis -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Include Cleaner is clangd functionality for providing diagnostics for misuse +/// of transitive headers and unused includes. It is inspired by +/// Include-What-You-Use tool (https://include-what-you-use.org/). Our goal is +/// to provide useful warnings in most popular scenarios but not 1:1 exact +/// feature compatibility. +/// +/// FIXME(kirillbobyrev): Add support for IWYU pragmas. +/// FIXME(kirillbobyrev): Add support for standard library headers. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INCLUDE_CLEANER_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INCLUDE_CLEANER_H + +#include "Headers.h" +#include "ParsedAST.h" +#include "clang/Basic/SourceLocation.h" +#include "llvm/ADT/DenseSet.h" + +namespace clang { +namespace clangd { + +using ReferencedLocations = llvm::DenseSet; +/// Finds locations of all symbols used in the main file. +/// +/// Uses RecursiveASTVisitor to go through main file AST and computes all the +/// locations used symbols are coming from. Returned locations may be macro +/// expansions, and are not resolved to their spelling/expansion location. These +/// locations are later used to determine which headers should be marked as +/// "used" and "directly used". +/// +/// We use this to compute unused headers, so we: +/// +/// - cover the whole file in a single traversal for efficiency +/// - don't attempt to describe where symbols were referenced from in +/// ambiguous cases (e.g. 
implicitly used symbols, multiple declarations) +/// - err on the side of reporting all possible locations +ReferencedLocations findReferencedLocations(ParsedAST &AST); + +} // namespace clangd +} // namespace clang + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INCLUDE_CLEANER_H diff --git a/clang-tools-extra/clangd/InlayHints.cpp b/clang-tools-extra/clangd/InlayHints.cpp index 1283aa4dd62cc..7c3c6a2421d83 100644 --- a/clang-tools-extra/clangd/InlayHints.cpp +++ b/clang-tools-extra/clangd/InlayHints.cpp @@ -13,6 +13,7 @@ #include "clang/AST/ExprCXX.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/Basic/SourceManager.h" +#include "llvm/Support/raw_ostream.h" namespace clang { namespace clangd { @@ -314,6 +315,10 @@ class InlayHintVisitor : public RecursiveASTVisitor { toHalfOpenFileRange(AST.getSourceManager(), AST.getLangOpts(), R); if (!FileRange) return; + // The hint may be in a file other than the main file (for example, a header + // file that was included after the preamble), do not show in that case. + if (!AST.getSourceManager().isWrittenInMainFile(FileRange->getBegin())) + return; Results.push_back(InlayHint{ Range{ sourceLocToPosition(AST.getSourceManager(), FileRange->getBegin()), diff --git a/clang-tools-extra/clangd/unittests/CMakeLists.txt b/clang-tools-extra/clangd/unittests/CMakeLists.txt index 2f5a754f882ae..8fc4cf414bdb1 100644 --- a/clang-tools-extra/clangd/unittests/CMakeLists.txt +++ b/clang-tools-extra/clangd/unittests/CMakeLists.txt @@ -58,6 +58,7 @@ add_unittest(ClangdUnitTests ClangdTests HeadersTests.cpp HeaderSourceSwitchTests.cpp HoverTests.cpp + IncludeCleanerTests.cpp IndexActionTests.cpp IndexTests.cpp InlayHintTests.cpp diff --git a/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp b/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp new file mode 100644 index 0000000000000..718076adc762a --- /dev/null +++ b/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp @@ -0,0 +1,136 @@ +//===--- IncludeCleanerTests.cpp --------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Annotations.h" +#include "IncludeCleaner.h" +#include "TestTU.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +namespace clang { +namespace clangd { +namespace { + +TEST(IncludeCleaner, ReferencedLocations) { + struct TestCase { + std::string HeaderCode; + std::string MainCode; + }; + TestCase Cases[] = { + // DeclRefExpr + { + "int ^x();", + "int y = x();", + }, + // RecordDecl + { + "class ^X;", + "X *y;", + }, + // TypedefType and UsingDecls + { + "using ^Integer = int;", + "Integer x;", + }, + { + "namespace ns { struct ^X; struct ^X {}; }", + "using ns::X;", + }, + { + "namespace ns { struct X; struct X {}; }", + "using namespace ns;", + }, + { + "struct ^A {}; using B = A; using ^C = B;", + "C a;", + }, + { + "typedef bool ^Y; template struct ^X {};", + "X x;", + }, + { + "struct Foo; struct ^Foo{}; typedef Foo ^Bar;", + "Bar b;", + }, + // MemberExpr + { + "struct ^X{int ^a;}; X ^foo();", + "int y = foo().a;", + }, + // Expr (type is traversed) + { + "class ^X{}; X ^foo();", + "auto bar() { return foo(); }", + }, + // Redecls + { + "class ^X; class ^X{}; class ^X;", + "X *y;", + }, + // Constructor + { + "struct ^X { ^X(int) {} int ^foo(); };", + "auto x = X(42); auto y = x.foo();", + }, + // Static function + { + "struct ^X { static bool ^foo(); }; bool X::^foo() {}", + "auto b = X::foo();", + }, + // TemplateRecordDecl + { + "template class ^X;", + "X *y;", + }, + // Type name not spelled out in code + { + "class ^X{}; X ^getX();", + "auto x = getX();", + }, + // Enums + { + "enum ^Color { ^Red = 42, Green = 9000};", + "int MyColor = Red;", + }, + { + "struct ^X { enum ^Language { ^CXX = 42, Python = 9000}; };", + "int Lang = X::CXX;", + }, + { + // When a type is resolved via a using declaration, the + // UsingShadowDecl is not referenced in the AST. + // Compare to TypedefType, or DeclRefExpr::getFoundDecl(). 
+ // ^ + "namespace ns { class ^X; }; using ns::X;", + "X *y;", + }}; + for (const TestCase &T : Cases) { + TestTU TU; + TU.Code = T.MainCode; + Annotations Header(T.HeaderCode); + TU.HeaderCode = Header.code().str(); + auto AST = TU.build(); + + std::vector Points; + for (const auto &Loc : findReferencedLocations(AST)) { + if (AST.getSourceManager().getBufferName(Loc).endswith( + TU.HeaderFilename)) { + Points.push_back(offsetToPosition( + TU.HeaderCode, AST.getSourceManager().getFileOffset(Loc))); + } + } + llvm::sort(Points); + + EXPECT_EQ(Points, Header.points()) << T.HeaderCode << "\n---\n" + << T.MainCode; + } +} + +} // namespace +} // namespace clangd +} // namespace clang diff --git a/clang-tools-extra/clangd/unittests/InlayHintTests.cpp b/clang-tools-extra/clangd/unittests/InlayHintTests.cpp index 1410ed115b6bf..6796a8ce70fff 100644 --- a/clang-tools-extra/clangd/unittests/InlayHintTests.cpp +++ b/clang-tools-extra/clangd/unittests/InlayHintTests.cpp @@ -9,6 +9,7 @@ #include "InlayHints.h" #include "Protocol.h" #include "TestTU.h" +#include "TestWorkspace.h" #include "XRefs.h" #include "gmock/gmock.h" #include "gtest/gtest.h" @@ -398,6 +399,28 @@ TEST(ParameterHints, SetterFunctions) { ExpectedHint{"timeout_millis: ", "timeout_millis"}); } +TEST(ParameterHints, IncludeAtNonGlobalScope) { + Annotations FooInc(R"cpp( + void bar() { foo(42); } + )cpp"); + Annotations FooCC(R"cpp( + struct S { + void foo(int param); + #include "foo.inc" + }; + )cpp"); + + TestWorkspace Workspace; + Workspace.addSource("foo.inc", FooInc.code()); + Workspace.addMainFile("foo.cc", FooCC.code()); + + auto AST = Workspace.openFile("foo.cc"); + ASSERT_TRUE(bool(AST)); + + // Ensure the hint for the call in foo.inc is NOT materialized in foo.cc. + EXPECT_EQ(hintsOfKind(*AST, InlayHintKind::ParameterHint).size(), 0u); +} + TEST(TypeHints, Smoke) { assertTypeHints(R"cpp( auto $waldo[[waldo]] = 42; diff --git a/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-pro-type-member-init.cpp b/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-pro-type-member-init.cpp index 403f28baf99d4..8cab4fd755752 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-pro-type-member-init.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-pro-type-member-init.cpp @@ -208,9 +208,8 @@ struct PositiveMultipleConstructors { PositiveMultipleConstructors(const PositiveMultipleConstructors &) {} // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: constructor does not initialize these fields: A, B - // FIXME: The fix-its here collide providing an erroneous fix int A, B; - // CHECK-FIXES: int A{}{}{}, B{}{}{}; + // CHECK-FIXES: int A{}, B{}; }; typedef struct { diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability-braces-around-statements-attributes.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability-braces-around-statements-attributes.cpp new file mode 100644 index 0000000000000..e799614a1f7b0 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/readability-braces-around-statements-attributes.cpp @@ -0,0 +1,24 @@ +// RUN: %check_clang_tidy -std=c++20-or-later %s readability-braces-around-statements %t + +void test(bool b) { + if (b) { + return; + } + if (b) [[likely]] { + // CHECK-FIXES-NOT: if (b) { {{[[][[]}}likely{{[]][]]}} { + return; + } + if (b) [[unlikely]] { + // CHECK-FIXES-NOT: if (b) { {{[[][[]}}unlikely{{[]][]]}} { + return; + } + + if (b) [[likely]] + // CHECK-FIXES: if (b) {{[[][[]}}likely{{[]][]]}} { + return; + // CHECK-FIXES: } + if (b) 
[[unlikely]] + // CHECK-FIXES: if (b) {{[[][[]}}unlikely{{[]][]]}} { + return; + // CHECK-FIXES: } +} diff --git a/clang/docs/AddressSanitizer.rst b/clang/docs/AddressSanitizer.rst index 7befbc3173da8..06b53e2e5da0b 100644 --- a/clang/docs/AddressSanitizer.rst +++ b/clang/docs/AddressSanitizer.rst @@ -282,11 +282,11 @@ Code generation control Instrumentation code outlining ------------------------------ -By default AddressSanitizer inlines the instumentation code to improve the +By default AddressSanitizer inlines the instrumentation code to improve the run-time performance, which leads to increased binary size. Using the (clang flag ``-fsanitize-address-outline-instrumentation` default: ``false``) -flag forces all code instumentation to be outlined, which reduces the size -of the generated code, but also reduces the run-time performace. +flag forces all code instrumentation to be outlined, which reduces the size +of the generated code, but also reduces the run-time performance. Limitations =========== diff --git a/clang/docs/ClangCommandLineReference.rst b/clang/docs/ClangCommandLineReference.rst index 2770d28a9f3b0..dcbfba3aa836c 100644 --- a/clang/docs/ClangCommandLineReference.rst +++ b/clang/docs/ClangCommandLineReference.rst @@ -1705,7 +1705,7 @@ Enable support for int128\_t type .. option:: -ffp-contract= -Form fused FP ops (e.g. FMAs): fast (fuses across statements disregarding pragmas) \| on (only fuses in the same statement unless dictated by pragmas) \| off (never fuses) \| fast-honor-pragmas (fuses across statements unless diectated by pragmas). Default is 'fast' for CUDA, 'fast-honor-pragmas' for HIP, and 'on' otherwise. +Form fused FP ops (e.g. FMAs): fast (fuses across statements disregarding pragmas) \| on (only fuses in the same statement unless dictated by pragmas) \| off (never fuses) \| fast-honor-pragmas (fuses across statements unless dictated by pragmas). Default is 'fast' for CUDA, 'fast-honor-pragmas' for HIP, and 'on' otherwise. .. option:: -ffp-exception-behavior= @@ -2543,7 +2543,7 @@ Give global types 'default' visibility and global functions and variables 'hidde .. option:: -fvisibility-nodllstorageclass= -The visibility for defintiions without an explicit DLL export class \[-fvisibility-from-dllstorageclass\] +The visibility for definitions without an explicit DLL export class \[-fvisibility-from-dllstorageclass\] .. option:: -fvisibility= diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst index 96d89db7a5ccf..d8ac58734dc4a 100644 --- a/clang/docs/ClangFormatStyleOptions.rst +++ b/clang/docs/ClangFormatStyleOptions.rst @@ -741,8 +741,7 @@ the configuration (without a prefix: ``Auto``). enum { A, B } myEnum; false: - enum - { + enum { A, B } myEnum; @@ -3542,7 +3541,7 @@ the configuration (without a prefix: ``Auto``). ForEach and If macros. This is useful in projects where ForEach/If macros are treated as function calls instead of control statements. ``SBPO_ControlStatementsExceptForEachMacros`` remains an alias for - backward compatability. + backward compatibility. .. code-block:: c++ diff --git a/clang/docs/DataFlowSanitizerDesign.rst b/clang/docs/DataFlowSanitizerDesign.rst index bed4d2f38cba5..4f028de2ed09d 100644 --- a/clang/docs/DataFlowSanitizerDesign.rst +++ b/clang/docs/DataFlowSanitizerDesign.rst @@ -139,7 +139,7 @@ Origin tracking trace representation ------------------------------------ An origin tracking trace is a list of chains. 
Each chain has a stack trace -where the DFSan runtime records a label propapation, and a pointer to its +where the DFSan runtime records a label propagation, and a pointer to its previous chain. The very first chain does not point to any chain. Every four 4-bytes aligned application bytes share a 4-byte origin trace ID. A diff --git a/clang/docs/IntroductionToTheClangAST.rst b/clang/docs/IntroductionToTheClangAST.rst index 286ab88d01ef6..6fbb8f1d6440e 100644 --- a/clang/docs/IntroductionToTheClangAST.rst +++ b/clang/docs/IntroductionToTheClangAST.rst @@ -32,7 +32,7 @@ clang ParenExpr). Examining the AST ================= -A good way to familarize yourself with the Clang AST is to actually look +A good way to familiarize yourself with the Clang AST is to actually look at it on some simple example code. Clang has a builtin AST-dump mode, which can be enabled with the flag ``-ast-dump``. diff --git a/clang/docs/MemorySanitizer.rst b/clang/docs/MemorySanitizer.rst index 3ba5ce5bed3ee..c6fc9407ea15f 100644 --- a/clang/docs/MemorySanitizer.rst +++ b/clang/docs/MemorySanitizer.rst @@ -85,6 +85,15 @@ particular function. MemorySanitizer may still instrument such functions to avoid false positives. This attribute may not be supported by other compilers, so we suggest to use it together with ``__has_feature(memory_sanitizer)``. +``__attribute__((disable_sanitizer_instrumentation))`` +-------------------------------------------------------- + +The ``disable_sanitizer_instrumentation`` attribute can be applied to functions +to prevent all kinds of instrumentation. As a result, it may introduce false +positives and therefore should be used with care, and only if absolutely +required; for example for certain code that cannot tolerate any instrumentation +and resulting side-effects. This attribute overrides ``no_sanitize("memory")``. + Ignorelist ---------- diff --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst index 7291a5cfc5a0f..b1aa2ca65f14b 100644 --- a/clang/docs/OpenMPSupport.rst +++ b/clang/docs/OpenMPSupport.rst @@ -133,9 +133,9 @@ implementation. 
+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ | loop extension | inclusive scan extension (matching C++17 PSTL) | :good:`done` | | +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| memory mangagement | memory allocators | :good:`done` | r341687,r357929 | +| memory management | memory allocators | :good:`done` | r341687,r357929 | +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| memory mangagement | allocate directive and allocate clause | :good:`done` | r355614,r335952 | +| memory management | allocate directive and allocate clause | :good:`done` | r355614,r335952 | +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ | OMPD | OMPD interfaces | :part:`not upstream` | https://github.com/OpenMPToolsInterface/LLVM-openmp/tree/ompd-tests | +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 7604df7482c76..f728f5b4fcfc4 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -100,7 +100,9 @@ Windows Support C Language Changes in Clang --------------------------- -- ... +- Wide multi-characters literals such as ``L'ab'`` that would previously be interpreted as ``L'b'`` + are now ill-formed in all language modes. The motivation for this change is outlined in + `P2362 `_. C++ Language Changes in Clang ----------------------------- diff --git a/clang/include/clang/AST/ComparisonCategories.h b/clang/include/clang/AST/ComparisonCategories.h index b41e934142ee4..fb648b322b61c 100644 --- a/clang/include/clang/AST/ComparisonCategories.h +++ b/clang/include/clang/AST/ComparisonCategories.h @@ -115,8 +115,7 @@ class ComparisonCategoryInfo { public: /// The declaration for the comparison category type from the /// standard library. - // FIXME: Make this const - CXXRecordDecl *Record = nullptr; + const CXXRecordDecl *Record = nullptr; /// The Kind of the comparison category type ComparisonCategoryType Kind; diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h index 27241cfa9e4a4..b223b413c7556 100644 --- a/clang/include/clang/AST/Decl.h +++ b/clang/include/clang/AST/Decl.h @@ -615,7 +615,9 @@ class NamespaceDecl : public NamedDecl, public DeclContext, if (!isInline()) return false; auto X = lookup(Name); - auto Y = getParent()->lookup(Name); + // We should not perform a lookup within a transparent context, so find a + // non-transparent parent context. 
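// Editorial note (not part of the patch): the transparent contexts in Clang
// are things like LinkageSpecDecl (extern "C" / extern "C++") and ExportDecl
// (C++20 'export'); their members act, for name lookup, as members of the
// enclosing context. A hedged sketch of the shape this guards against:
//
//   namespace outer { export inline namespace inner { int x; } }
//
// getParent() of 'inner' can be the ExportDecl rather than 'outer', so the
// lookup below is forwarded to the first non-transparent ancestor.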
+ auto Y = getParent()->getNonTransparentContext()->lookup(Name); return std::distance(X.begin(), X.end()) == std::distance(Y.begin(), Y.end()); } diff --git a/clang/include/clang/AST/DeclBase.h b/clang/include/clang/AST/DeclBase.h index 482d2889a25a1..18468c8ca1c47 100644 --- a/clang/include/clang/AST/DeclBase.h +++ b/clang/include/clang/AST/DeclBase.h @@ -1997,6 +1997,12 @@ class DeclContext { return const_cast(this)->getNonClosureAncestor(); } + // Retrieve the nearest context that is not a transparent context. + DeclContext *getNonTransparentContext(); + const DeclContext *getNonTransparentContext() const { + return const_cast(this)->getNonTransparentContext(); + } + /// getPrimaryContext - There may be many different /// declarations of the same entity (including forward declarations /// of classes, multiple definitions of namespaces, etc.), each with diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h index 1bc2e955ddc73..3741ea9989b6e 100644 --- a/clang/include/clang/AST/Type.h +++ b/clang/include/clang/AST/Type.h @@ -3465,10 +3465,6 @@ class ConstantMatrixType final : public MatrixType { protected: friend class ASTContext; - /// The element type of the matrix. - // FIXME: Appears to be unused? There is also MatrixType::ElementType... - QualType ElementType; - /// Number of rows and columns. unsigned NumRows; unsigned NumColumns; diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index a5df8351e8c8a..fd99e677b1cc1 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -3607,6 +3607,13 @@ def NoSanitizeSpecific : InheritableAttr { let ASTNode = 0; } +def DisableSanitizerInstrumentation : InheritableAttr { + let Spellings = [Clang<"disable_sanitizer_instrumentation">]; + let Subjects = SubjectList<[Function, ObjCMethod, GlobalVar]>; + let Documentation = [DisableSanitizerInstrumentationDocs]; + let SimpleHandler = 1; +} + def CFICanonicalJumpTable : InheritableAttr { let Spellings = [Clang<"cfi_canonical_jump_table">]; let Subjects = SubjectList<[Function], ErrorDiag>; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 5cb6f853910b1..891e450bb2ab4 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -3825,6 +3825,18 @@ full list of supported sanitizer flags. }]; } +def DisableSanitizerInstrumentationDocs : Documentation { + let Category = DocCatFunction; + let Content = [{ +Use the ``disable_sanitizer_instrumentation`` attribute on a function, +Objective-C method, or global variable, to specify that no sanitizer +instrumentation should be applied. + +This is not the same as ``__attribute__((no_sanitize(...)))``, which depending +on the tool may still insert instrumentation to prevent false positive reports. 
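A minimal usage sketch for the attribute documented above (editorial illustration, not part of the patch). The spelling and the allowed subjects (functions, Objective-C methods, and global variables) are taken from the Attr.td hunk earlier in this diff:

    // Built with a sanitizer enabled (e.g. -fsanitize=memory): neither the
    // global nor the function body receives any sanitizer instrumentation.
    __attribute__((disable_sanitizer_instrumentation)) int g_raw_state;

    __attribute__((disable_sanitizer_instrumentation))
    int read_raw(const int *p) {
      return *p; // uninstrumented; may therefore hide or cause false positives
    }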
+ }]; +} + def NoSanitizeAddressDocs : Documentation { let Category = DocCatFunction; // This function has multiple distinct spellings, and so it requires a custom diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def index 1dac5d2371d48..634bcaed20a6f 100644 --- a/clang/include/clang/Basic/BuiltinsAArch64.def +++ b/clang/include/clang/Basic/BuiltinsAArch64.def @@ -243,6 +243,9 @@ TARGET_HEADER_BUILTIN(_ReadStatusReg, "LLii", "nh", "intrin.h", ALL_MS_LANGUAG TARGET_HEADER_BUILTIN(_WriteStatusReg, "viLLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_AddressOfReturnAddress, "v*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(__mulh, "SLLiSLLiSLLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(__umulh, "ULLiULLiULLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") + #undef BUILTIN #undef LANGBUILTIN #undef TARGET_HEADER_BUILTIN diff --git a/clang/include/clang/Basic/BuiltinsWebAssembly.def b/clang/include/clang/Basic/BuiltinsWebAssembly.def index 04ec45aa3b747..f5120b23f8118 100644 --- a/clang/include/clang/Basic/BuiltinsWebAssembly.def +++ b/clang/include/clang/Basic/BuiltinsWebAssembly.def @@ -119,18 +119,22 @@ TARGET_BUILTIN(__builtin_wasm_all_true_i16x8, "iV8s", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_all_true_i32x4, "iV4i", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_all_true_i64x2, "iV2LLi", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_bitmask_i8x16, "iV16Sc", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_bitmask_i16x8, "iV8s", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_bitmask_i32x4, "iV4i", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_bitmask_i64x2, "iV2LLi", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_bitmask_i8x16, "UiV16Sc", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_bitmask_i16x8, "UiV8s", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_bitmask_i32x4, "UiV4i", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_bitmask_i64x2, "UiV2LLi", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_abs_f32x4, "V4fV4f", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_abs_f64x2, "V2dV2d", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_min_f32x4, "V4fV4fV4f", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_max_f32x4, "V4fV4fV4f", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_pmin_f32x4, "V4fV4fV4f", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_pmax_f32x4, "V4fV4fV4f", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_min_f64x2, "V2dV2dV2d", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_max_f64x2, "V2dV2dV2d", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_pmin_f64x2, "V2dV2dV2d", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_pmax_f64x2, "V2dV2dV2d", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_ceil_f32x4, "V4fV4f", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_floor_f32x4, "V4fV4f", "nc", "simd128") diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def index c54133c449889..f21c17ee0ebe9 100644 --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -1850,9 +1850,151 @@ TARGET_BUILTIN(__builtin_ia32_vp2intersect_d_256, "vV8iV8iUc*Uc*", "nV:256:", "a TARGET_BUILTIN(__builtin_ia32_vp2intersect_d_128, "vV4iV4iUc*Uc*", "nV:128:", "avx512vp2intersect,avx512vl") // AVX512 fp16 intrinsics +TARGET_BUILTIN(__builtin_ia32_vcomish, "iV8xV8xIiIi", "ncV:128:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_addph512, "V32xV32xV32xIi", "ncV:512:", "avx512fp16") 
+TARGET_BUILTIN(__builtin_ia32_subph512, "V32xV32xV32xIi", "ncV:512:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_mulph512, "V32xV32xV32xIi", "ncV:512:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_divph512, "V32xV32xV32xIi", "ncV:512:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_maxph512, "V32xV32xV32xIi", "ncV:512:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_minph512, "V32xV32xV32xIi", "ncV:512:", "avx512fp16") + +TARGET_BUILTIN(__builtin_ia32_minph256, "V16xV16xV16x", "ncV:256:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_minph128, "V8xV8xV8x", "ncV:128:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_maxph256, "V16xV16xV16x", "ncV:256:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_maxph128, "V8xV8xV8x", "ncV:128:", "avx512fp16,avx512vl") + +TARGET_BUILTIN(__builtin_ia32_addsh_round_mask, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_divsh_round_mask, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_mulsh_round_mask, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_subsh_round_mask, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_maxsh_round_mask, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_minsh_round_mask, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_cmpph512_mask, "UiV32xV32xIiUiIi", "ncV:512:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_cmpph256_mask, "UsV16xV16xIiUs", "ncV:256:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_cmpph128_mask, "UcV8xV8xIiUc", "ncV:128:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_cmpsh_mask, "UcV8xV8xIiUcIi", "ncV:128:", "avx512fp16") TARGET_BUILTIN(__builtin_ia32_loadsh128_mask, "V8xV8x*V8xUc", "nV:128:", "avx512fp16") TARGET_BUILTIN(__builtin_ia32_storesh128_mask, "vV8x*V8xUc", "nV:128:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_rcpph128_mask, "V8xV8xV8xUc", "ncV:128:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_rcpph256_mask, "V16xV16xV16xUs", "ncV:256:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_rcpph512_mask, "V32xV32xV32xUi", "ncV:512:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_rsqrtph128_mask, "V8xV8xV8xUc", "ncV:128:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_rsqrtph256_mask, "V16xV16xV16xUs", "ncV:256:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_rsqrtph512_mask, "V32xV32xV32xUi", "ncV:512:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_getmantph128_mask, "V8xV8xIiV8xUc", "ncV:128:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_getmantph256_mask, "V16xV16xIiV16xUs", "ncV:256:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_getmantph512_mask, "V32xV32xIiV32xUiIi", "ncV:512:", "avx512fp16") + +TARGET_BUILTIN(__builtin_ia32_getexpph128_mask, "V8xV8xV8xUc", "ncV:128:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_getexpph256_mask, "V16xV16xV16xUs", "ncV:256:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_getexpph512_mask, "V32xV32xV32xUiIi", "ncV:512:", "avx512fp16") + +TARGET_BUILTIN(__builtin_ia32_scalefph128_mask, "V8xV8xV8xV8xUc", "ncV:128:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_scalefph256_mask, "V16xV16xV16xV16xUs", "ncV:256:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_scalefph512_mask, "V32xV32xV32xV32xUiIi", "ncV:512:", "avx512fp16") + +TARGET_BUILTIN(__builtin_ia32_rndscaleph_128_mask, "V8xV8xIiV8xUc", "ncV:128:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_rndscaleph_256_mask, 
"V16xV16xIiV16xUs", "ncV:256:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_rndscaleph_mask, "V32xV32xIiV32xUiIi", "ncV:512:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_reduceph128_mask, "V8xV8xIiV8xUc", "ncV:128:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_reduceph256_mask, "V16xV16xIiV16xUs", "ncV:256:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_reduceph512_mask, "V32xV32xIiV32xUiIi", "ncV:512:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_rcpsh_mask, "V8xV8xV8xV8xUc", "ncV:128:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_rsqrtsh_mask, "V8xV8xV8xV8xUc", "ncV:128:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_getmantsh_round_mask, "V8xV8xV8xIiV8xUcIi", "ncV:128:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_getexpsh128_round_mask, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_scalefsh_round_mask, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_rndscalesh_round_mask, "V8xV8xV8xV8xUcIiIi", "ncV:128:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_reducesh_mask, "V8xV8xV8xV8xUcIiIi", "ncV:128:", "avx512fp16") + +TARGET_BUILTIN(__builtin_ia32_sqrtph, "V8xV8x", "ncV:128:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_sqrtph256, "V16xV16x", "ncV:256:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_sqrtph512, "V32xV32xIi", "ncV:512:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_sqrtsh_round_mask, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_fpclassph128_mask, "UcV8xIiUc", "ncV:128:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_fpclassph256_mask, "UsV16xIiUs", "ncV:256:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_fpclassph512_mask, "UiV32xIiUi", "ncV:512:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_fpclasssh_mask, "UcV8xIiUc", "ncV:128:", "avx512fp16") + +TARGET_BUILTIN(__builtin_ia32_vcvtpd2ph128_mask, "V8xV2dV8xUc", "ncV:128:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcvtpd2ph256_mask, "V8xV4dV8xUc", "ncV:256:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcvtpd2ph512_mask, "V8xV8dV8xUcIi", "ncV:512:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_vcvtph2pd128_mask, "V2dV8xV2dUc", "ncV:128:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcvtph2pd256_mask, "V4dV8xV4dUc", "ncV:256:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcvtph2pd512_mask, "V8dV8xV8dUcIi", "ncV:512:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_vcvtsh2ss_round_mask, "V4fV4fV8xV4fUcIi", "ncV:128:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_vcvtss2sh_round_mask, "V8xV8xV4fV8xUcIi", "ncV:128:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_vcvtsd2sh_round_mask, "V8xV8xV2dV8xUcIi", "ncV:128:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_vcvtsh2sd_round_mask, "V2dV2dV8xV2dUcIi", "ncV:128:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_vcvtph2w128_mask, "V8sV8xV8sUc", "ncV:128:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcvtph2w256_mask, "V16sV16xV16sUs", "ncV:256:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcvtph2w512_mask, "V32sV32xV32sUiIi", "ncV:512:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_vcvttph2w128_mask, "V8sV8xV8sUc", "ncV:128:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcvttph2w256_mask, "V16sV16xV16sUs", "ncV:256:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcvttph2w512_mask, "V32sV32xV32sUiIi", "ncV:512:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_vcvtw2ph128_mask, "V8xV8sV8xUc", "ncV:128:", "avx512fp16,avx512vl") 
+TARGET_BUILTIN(__builtin_ia32_vcvtw2ph256_mask, "V16xV16sV16xUs", "ncV:256:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcvtw2ph512_mask, "V32xV32sV32xUiIi", "ncV:512:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_vcvtph2uw128_mask, "V8UsV8xV8UsUc", "ncV:128:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcvtph2uw256_mask, "V16UsV16xV16UsUs", "ncV:256:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcvtph2uw512_mask, "V32UsV32xV32UsUiIi", "ncV:512:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_vcvttph2uw128_mask, "V8UsV8xV8UsUc", "ncV:128:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcvttph2uw256_mask, "V16UsV16xV16UsUs", "ncV:256:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcvttph2uw512_mask, "V32UsV32xV32UsUiIi", "ncV:512:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_vcvtuw2ph128_mask, "V8xV8UsV8xUc", "ncV:128:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcvtuw2ph256_mask, "V16xV16UsV16xUs", "ncV:256:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcvtuw2ph512_mask, "V32xV32UsV32xUiIi", "ncV:512:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_vcvtph2dq128_mask, "V4iV8xV4iUc", "ncV:128:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcvtph2dq256_mask, "V8iV8xV8iUc", "ncV:256:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcvtph2dq512_mask, "V16iV16xV16iUsIi", "ncV:512:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_vcvtph2udq128_mask, "V4UiV8xV4UiUc", "ncV:128:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcvtph2udq256_mask, "V8UiV8xV8UiUc", "ncV:256:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcvtph2udq512_mask, "V16UiV16xV16UiUsIi", "ncV:512:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_vcvtdq2ph128_mask, "V8xV4iV8xUc", "ncV:128:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcvtdq2ph256_mask, "V8xV8iV8xUc", "ncV:256:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcvtdq2ph512_mask, "V16xV16iV16xUsIi", "ncV:512:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_vcvtudq2ph128_mask, "V8xV4UiV8xUc", "ncV:128:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcvtudq2ph256_mask, "V8xV8UiV8xUc", "ncV:256:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcvtudq2ph512_mask, "V16xV16UiV16xUsIi", "ncV:512:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_vcvttph2dq128_mask, "V4iV8xV4iUc", "ncV:128:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcvttph2dq256_mask, "V8iV8xV8iUc", "ncV:256:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcvttph2dq512_mask, "V16iV16xV16iUsIi", "ncV:512:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_vcvttph2udq128_mask, "V4UiV8xV4UiUc", "ncV:128:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcvttph2udq256_mask, "V8UiV8xV8UiUc", "ncV:256:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcvttph2udq512_mask, "V16UiV16xV16UiUsIi", "ncV:512:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_vcvtqq2ph128_mask, "V8xV2OiV8xUc", "ncV:128:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcvtqq2ph256_mask, "V8xV4OiV8xUc", "ncV:256:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcvtqq2ph512_mask, "V8xV8OiV8xUcIi", "ncV:512:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_vcvtph2qq128_mask, "V2OiV8xV2OiUc", "ncV:128:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcvtph2qq256_mask, "V4OiV8xV4OiUc", "ncV:256:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcvtph2qq512_mask, "V8OiV8xV8OiUcIi", "ncV:512:", "avx512fp16") 
+TARGET_BUILTIN(__builtin_ia32_vcvtuqq2ph128_mask, "V8xV2UOiV8xUc", "ncV:128:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcvtuqq2ph256_mask, "V8xV4UOiV8xUc", "ncV:256:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcvtuqq2ph512_mask, "V8xV8UOiV8xUcIi", "ncV:512:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_vcvtph2uqq128_mask, "V2UOiV8xV2UOiUc", "ncV:128:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcvtph2uqq256_mask, "V4UOiV8xV4UOiUc", "ncV:256:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcvtph2uqq512_mask, "V8UOiV8xV8UOiUcIi", "ncV:512:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_vcvttph2qq128_mask, "V2OiV8xV2OiUc", "ncV:128:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcvttph2qq256_mask, "V4OiV8xV4OiUc", "ncV:256:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcvttph2qq512_mask, "V8OiV8xV8OiUcIi", "ncV:512:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_vcvttph2uqq128_mask, "V2UOiV8xV2UOiUc", "ncV:128:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcvttph2uqq256_mask, "V4UOiV8xV4UOiUc", "ncV:256:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcvttph2uqq512_mask, "V8UOiV8xV8UOiUcIi", "ncV:512:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_vcvtsh2si32, "iV8xIi", "ncV:128:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_vcvtsh2usi32, "UiV8xIi", "ncV:128:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_vcvtusi2sh, "V8xV8xUiIi", "ncV:128:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_vcvtsi2sh, "V8xV8xiIi", "ncV:128:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_vcvttsh2si32, "iV8xIi", "ncV:128:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_vcvttsh2usi32, "UiV8xIi", "ncV:128:", "avx512fp16") + +TARGET_BUILTIN(__builtin_ia32_vcvtph2psx128_mask, "V4fV8xV4fUc", "ncV:128:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcvtph2psx256_mask, "V8fV8xV8fUc", "ncV:256:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcvtph2psx512_mask, "V16fV16xV16fUsIi", "ncV:512:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_vcvtps2phx128_mask, "V8xV4fV8xUc", "ncV:128:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcvtps2phx256_mask, "V8xV8fV8xUc", "ncV:256:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcvtps2phx512_mask, "V16xV16fV16xUsIi", "ncV:512:", "avx512fp16") + // generic select intrinsics TARGET_BUILTIN(__builtin_ia32_selectb_128, "V16cUsV16cV16c", "ncV:128:", "avx512bw,avx512vl") TARGET_BUILTIN(__builtin_ia32_selectb_256, "V32cUiV32cV32c", "ncV:256:", "avx512bw,avx512vl") @@ -1886,12 +2028,24 @@ TARGET_BUILTIN(__builtin_ia32_reduce_and_d512, "iV16i", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_reduce_and_q512, "OiV8Oi", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_reduce_fadd_pd512, "ddV8d", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_reduce_fadd_ps512, "ffV16f", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_reduce_fadd_ph512, "xxV32x", "ncV:512:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_reduce_fadd_ph256, "xxV16x", "ncV:256:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_reduce_fadd_ph128, "xxV8x", "ncV:128:", "avx512fp16,avx512vl") TARGET_BUILTIN(__builtin_ia32_reduce_fmax_pd512, "dV8d", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_reduce_fmax_ps512, "fV16f", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_reduce_fmax_ph512, "xV32x", "ncV:512:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_reduce_fmax_ph256, "xV16x", "ncV:256:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_reduce_fmax_ph128, "xV8x", 
"ncV:128:", "avx512fp16,avx512vl") TARGET_BUILTIN(__builtin_ia32_reduce_fmin_pd512, "dV8d", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_reduce_fmin_ps512, "fV16f", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_reduce_fmin_ph512, "xV32x", "ncV:512:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_reduce_fmin_ph256, "xV16x", "ncV:256:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_reduce_fmin_ph128, "xV8x", "ncV:128:", "avx512fp16,avx512vl") TARGET_BUILTIN(__builtin_ia32_reduce_fmul_pd512, "ddV8d", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_reduce_fmul_ps512, "ffV16f", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_reduce_fmul_ph512, "xxV32x", "ncV:512:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_reduce_fmul_ph256, "xxV16x", "ncV:256:", "avx512fp16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_reduce_fmul_ph128, "xxV8x", "ncV:128:", "avx512fp16,avx512vl") TARGET_BUILTIN(__builtin_ia32_reduce_mul_d512, "iV16i", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_reduce_mul_q512, "OiV8Oi", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_reduce_or_d512, "iV16i", "ncV:512:", "avx512f") diff --git a/clang/include/clang/Basic/BuiltinsX86_64.def b/clang/include/clang/Basic/BuiltinsX86_64.def index ce2b1decdf6ca..e0c9bec9b4e00 100644 --- a/clang/include/clang/Basic/BuiltinsX86_64.def +++ b/clang/include/clang/Basic/BuiltinsX86_64.def @@ -92,6 +92,12 @@ TARGET_BUILTIN(__builtin_ia32_cvtsi2sd64, "V2dV2dOiIi", "ncV:128:", "avx512f") TARGET_BUILTIN(__builtin_ia32_cvtsi2ss64, "V4fV4fOiIi", "ncV:128:", "avx512f") TARGET_BUILTIN(__builtin_ia32_cvtusi2sd64, "V2dV2dUOiIi", "ncV:128:", "avx512f") TARGET_BUILTIN(__builtin_ia32_cvtusi2ss64, "V4fV4fUOiIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vcvtsh2si64, "OiV8xIi", "ncV:128:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_vcvtsh2usi64, "UOiV8xIi", "ncV:128:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_vcvtusi642sh, "V8xV8xUOiIi", "ncV:128:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_vcvtsi642sh, "V8xV8xOiIi", "ncV:128:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_vcvttsh2si64, "OiV8xIi", "ncV:128:", "avx512fp16") +TARGET_BUILTIN(__builtin_ia32_vcvttsh2usi64, "UOiV8xIi", "ncV:128:", "avx512fp16") TARGET_BUILTIN(__builtin_ia32_directstore_u64, "vULi*ULi", "n", "movdiri") // UINTR diff --git a/clang/include/clang/Basic/CharInfo.h b/clang/include/clang/Basic/CharInfo.h index 8577475fab069..472eb38b9829d 100644 --- a/clang/include/clang/Basic/CharInfo.h +++ b/clang/include/clang/Basic/CharInfo.h @@ -43,6 +43,11 @@ LLVM_READNONE inline bool isASCII(char c) { return static_cast(c) <= 127; } +LLVM_READNONE inline bool isASCII(unsigned char c) { return c <= 127; } + +/// Returns true if this is an ASCII character. +LLVM_READNONE inline bool isASCII(uint32_t c) { return c <= 127; } + /// Returns true if this is a valid first character of a C identifier, /// which is [a-zA-Z_]. 
LLVM_READONLY inline bool isIdentifierHead(unsigned char c, diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index b4ec18d2726e6..c250f78b969a2 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -64,6 +64,7 @@ def StringConversion : DiagGroup<"string-conversion">; def SignConversion : DiagGroup<"sign-conversion">; def PointerBoolConversion : DiagGroup<"pointer-bool-conversion">; def UndefinedBoolConversion : DiagGroup<"undefined-bool-conversion">; +def BoolOperation : DiagGroup<"bool-operation">; def BoolConversion : DiagGroup<"bool-conversion", [PointerBoolConversion, UndefinedBoolConversion]>; def IntConversion : DiagGroup<"int-conversion">; @@ -822,8 +823,10 @@ def ReservedIdentifier : DiagGroup<"reserved-identifier", // under separate flags. // def UnreachableCodeLoopIncrement : DiagGroup<"unreachable-code-loop-increment">; +def UnreachableCodeFallthrough : DiagGroup<"unreachable-code-fallthrough">; def UnreachableCode : DiagGroup<"unreachable-code", - [UnreachableCodeLoopIncrement]>; + [UnreachableCodeLoopIncrement, + UnreachableCodeFallthrough]>; def UnreachableCodeBreak : DiagGroup<"unreachable-code-break">; def UnreachableCodeReturn : DiagGroup<"unreachable-code-return">; def UnreachableCodeAggressive : DiagGroup<"unreachable-code-aggressive", @@ -946,6 +949,7 @@ def Extra : DiagGroup<"extra", [ ]>; def Most : DiagGroup<"most", [ + BoolOperation, CharSubscript, Comment, DeleteNonVirtualDtor, diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td index f621cdb466560..c19adf104db1f 100644 --- a/clang/include/clang/Basic/DiagnosticLexKinds.td +++ b/clang/include/clang/Basic/DiagnosticLexKinds.td @@ -113,8 +113,10 @@ def warn_four_char_character_literal : Warning< // Unicode and UCNs def err_invalid_utf8 : Error< "source file is not valid UTF-8">; -def err_non_ascii : Error< - "non-ASCII characters are not allowed outside of literals and identifiers">; +def err_character_not_allowed : Error< + "unexpected character ">; +def err_character_not_allowed_identifier : Error< + "character not allowed %select{in|at the start of}1 an identifier">; def ext_unicode_whitespace : ExtWarn< "treating Unicode character as whitespace">, InGroup>; @@ -150,9 +152,6 @@ def warn_c99_compat_unicode_id : Warning< "%select{using this character in an identifier|starting an identifier with " "this character}0 is incompatible with C99">, InGroup, DefaultIgnore; -def warn_cxx98_compat_unicode_id : Warning< - "using this character in an identifier is incompatible with C++98">, - InGroup, DefaultIgnore; def warn_cxx98_compat_literal_ucn_escape_basic_scs : Warning< "specifying character '%0' with a universal character name " @@ -184,12 +183,10 @@ def warn_c2x_compat_digit_separator : Warning< InGroup, DefaultIgnore; def err_digit_separator_not_between_digits : Error< "digit separator cannot appear at %select{start|end}0 of digit sequence">; -def warn_extraneous_char_constant : Warning< - "extraneous characters in character constant ignored">; def warn_char_constant_too_large : Warning< "character constant too long for its type">; -def err_multichar_utf_character_literal : Error< - "Unicode character literals may not contain multiple characters">; +def err_multichar_character_literal : Error< + "%select{wide|Unicode}0 character literals may not contain multiple characters">; def err_exponent_has_no_digits : Error<"exponent has no digits">; def 
err_hex_constant_requires : Error< "hexadecimal floating %select{constant|literal}0 requires " diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 7bddf754263e6..f2eb528ab5ab9 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -715,6 +715,9 @@ def warn_unreachable_return : Warning< def warn_unreachable_loop_increment : Warning< "loop will run at most once (loop increment never executed)">, InGroup, DefaultIgnore; +def warn_unreachable_fallthrough_attr : Warning< + "fallthrough annotation in unreachable code">, + InGroup, DefaultIgnore; def note_unreachable_silence : Note< "silence by adding parentheses to mark code as explicitly dead">; @@ -7472,7 +7475,7 @@ def note_member_first_declared_here : Note< def warn_bitwise_negation_bool : Warning< "bitwise negation of a boolean expression%select{;| always evaluates to 'true';}0 " "did you mean logical negation?">, - InGroup>; + InGroup, DefaultIgnore; def err_decrement_bool : Error<"cannot decrement expression of type bool">; def warn_increment_bool : Warning< "incrementing expression of type bool is deprecated and " @@ -9628,9 +9631,6 @@ def err_fallthrough_attr_outside_switch : Error< "fallthrough annotation is outside switch statement">; def err_fallthrough_attr_invalid_placement : Error< "fallthrough annotation does not directly precede switch label">; -def warn_fallthrough_attr_unreachable : Warning< - "fallthrough annotation in unreachable code">, - InGroup, DefaultIgnore; def warn_unreachable_default : Warning< "default label in switch which covers all enumeration values">, diff --git a/clang/include/clang/Basic/LangStandards.def b/clang/include/clang/Basic/LangStandards.def index 160dc3f2405a7..6056cfd65bbbf 100644 --- a/clang/include/clang/Basic/LangStandards.def +++ b/clang/include/clang/Basic/LangStandards.def @@ -180,12 +180,18 @@ LANGSTANDARD(opencl20, "cl2.0", LANGSTANDARD(opencl30, "cl3.0", OpenCL, "OpenCL 3.0", LineComment | C99 | Digraphs | HexFloat | OpenCL) + LANGSTANDARD(openclcpp10, "clc++1.0", OpenCL, "C++ for OpenCL 1.0", LineComment | CPlusPlus | CPlusPlus11 | CPlusPlus14 | CPlusPlus17 | Digraphs | HexFloat | OpenCL) LANGSTANDARD_ALIAS(openclcpp10, "clc++") +LANGSTANDARD(openclcpp2021, "clc++2021", + OpenCL, "C++ for OpenCL 2021", + LineComment | CPlusPlus | CPlusPlus11 | CPlusPlus14 | CPlusPlus17 | + Digraphs | HexFloat | OpenCL) + LANGSTANDARD_ALIAS_DEPR(opencl10, "CL") LANGSTANDARD_ALIAS_DEPR(opencl11, "CL1.1") LANGSTANDARD_ALIAS_DEPR(opencl12, "CL1.2") @@ -193,6 +199,7 @@ LANGSTANDARD_ALIAS_DEPR(opencl20, "CL2.0") LANGSTANDARD_ALIAS_DEPR(opencl30, "CL3.0") LANGSTANDARD_ALIAS_DEPR(openclcpp10, "CLC++") LANGSTANDARD_ALIAS_DEPR(openclcpp10, "CLC++1.0") +LANGSTANDARD_ALIAS_DEPR(openclcpp2021, "CLC++2021") // CUDA LANGSTANDARD(cuda, "cuda", CUDA, "NVIDIA CUDA(tm)", diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index c7a57a7dba9a8..ab855948b447c 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -210,8 +210,8 @@ class TargetInfo : public virtual TransferrableTargetInfo, unsigned char RegParmMax, SSERegParmMax; TargetCXXABI TheCXXABI; const LangASMap *AddrSpaceMap; - const unsigned *GridValues = - nullptr; // Array of target-specific GPU grid values that must be + const llvm::omp::GV *GridValues = + nullptr; // target-specific GPU grid values that must be // consistent between host RTL (plugin), 
device RTL, and clang. mutable StringRef PlatformName; @@ -871,6 +871,11 @@ class TargetInfo : public virtual TransferrableTargetInfo, /// across the current set of primary and secondary targets. virtual ArrayRef getTargetBuiltins() const = 0; + /// Returns target-specific min and max values VScale_Range. + virtual Optional> + getVScaleRange(const LangOptions &LangOpts) const { + return None; + } /// The __builtin_clz* and __builtin_ctz* built-in /// functions are specified to have undefined results for zero inputs, but /// on targets that support these operations in a way that provides @@ -1405,10 +1410,10 @@ class TargetInfo : public virtual TransferrableTargetInfo, return LangAS::Default; } - /// Return a target-specific GPU grid value based on the GVIDX enum \p gv - unsigned getGridValue(llvm::omp::GVIDX gv) const { + /// Return a target-specific GPU grid values + const llvm::omp::GV &getGridValue() const { assert(GridValues != nullptr && "GridValues not initialized"); - return GridValues[gv]; + return *GridValues; } /// Retrieve the name of the platform as it is used in the diff --git a/clang/include/clang/Driver/Distro.h b/clang/include/clang/Driver/Distro.h index 0d2a0939639ea..d9909bcf96968 100644 --- a/clang/include/clang/Driver/Distro.h +++ b/clang/include/clang/Driver/Distro.h @@ -37,6 +37,7 @@ class Distro { DebianStretch, DebianBuster, DebianBullseye, + DebianBookworm, Exherbo, RHEL5, RHEL6, @@ -119,7 +120,7 @@ class Distro { bool IsOpenSUSE() const { return DistroVal == OpenSUSE; } bool IsDebian() const { - return DistroVal >= DebianLenny && DistroVal <= DebianBullseye; + return DistroVal >= DebianLenny && DistroVal <= DebianBookworm; } bool IsUbuntu() const { diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 872df1bc06264..e5da70a52427a 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -887,7 +887,7 @@ def cl_no_signed_zeros : Flag<["-"], "cl-no-signed-zeros">, Group, MarshallingInfoFlag>; def cl_std_EQ : Joined<["-"], "cl-std=">, Group, Flags<[CC1Option]>, HelpText<"OpenCL language standard to compile for.">, - Values<"cl,CL,cl1.0,CL1.0,cl1.1,CL1.1,cl1.2,CL1.2,cl2.0,CL2.0,cl3.0,CL3.0,clc++,CLC++,clc++1.0,CLC++1.0">; + Values<"cl,CL,cl1.0,CL1.0,cl1.1,CL1.1,cl1.2,CL1.2,cl2.0,CL2.0,cl3.0,CL3.0,clc++,CLC++,clc++1.0,CLC++1.0,clc++2021,CLC++2021">; def cl_denorms_are_zero : Flag<["-"], "cl-denorms-are-zero">, Group, HelpText<"OpenCL only. 
Allow denormals to be flushed to zero.">; def cl_fp32_correctly_rounded_divide_sqrt : Flag<["-"], "cl-fp32-correctly-rounded-divide-sqrt">, Group, Flags<[CC1Option]>, @@ -2584,7 +2584,7 @@ def ftrivial_auto_var_init : Joined<["-"], "ftrivial-auto-var-init=">, Group, MarshallingInfoEnum, "Uninitialized">; def enable_trivial_var_init_zero : Flag<["-"], "enable-trivial-auto-var-init-zero-knowing-it-will-be-removed-from-clang">, - Flags<[CC1Option, CoreOption]>, + Flags<[CC1Option, CoreOption, NoArgumentUnused]>, HelpText<"Trivial automatic variable initialization to zero is only here for benchmarks, it'll eventually be removed, and I'm OK with that because I'm only using it to benchmark">; def ftrivial_auto_var_init_stop_after : Joined<["-"], "ftrivial-auto-var-init-stop-after=">, Group, Flags<[CC1Option, CoreOption]>, HelpText<"Stop initializing trivial automatic stack variables after the specified number of instances">, @@ -4402,7 +4402,6 @@ def mno_vzeroupper : Flag<["-"], "mno-vzeroupper">, Group; // These are legacy user-facing driver-level option spellings. They are always // aliases for options that are spelled using the more common Unix / GNU flag // style of double-dash and equals-joined flags. -def gcc_toolchain_legacy_spelling : Separate<["-"], "gcc-toolchain">, Alias; def target_legacy_spelling : Separate<["-"], "target">, Alias; // Special internal option to handle -Xlinker --no-demangle. diff --git a/clang/include/clang/Sema/ParsedAttr.h b/clang/include/clang/Sema/ParsedAttr.h index abed12f38d92b..66d8297beae52 100644 --- a/clang/include/clang/Sema/ParsedAttr.h +++ b/clang/include/clang/Sema/ParsedAttr.h @@ -70,6 +70,8 @@ struct ParsedAttrInfo { const char *NormalizedFullName; }; ArrayRef Spellings; + // The names of the known arguments of this attribute. + ArrayRef ArgNames; ParsedAttrInfo(AttributeCommonInfo::Kind AttrKind = AttributeCommonInfo::NoSemaHandlerAttribute) diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index f417a9db7a83a..ba825ff2fea57 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -11597,15 +11597,14 @@ class Sema final { SourceLocation ModifierLoc, SourceLocation EndLoc); /// Called on well-formed 'map' clause. - OMPClause * - ActOnOpenMPMapClause(ArrayRef MapTypeModifiers, - ArrayRef MapTypeModifiersLoc, - CXXScopeSpec &MapperIdScopeSpec, - DeclarationNameInfo &MapperId, - OpenMPMapClauseKind MapType, bool IsMapTypeImplicit, - SourceLocation MapLoc, SourceLocation ColonLoc, - ArrayRef VarList, const OMPVarListLocTy &Locs, - ArrayRef UnresolvedMappers = llvm::None); + OMPClause *ActOnOpenMPMapClause( + ArrayRef MapTypeModifiers, + ArrayRef MapTypeModifiersLoc, + CXXScopeSpec &MapperIdScopeSpec, DeclarationNameInfo &MapperId, + OpenMPMapClauseKind MapType, bool IsMapTypeImplicit, + SourceLocation MapLoc, SourceLocation ColonLoc, ArrayRef VarList, + const OMPVarListLocTy &Locs, bool NoDiagnose = false, + ArrayRef UnresolvedMappers = llvm::None); /// Called on well-formed 'num_teams' clause. 
OMPClause *ActOnOpenMPNumTeamsClause(Expr *NumTeams, SourceLocation StartLoc, SourceLocation LParenLoc, diff --git a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td index 444b00d73f0b7..125ef859d1ebb 100644 --- a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td +++ b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td @@ -485,7 +485,17 @@ def DynamicMemoryModeling: Checker<"DynamicMemoryModeling">, "allocating and deallocating functions are annotated with " "ownership_holds, ownership_takes and ownership_returns.", "false", - InAlpha> + InAlpha>, + CmdLineOption ]>, Dependencies<[CStringModeling]>, Documentation, diff --git a/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h b/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h index 24cae12af24a1..139b0dcd51704 100644 --- a/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h +++ b/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h @@ -21,6 +21,7 @@ #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/IntrusiveRefCntPtr.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringRef.h" #include #include @@ -622,6 +623,84 @@ class TagVisitor : public BugReporterVisitor { PathSensitiveBugReport &R) override; }; +class ObjCMethodCall; +class CXXConstructorCall; + +/// Put a diagnostic on return statement (or on } in its absence) of all inlined +/// functions for which some property remained unchanged. +/// Resulting diagnostics may read such as "Returning without writing to X". +/// +/// Descendants can define what a "state change is", like a change of value +/// to a memory region, liveness, etc. For function calls where the state did +/// not change as defined, a custom note may be constructed. +class NoStateChangeFuncVisitor : public BugReporterVisitor { +private: + /// Frames modifying the state as defined in \c wasModifiedBeforeCallExit. + /// This visitor generates a note only if a function does *not* change the + /// state that way. This information is not immediately available + /// by looking at the node associated with the exit from the function + /// (usually the return statement). To avoid recomputing the same information + /// many times (going up the path for each node and checking whether the + /// region was written into) we instead lazily compute the stack frames + /// along the path. + llvm::SmallPtrSet FramesModifying; + llvm::SmallPtrSet FramesModifyingCalculated; + + /// Check and lazily calculate whether the state is modified in the stack + /// frame to which \p CallExitBeginN belongs. + /// The calculation is cached in FramesModifying. + bool isModifiedInFrame(const ExplodedNode *CallExitBeginN); + + /// Write to \c FramesModifying all stack frames along the path in the current + /// stack frame which modifies the state. + void findModifyingFrames(const ExplodedNode *const CallExitBeginN); + +protected: + bugreporter::TrackingKind TKind; + + /// \return Whether the state was modified from the current node, \CurrN, to + /// the end of the stack fram, at \p CallExitBeginN. + virtual bool + wasModifiedBeforeCallExit(const ExplodedNode *CurrN, + const ExplodedNode *CallExitBeginN) = 0; + + /// Consume the information on the non-modifying stack frame in order to + /// either emit a note or not. May suppress the report entirely. 
+  /// \return Diagnostics piece for the unmodified state in the current
+  /// function, if it decides to emit one. A good description might start with
+  /// "Returning without...".
+  virtual PathDiagnosticPieceRef
+  maybeEmitNoteForObjCSelf(PathSensitiveBugReport &R,
+                           const ObjCMethodCall &Call,
+                           const ExplodedNode *N) = 0;
+
+  /// Consume the information on the non-modifying stack frame in order to
+  /// either emit a note or not. May suppress the report entirely.
+  /// \return Diagnostics piece for the unmodified state in the current
+  /// function, if it decides to emit one. A good description might start with
+  /// "Returning without...".
+  virtual PathDiagnosticPieceRef
+  maybeEmitNoteForCXXThis(PathSensitiveBugReport &R,
+                          const CXXConstructorCall &Call,
+                          const ExplodedNode *N) = 0;
+
+  /// Consume the information on the non-modifying stack frame in order to
+  /// either emit a note or not. May suppress the report entirely.
+  /// \return Diagnostics piece for the unmodified state in the current
+  /// function, if it decides to emit one. A good description might start with
+  /// "Returning without...".
+  virtual PathDiagnosticPieceRef
+  maybeEmitNoteForParameters(PathSensitiveBugReport &R, const CallEvent &Call,
+                             const ExplodedNode *N) = 0;
+
+public:
+  NoStateChangeFuncVisitor(bugreporter::TrackingKind TKind) : TKind(TKind) {}
+
+  PathDiagnosticPieceRef VisitNode(const ExplodedNode *N,
+                                   BugReporterContext &BR,
+                                   PathSensitiveBugReport &R) override final;
+};
+
 } // namespace ento
 } // namespace clang
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index e85ca8ef86157..c49a52b03106f 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -172,29 +172,28 @@ static SourceLocation getDeclLocForCommentSearch(const Decl *D,
       // Allow association with Y across {} in `typedef struct X {} Y`.
       isa(D))
     return D->getBeginLoc();
-  else {
-    const SourceLocation DeclLoc = D->getLocation();
-    if (DeclLoc.isMacroID()) {
-      if (isa(D)) {
-        // If location of the typedef name is in a macro, it is because being
-        // declared via a macro. Try using declaration's starting location as
-        // the "declaration location".
-        return D->getBeginLoc();
-      } else if (const auto *TD = dyn_cast(D)) {
-        // If location of the tag decl is inside a macro, but the spelling of
-        // the tag name comes from a macro argument, it looks like a special
-        // macro like NS_ENUM is being used to define the tag decl. In that
-        // case, adjust the source location to the expansion loc so that we can
-        // attach the comment to the tag decl.
-        if (SourceMgr.isMacroArgExpansion(DeclLoc) &&
-            TD->isCompleteDefinition())
-          return SourceMgr.getExpansionLoc(DeclLoc);
-      }
+
+  const SourceLocation DeclLoc = D->getLocation();
+  if (DeclLoc.isMacroID()) {
+    if (isa(D)) {
+      // If location of the typedef name is in a macro, it is because being
+      // declared via a macro. Try using declaration's starting location as
+      // the "declaration location".
+      return D->getBeginLoc();
+    }
+
+    if (const auto *TD = dyn_cast(D)) {
+      // If location of the tag decl is inside a macro, but the spelling of
+      // the tag name comes from a macro argument, it looks like a special
+      // macro like NS_ENUM is being used to define the tag decl. In that
+      // case, adjust the source location to the expansion loc so that we can
+      // attach the comment to the tag decl.
+ if (SourceMgr.isMacroArgExpansion(DeclLoc) && TD->isCompleteDefinition()) + return SourceMgr.getExpansionLoc(DeclLoc); } - return DeclLoc; } - return {}; + return DeclLoc; } RawComment *ASTContext::getRawCommentForDeclNoCacheImpl( diff --git a/clang/lib/AST/ASTDiagnostic.cpp b/clang/lib/AST/ASTDiagnostic.cpp index dc22481d0a84c..7e435e8b35b80 100644 --- a/clang/lib/AST/ASTDiagnostic.cpp +++ b/clang/lib/AST/ASTDiagnostic.cpp @@ -1088,6 +1088,9 @@ class TemplateDiff { Ty->getAs()) return TST; + if (const auto* SubstType = Ty->getAs()) + Ty = SubstType->getReplacementType(); + const RecordType *RT = Ty->getAs(); if (!RT) diff --git a/clang/lib/AST/DeclBase.cpp b/clang/lib/AST/DeclBase.cpp index 3467da2b549e2..e042ae8dae4ae 100644 --- a/clang/lib/AST/DeclBase.cpp +++ b/clang/lib/AST/DeclBase.cpp @@ -1217,6 +1217,15 @@ bool DeclContext::Encloses(const DeclContext *DC) const { return false; } +DeclContext *DeclContext::getNonTransparentContext() { + DeclContext *DC = this; + while (DC->isTransparentContext()) { + DC = DC->getParent(); + assert(DC && "All transparent contexts should have a parent!"); + } + return DC; +} + DeclContext *DeclContext::getPrimaryContext() { switch (getDeclKind()) { case Decl::ExternCContext: diff --git a/clang/lib/Basic/LangOptions.cpp b/clang/lib/Basic/LangOptions.cpp index 169b679490389..3e3e7fe3dc155 100644 --- a/clang/lib/Basic/LangOptions.cpp +++ b/clang/lib/Basic/LangOptions.cpp @@ -47,6 +47,8 @@ bool LangOptions::isNoBuiltinFunc(StringRef FuncName) const { VersionTuple LangOptions::getOpenCLVersionTuple() const { const int Ver = OpenCLCPlusPlus ? OpenCLCPlusPlusVersion : OpenCLVersion; + if (OpenCLCPlusPlus && Ver != 100) + return VersionTuple(Ver / 100); return VersionTuple(Ver / 100, (Ver % 100) / 10); } diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp index e163ebfa2348b..2b5bf34a7b23f 100644 --- a/clang/lib/Basic/Targets/AArch64.cpp +++ b/clang/lib/Basic/Targets/AArch64.cpp @@ -424,6 +424,17 @@ ArrayRef AArch64TargetInfo::getTargetBuiltins() const { Builtin::FirstTSBuiltin); } +Optional> +AArch64TargetInfo::getVScaleRange(const LangOptions &LangOpts) const { + if (LangOpts.ArmSveVectorBits) { + unsigned VScale = LangOpts.ArmSveVectorBits / 128; + return std::pair(VScale, VScale); + } + if (hasFeature("sve")) + return std::pair(0, 16); + return None; +} + bool AArch64TargetInfo::hasFeature(StringRef Feature) const { return Feature == "aarch64" || Feature == "arm64" || Feature == "arm" || (Feature == "neon" && (FPU & NeonMode)) || diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h index 46882a808336b..12830348fb453 100644 --- a/clang/lib/Basic/Targets/AArch64.h +++ b/clang/lib/Basic/Targets/AArch64.h @@ -96,6 +96,9 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo { ArrayRef getTargetBuiltins() const override; + Optional> + getVScaleRange(const LangOptions &LangOpts) const override; + bool hasFeature(StringRef Feature) const override; bool handleTargetFeatures(std::vector &Features, DiagnosticsEngine &Diags) override; diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index fac786dbcf9e2..cebb19e7ccab3 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -335,7 +335,7 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, llvm::AMDGPU::getArchAttrR600(GPUKind)) { resetDataLayout(isAMDGCN(getTriple()) ? 
DataLayoutStringAMDGCN : DataLayoutStringR600); - GridValues = llvm::omp::AMDGPUGpuGridValues; + GridValues = &llvm::omp::AMDGPUGridValues; setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D || !isAMDGCN(Triple)); diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h index f8772cbe244f0..77c2c5fd50145 100644 --- a/clang/lib/Basic/Targets/AMDGPU.h +++ b/clang/lib/Basic/Targets/AMDGPU.h @@ -353,6 +353,8 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo { LangAS getCUDABuiltinAddressSpace(unsigned AS) const override { switch (AS) { + case 0: + return LangAS::Default; case 1: return LangAS::cuda_device; case 3: diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp index ae546492a8bb3..398d5a1107f51 100644 --- a/clang/lib/Basic/Targets/NVPTX.cpp +++ b/clang/lib/Basic/Targets/NVPTX.cpp @@ -65,7 +65,7 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple, TLSSupported = false; VLASupported = false; AddrSpaceMap = &NVPTXAddrSpaceMap; - GridValues = llvm::omp::NVPTXGpuGridValues; + GridValues = &llvm::omp::NVPTXGridValues; UseAddrSpaceMapMangling = true; HasLegalHalfType = true; HasFloat16 = true; diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp index c15d2df33f9f7..c8afb71e7dfd1 100644 --- a/clang/lib/Basic/Targets/PPC.cpp +++ b/clang/lib/Basic/Targets/PPC.cpp @@ -237,6 +237,7 @@ static void defineXLCompatMacros(MacroBuilder &Builder) { Builder.defineMacro("__fsqrt", "__builtin_ppc_fsqrt"); Builder.defineMacro("__fsqrts", "__builtin_ppc_fsqrts"); Builder.defineMacro("__addex", "__builtin_ppc_addex"); + Builder.defineMacro("__cmplxl", "__builtin_complex"); } /// PPCTargetInfo::getTargetDefines - Return a set of the PowerPC-specific diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 7c7431886be01..f906383082269 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -1189,7 +1189,7 @@ static void addSanitizers(const Triple &TargetTriple, CompileKernel, Recover, ModuleUseAfterScope, UseOdrIndicator, DestructorKind)); MPM.addPass(createModuleToFunctionPassAdaptor(AddressSanitizerPass( - CompileKernel, Recover, UseAfterScope, UseAfterReturn))); + {CompileKernel, Recover, UseAfterScope, UseAfterReturn}))); } }; ASanPass(SanitizerKind::Address, false); @@ -1199,8 +1199,8 @@ static void addSanitizers(const Triple &TargetTriple, if (LangOpts.Sanitize.has(Mask)) { bool Recover = CodeGenOpts.SanitizeRecover.has(Mask); MPM.addPass(HWAddressSanitizerPass( - CompileKernel, Recover, - /*DisableOptimization=*/CodeGenOpts.OptimizationLevel == 0)); + {CompileKernel, Recover, + /*DisableOptimization=*/CodeGenOpts.OptimizationLevel == 0})); } }; HWASanPass(SanitizerKind::HWAddress, false); @@ -1289,6 +1289,8 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( "", PGOOptions::NoAction, PGOOptions::CSIRInstr, CodeGenOpts.DebugInfoForProfiling); } + if (TM) + TM->setPGOOption(PGOOpt); PipelineTuningOptions PTO; PTO.LoopUnrolling = CodeGenOpts.UnrollLoops; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 65671a49ed92f..6bbf556a55680 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -8385,7 +8385,7 @@ Value *CodeGenFunction::vectorWrapScalar16(Value *Op) { /// SVEBuiltinMemEltTy - Returns the memory element type for this memory /// access builtin. Only required if it can't be inferred from the base pointer /// operand. 
-llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(SVETypeFlags TypeFlags) { +llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags) { switch (TypeFlags.getMemEltType()) { case SVETypeFlags::MemEltTyDefault: return getEltType(TypeFlags); @@ -8401,7 +8401,7 @@ llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(SVETypeFlags TypeFlags) { llvm_unreachable("Unknown MemEltType"); } -llvm::Type *CodeGenFunction::getEltType(SVETypeFlags TypeFlags) { +llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) { switch (TypeFlags.getEltType()) { default: llvm_unreachable("Invalid SVETypeFlag!"); @@ -8436,7 +8436,7 @@ llvm::Type *CodeGenFunction::getEltType(SVETypeFlags TypeFlags) { // Return the llvm predicate vector type corresponding to the specified element // TypeFlags. llvm::ScalableVectorType * -CodeGenFunction::getSVEPredType(SVETypeFlags TypeFlags) { +CodeGenFunction::getSVEPredType(const SVETypeFlags &TypeFlags) { switch (TypeFlags.getEltType()) { default: llvm_unreachable("Unhandled SVETypeFlag!"); @@ -8505,7 +8505,8 @@ CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) { } } -llvm::Value *CodeGenFunction::EmitSVEAllTruePred(SVETypeFlags TypeFlags) { +llvm::Value * +CodeGenFunction::EmitSVEAllTruePred(const SVETypeFlags &TypeFlags) { Function *Ptrue = CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, getSVEPredType(TypeFlags)); return Builder.CreateCall(Ptrue, {Builder.getInt32(/*SV_ALL*/ 31)}); @@ -8549,7 +8550,7 @@ Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred, return C; } -Value *CodeGenFunction::EmitSVEGatherLoad(SVETypeFlags TypeFlags, +Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned IntID) { auto *ResultTy = getSVEType(TypeFlags); @@ -8601,7 +8602,7 @@ Value *CodeGenFunction::EmitSVEGatherLoad(SVETypeFlags TypeFlags, : Builder.CreateSExt(Call, ResultTy); } -Value *CodeGenFunction::EmitSVEScatterStore(SVETypeFlags TypeFlags, +Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned IntID) { auto *SrcDataTy = getSVEType(TypeFlags); @@ -8656,7 +8657,7 @@ Value *CodeGenFunction::EmitSVEScatterStore(SVETypeFlags TypeFlags, return Builder.CreateCall(F, Ops); } -Value *CodeGenFunction::EmitSVEGatherPrefetch(SVETypeFlags TypeFlags, +Value *CodeGenFunction::EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned IntID) { // The gather prefetches are overloaded on the vector input - this can either @@ -8689,7 +8690,7 @@ Value *CodeGenFunction::EmitSVEGatherPrefetch(SVETypeFlags TypeFlags, return Builder.CreateCall(F, Ops); } -Value *CodeGenFunction::EmitSVEStructLoad(SVETypeFlags TypeFlags, +Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned IntID) { llvm::ScalableVectorType *VTy = getSVEType(TypeFlags); @@ -8723,7 +8724,7 @@ Value *CodeGenFunction::EmitSVEStructLoad(SVETypeFlags TypeFlags, return Builder.CreateCall(F, { Predicate, BasePtr }); } -Value *CodeGenFunction::EmitSVEStructStore(SVETypeFlags TypeFlags, +Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned IntID) { llvm::ScalableVectorType *VTy = getSVEType(TypeFlags); @@ -8770,7 +8771,7 @@ Value *CodeGenFunction::EmitSVEStructStore(SVETypeFlags TypeFlags, // SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and // svpmullt_pair intrinsics, with the exception that their results are bitcast // to a wider type. 
-Value *CodeGenFunction::EmitSVEPMull(SVETypeFlags TypeFlags, +Value *CodeGenFunction::EmitSVEPMull(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned BuiltinID) { // Splat scalar operand to vector (intrinsics with _n infix) @@ -8788,14 +8789,14 @@ Value *CodeGenFunction::EmitSVEPMull(SVETypeFlags TypeFlags, return EmitSVEReinterpret(Call, Ty); } -Value *CodeGenFunction::EmitSVEMovl(SVETypeFlags TypeFlags, +Value *CodeGenFunction::EmitSVEMovl(const SVETypeFlags &TypeFlags, ArrayRef Ops, unsigned BuiltinID) { llvm::Type *OverloadedTy = getSVEType(TypeFlags); Function *F = CGM.getIntrinsic(BuiltinID, OverloadedTy); return Builder.CreateCall(F, {Ops[0], Builder.getInt32(0)}); } -Value *CodeGenFunction::EmitSVEPrefetchLoad(SVETypeFlags TypeFlags, +Value *CodeGenFunction::EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned BuiltinID) { auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags); @@ -8904,8 +8905,10 @@ static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty, Ops.insert(Ops.begin(), SplatUndef); } -SmallVector CodeGenFunction::getSVEOverloadTypes( - SVETypeFlags TypeFlags, llvm::Type *ResultType, ArrayRef Ops) { +SmallVector +CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags, + llvm::Type *ResultType, + ArrayRef Ops) { if (TypeFlags.isOverloadNone()) return {}; @@ -9718,6 +9721,29 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F); } + if (BuiltinID == AArch64::BI__mulh || BuiltinID == AArch64::BI__umulh) { + llvm::Type *ResType = ConvertType(E->getType()); + llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128); + + bool IsSigned = BuiltinID == AArch64::BI__mulh; + Value *LHS = + Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned); + Value *RHS = + Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, IsSigned); + + Value *MulResult, *HigherBits; + if (IsSigned) { + MulResult = Builder.CreateNSWMul(LHS, RHS); + HigherBits = Builder.CreateAShr(MulResult, 64); + } else { + MulResult = Builder.CreateNUWMul(LHS, RHS); + HigherBits = Builder.CreateLShr(MulResult, 64); + } + HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned); + + return HigherBits; + } + // Handle MSVC intrinsics before argument evaluation to prevent double // evaluation. 
if (Optional MsvcIntId = translateAarch64ToMsvcIntrin(BuiltinID)) @@ -12729,10 +12755,16 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_cvtdq2ps512_mask: case X86::BI__builtin_ia32_cvtqq2ps512_mask: case X86::BI__builtin_ia32_cvtqq2pd512_mask: + case X86::BI__builtin_ia32_vcvtw2ph512_mask: + case X86::BI__builtin_ia32_vcvtdq2ph512_mask: + case X86::BI__builtin_ia32_vcvtqq2ph512_mask: return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ true); case X86::BI__builtin_ia32_cvtudq2ps512_mask: case X86::BI__builtin_ia32_cvtuqq2ps512_mask: case X86::BI__builtin_ia32_cvtuqq2pd512_mask: + case X86::BI__builtin_ia32_vcvtuw2ph512_mask: + case X86::BI__builtin_ia32_vcvtudq2ph512_mask: + case X86::BI__builtin_ia32_vcvtuqq2ph512_mask: return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ false); case X86::BI__builtin_ia32_vfmaddss3: @@ -13936,15 +13968,28 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, } return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0); } + case X86::BI__builtin_ia32_sqrtsh_round_mask: case X86::BI__builtin_ia32_sqrtsd_round_mask: case X86::BI__builtin_ia32_sqrtss_round_mask: { unsigned CC = cast(Ops[4])->getZExtValue(); // Support only if the rounding mode is 4 (AKA CUR_DIRECTION), // otherwise keep the intrinsic. if (CC != 4) { - Intrinsic::ID IID = BuiltinID == X86::BI__builtin_ia32_sqrtsd_round_mask ? - Intrinsic::x86_avx512_mask_sqrt_sd : - Intrinsic::x86_avx512_mask_sqrt_ss; + Intrinsic::ID IID; + + switch (BuiltinID) { + default: + llvm_unreachable("Unsupported intrinsic!"); + case X86::BI__builtin_ia32_sqrtsh_round_mask: + IID = Intrinsic::x86_avx512fp16_mask_sqrt_sh; + break; + case X86::BI__builtin_ia32_sqrtsd_round_mask: + IID = Intrinsic::x86_avx512_mask_sqrt_sd; + break; + case X86::BI__builtin_ia32_sqrtss_round_mask: + IID = Intrinsic::x86_avx512_mask_sqrt_ss; + break; + } return Builder.CreateCall(CGM.getIntrinsic(IID), Ops); } Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0); @@ -13966,6 +14011,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_sqrtpd: case X86::BI__builtin_ia32_sqrtps256: case X86::BI__builtin_ia32_sqrtps: + case X86::BI__builtin_ia32_sqrtph256: + case X86::BI__builtin_ia32_sqrtph: + case X86::BI__builtin_ia32_sqrtph512: case X86::BI__builtin_ia32_sqrtps512: case X86::BI__builtin_ia32_sqrtpd512: { if (Ops.size() == 2) { @@ -13973,9 +14021,21 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, // Support only if the rounding mode is 4 (AKA CUR_DIRECTION), // otherwise keep the intrinsic. if (CC != 4) { - Intrinsic::ID IID = BuiltinID == X86::BI__builtin_ia32_sqrtps512 ? 
- Intrinsic::x86_avx512_sqrt_ps_512 : - Intrinsic::x86_avx512_sqrt_pd_512; + Intrinsic::ID IID; + + switch (BuiltinID) { + default: + llvm_unreachable("Unsupported intrinsic!"); + case X86::BI__builtin_ia32_sqrtph512: + IID = Intrinsic::x86_avx512fp16_sqrt_ph_512; + break; + case X86::BI__builtin_ia32_sqrtps512: + IID = Intrinsic::x86_avx512_sqrt_ps_512; + break; + case X86::BI__builtin_ia32_sqrtpd512: + IID = Intrinsic::x86_avx512_sqrt_pd_512; + break; + } return Builder.CreateCall(CGM.getIntrinsic(IID), Ops); } } @@ -14143,28 +14203,40 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {Ops[0]}); } case X86::BI__builtin_ia32_reduce_fadd_pd512: - case X86::BI__builtin_ia32_reduce_fadd_ps512: { + case X86::BI__builtin_ia32_reduce_fadd_ps512: + case X86::BI__builtin_ia32_reduce_fadd_ph512: + case X86::BI__builtin_ia32_reduce_fadd_ph256: + case X86::BI__builtin_ia32_reduce_fadd_ph128: { Function *F = CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType()); Builder.getFastMathFlags().setAllowReassoc(); return Builder.CreateCall(F, {Ops[0], Ops[1]}); } case X86::BI__builtin_ia32_reduce_fmul_pd512: - case X86::BI__builtin_ia32_reduce_fmul_ps512: { + case X86::BI__builtin_ia32_reduce_fmul_ps512: + case X86::BI__builtin_ia32_reduce_fmul_ph512: + case X86::BI__builtin_ia32_reduce_fmul_ph256: + case X86::BI__builtin_ia32_reduce_fmul_ph128: { Function *F = CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType()); Builder.getFastMathFlags().setAllowReassoc(); return Builder.CreateCall(F, {Ops[0], Ops[1]}); } case X86::BI__builtin_ia32_reduce_fmax_pd512: - case X86::BI__builtin_ia32_reduce_fmax_ps512: { + case X86::BI__builtin_ia32_reduce_fmax_ps512: + case X86::BI__builtin_ia32_reduce_fmax_ph512: + case X86::BI__builtin_ia32_reduce_fmax_ph256: + case X86::BI__builtin_ia32_reduce_fmax_ph128: { Function *F = CGM.getIntrinsic(Intrinsic::vector_reduce_fmax, Ops[0]->getType()); Builder.getFastMathFlags().setNoNaNs(); return Builder.CreateCall(F, {Ops[0]}); } case X86::BI__builtin_ia32_reduce_fmin_pd512: - case X86::BI__builtin_ia32_reduce_fmin_ps512: { + case X86::BI__builtin_ia32_reduce_fmin_ps512: + case X86::BI__builtin_ia32_reduce_fmin_ph512: + case X86::BI__builtin_ia32_reduce_fmin_ph256: + case X86::BI__builtin_ia32_reduce_fmin_ph128: { Function *F = CGM.getIntrinsic(Intrinsic::vector_reduce_fmin, Ops[0]->getType()); Builder.getFastMathFlags().setNoNaNs(); @@ -14280,6 +14352,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_fpclassps128_mask: case X86::BI__builtin_ia32_fpclassps256_mask: case X86::BI__builtin_ia32_fpclassps512_mask: + case X86::BI__builtin_ia32_fpclassph128_mask: + case X86::BI__builtin_ia32_fpclassph256_mask: + case X86::BI__builtin_ia32_fpclassph512_mask: case X86::BI__builtin_ia32_fpclasspd128_mask: case X86::BI__builtin_ia32_fpclasspd256_mask: case X86::BI__builtin_ia32_fpclasspd512_mask: { @@ -14291,6 +14366,15 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Intrinsic::ID ID; switch (BuiltinID) { default: llvm_unreachable("Unsupported intrinsic!"); + case X86::BI__builtin_ia32_fpclassph128_mask: + ID = Intrinsic::x86_avx512fp16_fpclass_ph_128; + break; + case X86::BI__builtin_ia32_fpclassph256_mask: + ID = Intrinsic::x86_avx512fp16_fpclass_ph_256; + break; + case X86::BI__builtin_ia32_fpclassph512_mask: + ID = Intrinsic::x86_avx512fp16_fpclass_ph_512; + break; case X86::BI__builtin_ia32_fpclassps128_mask: ID = Intrinsic::x86_avx512_fpclass_ps_128; 
break; @@ -14428,6 +14512,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_cmpordps: case X86::BI__builtin_ia32_cmpordpd: return getVectorFCmpIR(CmpInst::FCMP_ORD, /*IsSignaling*/false); + case X86::BI__builtin_ia32_cmpph128_mask: + case X86::BI__builtin_ia32_cmpph256_mask: + case X86::BI__builtin_ia32_cmpph512_mask: case X86::BI__builtin_ia32_cmpps128_mask: case X86::BI__builtin_ia32_cmpps256_mask: case X86::BI__builtin_ia32_cmpps512_mask: @@ -15909,11 +15996,9 @@ Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF, const CallExpr *E = nullptr) { auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_dispatch_ptr); auto *Call = CGF.Builder.CreateCall(F); - Call->addAttribute( - AttributeList::ReturnIndex, + Call->addRetAttr( Attribute::getWithDereferenceableBytes(Call->getContext(), 64)); - Call->addAttribute(AttributeList::ReturnIndex, - Attribute::getWithAlignment(Call->getContext(), Align(4))); + Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(4))); if (!E) return Call; QualType BuiltinRetType = E->getType(); @@ -17786,6 +17871,22 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(Intrinsic::maximum, ConvertType(E->getType())); return Builder.CreateCall(Callee, {LHS, RHS}); } + case WebAssembly::BI__builtin_wasm_pmin_f32x4: + case WebAssembly::BI__builtin_wasm_pmin_f64x2: { + Value *LHS = EmitScalarExpr(E->getArg(0)); + Value *RHS = EmitScalarExpr(E->getArg(1)); + Function *Callee = + CGM.getIntrinsic(Intrinsic::wasm_pmin, ConvertType(E->getType())); + return Builder.CreateCall(Callee, {LHS, RHS}); + } + case WebAssembly::BI__builtin_wasm_pmax_f32x4: + case WebAssembly::BI__builtin_wasm_pmax_f64x2: { + Value *LHS = EmitScalarExpr(E->getArg(0)); + Value *RHS = EmitScalarExpr(E->getArg(1)); + Function *Callee = + CGM.getIntrinsic(Intrinsic::wasm_pmax, ConvertType(E->getType())); + return Builder.CreateCall(Callee, {LHS, RHS}); + } case WebAssembly::BI__builtin_wasm_ceil_f32x4: case WebAssembly::BI__builtin_wasm_floor_f32x4: case WebAssembly::BI__builtin_wasm_trunc_f32x4: diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 96117aeea07ff..daf25f41492d7 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -1892,7 +1892,7 @@ void CodeGenModule::addDefaultFunctionDefinitionAttributes(llvm::Function &F) { getDefaultFunctionAttributes(F.getName(), F.hasOptNone(), /* AttrOnCallSite = */ false, FuncAttrs); // TODO: call GetCPUAndFeaturesAttributes? 
- F.addAttributes(llvm::AttributeList::FunctionIndex, FuncAttrs); + F.addFnAttrs(FuncAttrs); } void CodeGenModule::addDefaultFunctionDefinitionAttributes( @@ -4540,10 +4540,8 @@ maybeRaiseRetAlignmentAttribute(llvm::LLVMContext &Ctx, if (CurAlign >= NewAlign) return Attrs; llvm::Attribute AlignAttr = llvm::Attribute::getWithAlignment(Ctx, NewAlign); - return Attrs - .removeAttribute(Ctx, llvm::AttributeList::ReturnIndex, - llvm::Attribute::AttrKind::Alignment) - .addAttribute(Ctx, llvm::AttributeList::ReturnIndex, AlignAttr); + return Attrs.removeRetAttribute(Ctx, llvm::Attribute::AttrKind::Alignment) + .addRetAttribute(Ctx, AlignAttr); } template class AbstractAssumeAlignedAttrEmitter { @@ -5209,15 +5207,11 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, if (const FunctionDecl *FD = dyn_cast_or_null(CurFuncDecl)) if (FD->hasAttr()) // All calls within a strictfp function are marked strictfp - Attrs = - Attrs.addAttribute(getLLVMContext(), llvm::AttributeList::FunctionIndex, - llvm::Attribute::StrictFP); + Attrs = Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::StrictFP); // Add call-site nomerge attribute if exists. if (InNoMergeAttributedStmt) - Attrs = - Attrs.addAttribute(getLLVMContext(), llvm::AttributeList::FunctionIndex, - llvm::Attribute::NoMerge); + Attrs = Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::NoMerge); // Apply some call-site-specific attributes. // TODO: work this into building the attribute set. @@ -5227,15 +5221,12 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, if (CurCodeDecl && CurCodeDecl->hasAttr() && !(TargetDecl && TargetDecl->hasAttr())) { Attrs = - Attrs.addAttribute(getLLVMContext(), llvm::AttributeList::FunctionIndex, - llvm::Attribute::AlwaysInline); + Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::AlwaysInline); } // Disable inlining inside SEH __try blocks. if (isSEHTryScope()) { - Attrs = - Attrs.addAttribute(getLLVMContext(), llvm::AttributeList::FunctionIndex, - llvm::Attribute::NoInline); + Attrs = Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::NoInline); } // Decide whether to use a call or an invoke. @@ -5251,7 +5242,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, CannotThrow = true; } else { // Otherwise, nounwind call sites will never throw. 
- CannotThrow = Attrs.hasFnAttribute(llvm::Attribute::NoUnwind); + CannotThrow = Attrs.hasFnAttr(llvm::Attribute::NoUnwind); if (auto *FPtr = dyn_cast(CalleePtr)) if (FPtr->hasFnAttribute(llvm::Attribute::NoUnwind)) @@ -5274,9 +5265,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, if (const FunctionDecl *FD = dyn_cast_or_null(CurFuncDecl)) if (FD->hasAttr()) // All calls within a strictfp function are marked strictfp - Attrs = - Attrs.addAttribute(getLLVMContext(), llvm::AttributeList::FunctionIndex, - llvm::Attribute::StrictFP); + Attrs = Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::StrictFP); AssumeAlignedAttrEmitter AssumeAlignedAttrEmitter(*this, TargetDecl); Attrs = AssumeAlignedAttrEmitter.TryEmitAsCallSiteAttribute(Attrs); @@ -5303,8 +5292,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, if (const auto *FD = dyn_cast_or_null(CurFuncDecl)) { if (const auto *A = FD->getAttr()) { if (A->getGuard() == CFGuardAttr::GuardArg::nocf && !CI->getCalledFunction()) - Attrs = Attrs.addAttribute( - getLLVMContext(), llvm::AttributeList::FunctionIndex, "guard_nocf"); + Attrs = Attrs.addFnAttribute(getLLVMContext(), "guard_nocf"); } } @@ -5374,8 +5362,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // attributes of the called function. if (auto *F = CI->getCalledFunction()) F->removeFnAttr(llvm::Attribute::NoReturn); - CI->removeAttribute(llvm::AttributeList::FunctionIndex, - llvm::Attribute::NoReturn); + CI->removeFnAttr(llvm::Attribute::NoReturn); // Avoid incompatibility with ASan which relies on the `noreturn` // attribute to insert handler calls. diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index 6a3c98f7989ef..e7c42d95f61a2 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -1384,16 +1384,16 @@ llvm::DIType *CGDebugInfo::createBitFieldType(const FieldDecl *BitFieldDecl, Offset = BitFieldInfo.StorageSize - BitFieldInfo.Size - Offset; uint64_t OffsetInBits = StorageOffsetInBits + Offset; llvm::DINode::DIFlags Flags = getAccessFlag(BitFieldDecl->getAccess(), RD); + llvm::DINodeArray Annotations = CollectBTFTagAnnotations(BitFieldDecl); return DBuilder.createBitFieldMemberType( RecordTy, Name, File, Line, SizeInBits, OffsetInBits, StorageOffsetInBits, - Flags, DebugType); + Flags, DebugType, Annotations); } -llvm::DIType * -CGDebugInfo::createFieldType(StringRef name, QualType type, SourceLocation loc, - AccessSpecifier AS, uint64_t offsetInBits, - uint32_t AlignInBits, llvm::DIFile *tunit, - llvm::DIScope *scope, const RecordDecl *RD) { +llvm::DIType *CGDebugInfo::createFieldType( + StringRef name, QualType type, SourceLocation loc, AccessSpecifier AS, + uint64_t offsetInBits, uint32_t AlignInBits, llvm::DIFile *tunit, + llvm::DIScope *scope, const RecordDecl *RD, llvm::DINodeArray Annotations) { llvm::DIType *debugType = getOrCreateType(type, tunit); // Get the location for the field. 
@@ -1411,7 +1411,7 @@ CGDebugInfo::createFieldType(StringRef name, QualType type, SourceLocation loc, llvm::DINode::DIFlags flags = getAccessFlag(AS, RD); return DBuilder.createMemberType(scope, name, file, line, SizeInBits, Align, - offsetInBits, flags, debugType); + offsetInBits, flags, debugType, Annotations); } void CGDebugInfo::CollectRecordLambdaFields( @@ -1501,9 +1501,10 @@ void CGDebugInfo::CollectRecordNormalField( FieldType = createBitFieldType(field, RecordTy, RD); } else { auto Align = getDeclAlignIfRequired(field, CGM.getContext()); + llvm::DINodeArray Annotations = CollectBTFTagAnnotations(field); FieldType = createFieldType(name, type, field->getLocation(), field->getAccess(), - OffsetInBits, Align, tunit, RecordTy, RD); + OffsetInBits, Align, tunit, RecordTy, RD, Annotations); } elements.push_back(FieldType); @@ -2070,6 +2071,20 @@ llvm::DINodeArray CGDebugInfo::CollectCXXTemplateParams( return CollectTemplateParams(TPList, TAList.asArray(), Unit); } +llvm::DINodeArray CGDebugInfo::CollectBTFTagAnnotations(const Decl *D) { + if (!D->hasAttr()) + return nullptr; + + SmallVector Annotations; + for (const auto *I : D->specific_attrs()) { + llvm::Metadata *Ops[2] = { + llvm::MDString::get(CGM.getLLVMContext(), StringRef("btf_tag")), + llvm::MDString::get(CGM.getLLVMContext(), I->getBTFTag())}; + Annotations.push_back(llvm::MDNode::get(CGM.getLLVMContext(), Ops)); + } + return DBuilder.getOrCreateArray(Annotations); +} + llvm::DIType *CGDebugInfo::getOrCreateVTablePtrType(llvm::DIFile *Unit) { if (VTablePtrType) return VTablePtrType; @@ -3442,9 +3457,10 @@ llvm::DICompositeType *CGDebugInfo::CreateLimitedType(const RecordType *Ty) { Flags |= llvm::DINode::FlagExportSymbols; } + llvm::DINodeArray Annotations = CollectBTFTagAnnotations(D); llvm::DICompositeType *RealDecl = DBuilder.createReplaceableCompositeType( getTagForRecord(RD), RDName, RDContext, DefUnit, Line, 0, Size, Align, - Flags, Identifier); + Flags, Identifier, Annotations); // Elements of composite types usually have back to the type, creating // uniquing cycles. Distinct nodes are more efficient. diff --git a/clang/lib/CodeGen/CGDebugInfo.h b/clang/lib/CodeGen/CGDebugInfo.h index f3322fa91f57f..2cccc392658cf 100644 --- a/clang/lib/CodeGen/CGDebugInfo.h +++ b/clang/lib/CodeGen/CGDebugInfo.h @@ -297,11 +297,15 @@ class CGDebugInfo { CollectCXXTemplateParams(const ClassTemplateSpecializationDecl *TS, llvm::DIFile *F); + /// A helper function to collect debug info for btf_tag annotations. 
+ llvm::DINodeArray CollectBTFTagAnnotations(const Decl *D); + llvm::DIType *createFieldType(StringRef name, QualType type, SourceLocation loc, AccessSpecifier AS, uint64_t offsetInBits, uint32_t AlignInBits, llvm::DIFile *tunit, llvm::DIScope *scope, - const RecordDecl *RD = nullptr); + const RecordDecl *RD = nullptr, + llvm::DINodeArray Annotations = nullptr); llvm::DIType *createFieldType(StringRef name, QualType type, SourceLocation loc, AccessSpecifier AS, diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index d70cbb64de84c..7453a923042b7 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -3499,7 +3499,7 @@ void CodeGenFunction::EmitTrapCheck(llvm::Value *Checked, if (!CGM.getCodeGenOpts().TrapFuncName.empty()) { auto A = llvm::Attribute::get(getLLVMContext(), "trap-func-name", CGM.getCodeGenOpts().TrapFuncName); - TrapCall->addAttribute(llvm::AttributeList::FunctionIndex, A); + TrapCall->addFnAttr(A); } TrapCall->setDoesNotReturn(); TrapCall->setDoesNotThrow(); @@ -3523,7 +3523,7 @@ llvm::CallInst *CodeGenFunction::EmitTrapCall(llvm::Intrinsic::ID IntrID) { if (!CGM.getCodeGenOpts().TrapFuncName.empty()) { auto A = llvm::Attribute::get(getLLVMContext(), "trap-func-name", CGM.getCodeGenOpts().TrapFuncName); - TrapCall->addAttribute(llvm::AttributeList::FunctionIndex, A); + TrapCall->addFnAttr(A); } return TrapCall; diff --git a/clang/lib/CodeGen/CGExprCXX.cpp b/clang/lib/CodeGen/CGExprCXX.cpp index 96cf977ca2901..d86ae7ad17d68 100644 --- a/clang/lib/CodeGen/CGExprCXX.cpp +++ b/clang/lib/CodeGen/CGExprCXX.cpp @@ -1323,8 +1323,7 @@ static RValue EmitNewDeleteCall(CodeGenFunction &CGF, llvm::Function *Fn = dyn_cast(CalleePtr); if (CalleeDecl->isReplaceableGlobalAllocationFunction() && Fn && Fn->hasFnAttribute(llvm::Attribute::NoBuiltin)) { - CallOrInvoke->addAttribute(llvm::AttributeList::FunctionIndex, - llvm::Attribute::Builtin); + CallOrInvoke->addFnAttr(llvm::Attribute::Builtin); } return RV; diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index b66d8d0bf869c..962ab55ee35d1 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -4806,11 +4806,8 @@ Value *ScalarExprEmitter::VisitAsTypeExpr(AsTypeExpr *E) { // vector to get a vec4, then a bitcast if the target type is different. if (NumElementsSrc == 3 && NumElementsDst != 3) { Src = ConvertVec3AndVec4(Builder, CGF, Src, 4); - - if (!CGF.CGM.getCodeGenOpts().PreserveVec3Type) { - Src = createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), Src, - DstTy); - } + Src = createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), Src, + DstTy); Src->setName("astype"); return Src; diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 90fcf2232be2f..5718546b3bb67 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -9435,34 +9435,50 @@ static void emitNonContiguousDescriptor( } } +// Try to extract the base declaration from a `this->x` expression if possible. +static ValueDecl *getDeclFromThisExpr(const Expr *E) { + if (!E) + return nullptr; + + if (const auto *OASE = dyn_cast(E->IgnoreParenCasts())) + if (const MemberExpr *ME = + dyn_cast(OASE->getBase()->IgnoreParenImpCasts())) + return ME->getMemberDecl(); + return nullptr; +} + /// Emit a string constant containing the names of the values mapped to the /// offloading runtime library. 
llvm::Constant * emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, MappableExprsHandler::MappingExprInfo &MapExprs) { - llvm::Constant *SrcLocStr; - if (!MapExprs.getMapDecl()) { - SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(); + + if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr()) + return OMPBuilder.getOrCreateDefaultSrcLocStr(); + + SourceLocation Loc; + if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) { + if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr())) + Loc = VD->getLocation(); + else + Loc = MapExprs.getMapExpr()->getExprLoc(); } else { - std::string ExprName = ""; - if (MapExprs.getMapExpr()) { - PrintingPolicy P(CGF.getContext().getLangOpts()); - llvm::raw_string_ostream OS(ExprName); - MapExprs.getMapExpr()->printPretty(OS, nullptr, P); - OS.flush(); - } else { - ExprName = MapExprs.getMapDecl()->getNameAsString(); - } + Loc = MapExprs.getMapDecl()->getLocation(); + } - SourceLocation Loc = MapExprs.getMapDecl()->getLocation(); - PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); - const char *FileName = PLoc.getFilename(); - unsigned Line = PLoc.getLine(); - unsigned Column = PLoc.getColumn(); - SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName.c_str(), - Line, Column); + std::string ExprName = ""; + if (MapExprs.getMapExpr()) { + PrintingPolicy P(CGF.getContext().getLangOpts()); + llvm::raw_string_ostream OS(ExprName); + MapExprs.getMapExpr()->printPretty(OS, nullptr, P); + OS.flush(); + } else { + ExprName = MapExprs.getMapDecl()->getNameAsString(); } - return SrcLocStr; + + PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); + return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName.c_str(), + PLoc.getLine(), PLoc.getColumn()); } /// Emit the arrays used to pass the captures and map information to the diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp index 33d4ab838af12..cac5faaa8d0f2 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp @@ -20,6 +20,7 @@ #include "clang/AST/StmtVisitor.h" #include "clang/Basic/Cuda.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Frontend/OpenMP/OMPGridValues.h" #include "llvm/IR/IntrinsicsAMDGPU.h" using namespace clang; @@ -35,7 +36,7 @@ CGOpenMPRuntimeAMDGCN::CGOpenMPRuntimeAMDGCN(CodeGenModule &CGM) llvm::Value *CGOpenMPRuntimeAMDGCN::getGPUWarpSize(CodeGenFunction &CGF) { CGBuilderTy &Bld = CGF.Builder; // return constant compile-time target-specific warp size - unsigned WarpSize = CGF.getTarget().getGridValue(llvm::omp::GV_Warp_Size); + unsigned WarpSize = CGF.getTarget().getGridValue().GV_Warp_Size; return Bld.getInt32(WarpSize); } diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index 63fecedc6fb77..b13d55994ef68 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -339,7 +339,7 @@ class CheckVarsEscapingDeclContext final assert(!GlobalizedRD && "Record for globalized variables is built already."); ArrayRef EscapedDeclsForParallel, EscapedDeclsForTeams; - unsigned WarpSize = CGF.getTarget().getGridValue(llvm::omp::GV_Warp_Size); + unsigned WarpSize = CGF.getTarget().getGridValue().GV_Warp_Size; if (IsInTTDRegion) EscapedDeclsForTeams = EscapedDecls.getArrayRef(); else @@ -535,8 +535,7 @@ class CheckVarsEscapingDeclContext final /// on the NVPTX device, to generate more efficient code. 
static llvm::Value *getNVPTXWarpID(CodeGenFunction &CGF) { CGBuilderTy &Bld = CGF.Builder; - unsigned LaneIDBits = - CGF.getTarget().getGridValue(llvm::omp::GV_Warp_Size_Log2); + unsigned LaneIDBits = CGF.getTarget().getGridValue().GV_Warp_Size_Log2; auto &RT = static_cast(CGF.CGM.getOpenMPRuntime()); return Bld.CreateAShr(RT.getGPUThreadID(CGF), LaneIDBits, "nvptx_warp_id"); } @@ -546,8 +545,8 @@ static llvm::Value *getNVPTXWarpID(CodeGenFunction &CGF) { /// on the NVPTX device, to generate more efficient code. static llvm::Value *getNVPTXLaneID(CodeGenFunction &CGF) { CGBuilderTy &Bld = CGF.Builder; - unsigned LaneIDMask = CGF.getContext().getTargetInfo().getGridValue( - llvm::omp::GV_Warp_Size_Log2_Mask); + unsigned LaneIDMask = + CGF.getContext().getTargetInfo().getGridValue().GV_Warp_Size_Log2_Mask; auto &RT = static_cast(CGF.CGM.getOpenMPRuntime()); return Bld.CreateAnd(RT.getGPUThreadID(CGF), Bld.getInt32(LaneIDMask), "nvptx_lane_id"); @@ -1308,7 +1307,7 @@ llvm::Function *CGOpenMPRuntimeGPU::emitTeamsOutlinedFunction( const RecordDecl *GlobalizedRD = nullptr; llvm::SmallVector LastPrivatesReductions; llvm::SmallDenseMap MappedDeclsFields; - unsigned WarpSize = CGM.getTarget().getGridValue(llvm::omp::GV_Warp_Size); + unsigned WarpSize = CGM.getTarget().getGridValue().GV_Warp_Size; // Globalize team reductions variable unconditionally in all modes. if (getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD) getTeamsReductionVars(CGM.getContext(), D, LastPrivatesReductions); @@ -2089,7 +2088,7 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM, "__openmp_nvptx_data_transfer_temporary_storage"; llvm::GlobalVariable *TransferMedium = M.getGlobalVariable(TransferMediumName); - unsigned WarpSize = CGF.getTarget().getGridValue(llvm::omp::GV_Warp_Size); + unsigned WarpSize = CGF.getTarget().getGridValue().GV_Warp_Size; if (!TransferMedium) { auto *Ty = llvm::ArrayType::get(CGM.Int32Ty, WarpSize); unsigned SharedAddressSpace = C.getTargetAddressSpace(LangAS::cuda_shared); diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h index b5f1b843c46b8..5d3b711e6d4b5 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h @@ -17,7 +17,6 @@ #include "CGOpenMPRuntime.h" #include "CodeGenFunction.h" #include "clang/AST/StmtOpenMP.h" -#include "llvm/Frontend/OpenMP/OMPGridValues.h" namespace clang { namespace CodeGen { diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index 0a3a722fa6533..4ff1f7b3a85b9 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -2188,20 +2188,16 @@ static void UpdateAsmCallInst(llvm::CallBase &Result, bool HasSideEffect, CodeGenFunction &CGF, std::vector &RegResults) { if (!HasUnwindClobber) - Result.addAttribute(llvm::AttributeList::FunctionIndex, - llvm::Attribute::NoUnwind); + Result.addFnAttr(llvm::Attribute::NoUnwind); if (NoMerge) - Result.addAttribute(llvm::AttributeList::FunctionIndex, - llvm::Attribute::NoMerge); + Result.addFnAttr(llvm::Attribute::NoMerge); // Attach readnone and readonly attributes. 
if (!HasSideEffect) { if (ReadNone) - Result.addAttribute(llvm::AttributeList::FunctionIndex, - llvm::Attribute::ReadNone); + Result.addFnAttr(llvm::Attribute::ReadNone); else if (ReadOnly) - Result.addAttribute(llvm::AttributeList::FunctionIndex, - llvm::Attribute::ReadOnly); + Result.addFnAttr(llvm::Attribute::ReadOnly); } // Slap the source location of the inline asm into a !srcloc metadata on the @@ -2223,8 +2219,7 @@ static void UpdateAsmCallInst(llvm::CallBase &Result, bool HasSideEffect, // convergent (meaning, they may call an intrinsically convergent op, such // as bar.sync, and so can't have certain optimizations applied around // them). - Result.addAttribute(llvm::AttributeList::FunctionIndex, - llvm::Attribute::Convergent); + Result.addFnAttr(llvm::Attribute::Convergent); // Extract all of the register value results from the asm. if (ResultRegTypes.size() == 1) { RegResults.push_back(&Result); diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 2328dd8198426..b96515093a7f3 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -309,8 +309,8 @@ llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) { while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) { VlaSizePair VlaSize = getVLASize(VAT); Ty = VlaSize.Type; - Size = Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts) - : VlaSize.NumElts; + Size = + Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts) : VlaSize.NumElts; } SizeInChars = C.getTypeSizeInChars(Ty); if (SizeInChars.isZero()) @@ -498,9 +498,8 @@ static llvm::Function *emitOutlinedFunctionPrologue( : CGM.getOpenMPRuntime().translateParameter(FD, Arg)); ++I; } - Args.append( - std::next(CD->param_begin(), CD->getContextParamPosition() + 1), - CD->param_end()); + Args.append(std::next(CD->param_begin(), CD->getContextParamPosition() + 1), + CD->param_end()); TargetArgs.append( std::next(CD->param_begin(), CD->getContextParamPosition() + 1), CD->param_end()); @@ -672,9 +671,9 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, if (EI != VLASizes.end()) { CallArg = EI->second.second; } else { - LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg), - Arg->getType(), - AlignmentSource::Decl); + LValue LV = + WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg), + Arg->getType(), AlignmentSource::Decl); CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc()); } } @@ -719,29 +718,29 @@ void CodeGenFunction::EmitOMPAggregateAssign( CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy); llvm::PHINode *SrcElementPHI = - Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast"); + Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast"); SrcElementPHI->addIncoming(SrcBegin, EntryBB); Address SrcElementCurrent = Address(SrcElementPHI, SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); - llvm::PHINode *DestElementPHI = - Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); + llvm::PHINode *DestElementPHI = Builder.CreatePHI( + DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); DestElementPHI->addIncoming(DestBegin, EntryBB); Address DestElementCurrent = - Address(DestElementPHI, - DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); + Address(DestElementPHI, + DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); // Emit copy. CopyGen(DestElementCurrent, SrcElementCurrent); // Shift the address forward by one element. 
- llvm::Value *DestElementNext = Builder.CreateConstGEP1_32( - DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1, - "omp.arraycpy.dest.element"); - llvm::Value *SrcElementNext = Builder.CreateConstGEP1_32( - SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1, - "omp.arraycpy.src.element"); + llvm::Value *DestElementNext = + Builder.CreateConstGEP1_32(DestAddr.getElementType(), DestElementPHI, + /*Idx0=*/1, "omp.arraycpy.dest.element"); + llvm::Value *SrcElementNext = + Builder.CreateConstGEP1_32(SrcAddr.getElementType(), SrcElementPHI, + /*Idx0=*/1, "omp.arraycpy.src.element"); // Check whether we've reached the end. llvm::Value *Done = Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); @@ -1004,9 +1003,9 @@ bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) { LocalDeclMap.erase(VD); } else { MasterAddr = - Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD) - : CGM.GetAddrOfGlobal(VD), - getContext().getDeclAlign(VD)); + Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD) + : CGM.GetAddrOfGlobal(VD), + getContext().getDeclAlign(VD)); } // Get the address of the threadprivate variable. Address PrivateAddr = EmitLValue(*IRef).getAddress(*this); @@ -1077,7 +1076,7 @@ bool CodeGenFunction::EmitOMPLastprivateClauseInit( PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() { DeclRefExpr DRE(getContext(), const_cast(OrigVD), /*RefersToEnclosingVariableOrCapture=*/ - CapturedStmtInfo->lookup(OrigVD) != nullptr, + CapturedStmtInfo->lookup(OrigVD) != nullptr, (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); return EmitLValue(&DRE).getAddress(*this); }); @@ -1086,19 +1085,19 @@ bool CodeGenFunction::EmitOMPLastprivateClauseInit( // for 'firstprivate' clause. if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) { const auto *VD = cast(cast(IInit)->getDecl()); - bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD, C, - OrigVD]() { - if (C->getKind() == OMPC_LASTPRIVATE_conditional) { - Address VDAddr = - CGM.getOpenMPRuntime().emitLastprivateConditionalInit(*this, - OrigVD); - setAddrOfLocalVar(VD, VDAddr); - return VDAddr; - } - // Emit private VarDecl with copy init. - EmitDecl(*VD); - return GetAddrOfLocalVar(VD); - }); + bool IsRegistered = + PrivateScope.addPrivate(OrigVD, [this, VD, C, OrigVD]() { + if (C->getKind() == OMPC_LASTPRIVATE_conditional) { + Address VDAddr = + CGM.getOpenMPRuntime().emitLastprivateConditionalInit( + *this, OrigVD); + setAddrOfLocalVar(VD, VDAddr); + return VDAddr; + } + // Emit private VarDecl with copy init. + EmitDecl(*VD); + return GetAddrOfLocalVar(VD); + }); assert(IsRegistered && "lastprivate var already registered as private"); (void)IsRegistered; @@ -1293,14 +1292,12 @@ void CodeGenFunction::EmitOMPReductionClauseInit( OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin"); } PrivateScope.addPrivate(LHSVD, [OriginalAddr]() { return OriginalAddr; }); - PrivateScope.addPrivate( - RHSVD, [this, PrivateVD, RHSVD, IsArray]() { - return IsArray - ? Builder.CreateElementBitCast( + PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD, IsArray]() { + return IsArray ? 
Builder.CreateElementBitCast( GetAddrOfLocalVar(PrivateVD), ConvertTypeForMem(RHSVD->getType()), "rhs.begin") : GetAddrOfLocalVar(PrivateVD); - }); + }); } ++ILHS; ++IRHS; @@ -2114,9 +2111,10 @@ bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) { CapturedStmtInfo->lookup(OrigVD) != nullptr, VD->getInit()->getType(), VK_LValue, VD->getInit()->getExprLoc()); - EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(), - VD->getType()), - /*capturedByInit=*/false); + EmitExprAsInit( + &DRE, VD, + MakeAddrLValue(Emission.getAllocatedAddress(), VD->getType()), + /*capturedByInit=*/false); EmitAutoVarCleanups(Emission); } else { EmitVarDecl(*VD); @@ -2219,9 +2217,8 @@ void CodeGenFunction::EmitOMPPrivateLoopCounters( AutoVarEmission VarEmission = EmitAutoVarAlloca(*PrivateVD); EmitAutoVarCleanups(VarEmission); LocalDeclMap.erase(PrivateVD); - (void)LoopScope.addPrivate(VD, [&VarEmission]() { - return VarEmission.getAllocatedAddress(); - }); + (void)LoopScope.addPrivate( + VD, [&VarEmission]() { return VarEmission.getAllocatedAddress(); }); if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) || VD->hasGlobalStorage()) { (void)LoopScope.addPrivate(PrivateVD, [this, VD, E]() { @@ -2273,7 +2270,7 @@ static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S, // Create temp loop control variables with their init values to support // non-rectangular loops. CodeGenFunction::OMPMapVars PreCondVars; - for (const Expr * E: S.dependent_counters()) { + for (const Expr *E : S.dependent_counters()) { if (!E) continue; assert(!E->getType().getNonReferenceType()->isRecordType() && @@ -2733,12 +2730,10 @@ void CodeGenFunction::EmitOMPForOuterLoop( CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). - const bool DynamicOrOrdered = - Ordered || RT.isDynamic(ScheduleKind.Schedule); + const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind.Schedule); - assert((Ordered || - !RT.isStaticNonchunked(ScheduleKind.Schedule, - LoopArgs.Chunk != nullptr)) && + assert((Ordered || !RT.isStaticNonchunked(ScheduleKind.Schedule, + LoopArgs.Chunk != nullptr)) && "static non-chunked schedule does not need outer loop"); // Emit outer loop. @@ -3058,15 +3053,15 @@ void CodeGenFunction::EmitOMPTargetSimdDirective( } namespace { - struct ScheduleKindModifiersTy { - OpenMPScheduleClauseKind Kind; - OpenMPScheduleClauseModifier M1; - OpenMPScheduleClauseModifier M2; - ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind, - OpenMPScheduleClauseModifier M1, - OpenMPScheduleClauseModifier M2) - : Kind(Kind), M1(M1), M2(M2) {} - }; +struct ScheduleKindModifiersTy { + OpenMPScheduleClauseKind Kind; + OpenMPScheduleClauseModifier M1; + OpenMPScheduleClauseModifier M2; + ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind, + OpenMPScheduleClauseModifier M1, + OpenMPScheduleClauseModifier M2) + : Kind(Kind), M1(M1), M2(M2) {} +}; } // namespace bool CodeGenFunction::EmitOMPWorksharingLoop( @@ -3186,8 +3181,10 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( // If the static schedule kind is specified or if the ordered clause is // specified, and if no monotonic modifier is specified, the effect will // be as if the monotonic modifier was specified. 
- bool StaticChunkedOne = RT.isStaticChunked(ScheduleKind.Schedule, - /* Chunked */ Chunk != nullptr) && HasChunkSizeOne && + bool StaticChunkedOne = + RT.isStaticChunked(ScheduleKind.Schedule, + /* Chunked */ Chunk != nullptr) && + HasChunkSizeOne && isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()); bool IsMonotonic = Ordered || @@ -4442,7 +4439,7 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( const auto *OrigVD = cast(Pair.second->getDecl()); DeclRefExpr DRE(CGF.getContext(), const_cast(OrigVD), /*RefersToEnclosingVariableOrCapture=*/ - CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr, + CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr, Pair.second->getType(), VK_LValue, Pair.second->getExprLoc()); Scope.addPrivate(Pair.first, [&CGF, &DRE]() { @@ -5170,8 +5167,8 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, // iteration space is divided into chunks that are approximately equal // in size, and at most one chunk is distributed to each team of the // league. The size of the chunks is unspecified in this case. - bool StaticChunked = RT.isStaticChunked( - ScheduleKind, /* Chunked */ Chunk != nullptr) && + bool StaticChunked = + RT.isStaticChunked(ScheduleKind, /* Chunked */ Chunk != nullptr) && isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()); if (RT.isStaticNonchunked(ScheduleKind, /* Chunked */ Chunk != nullptr) || @@ -5942,8 +5939,7 @@ static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, return; } - auto LPCRegion = - CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S); + auto LPCRegion = CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S); llvm::Function *Fn = nullptr; llvm::Constant *FnID = nullptr; @@ -6473,7 +6469,8 @@ void CodeGenFunction::EmitOMPUseDevicePtrClause( auto OrigVarIt = C.varlist_begin(); auto InitIt = C.inits().begin(); for (const Expr *PvtVarIt : C.private_copies()) { - const auto *OrigVD = cast(cast(*OrigVarIt)->getDecl()); + const auto *OrigVD = + cast(cast(*OrigVarIt)->getDecl()); const auto *InitVD = cast(cast(*InitIt)->getDecl()); const auto *PvtVD = cast(cast(PvtVarIt)->getDecl()); @@ -6496,31 +6493,30 @@ void CodeGenFunction::EmitOMPUseDevicePtrClause( if (InitAddrIt == CaptureDeviceAddrMap.end()) continue; - bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, OrigVD, - InitAddrIt, InitVD, - PvtVD]() { - // Initialize the temporary initialization variable with the address we - // get from the runtime library. We have to cast the source address - // because it is always a void *. References are materialized in the - // privatization scope, so the initialization here disregards the fact - // the original variable is a reference. - QualType AddrQTy = - getContext().getPointerType(OrigVD->getType().getNonReferenceType()); - llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy); - Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy); - setAddrOfLocalVar(InitVD, InitAddr); - - // Emit private declaration, it will be initialized by the value we - // declaration we just added to the local declarations map. - EmitDecl(*PvtVD); - - // The initialization variables reached its purpose in the emission - // of the previous declaration, so we don't need it anymore. - LocalDeclMap.erase(InitVD); - - // Return the address of the private variable. 
- return GetAddrOfLocalVar(PvtVD); - }); + bool IsRegistered = PrivateScope.addPrivate( + OrigVD, [this, OrigVD, InitAddrIt, InitVD, PvtVD]() { + // Initialize the temporary initialization variable with the address + // we get from the runtime library. We have to cast the source address + // because it is always a void *. References are materialized in the + // privatization scope, so the initialization here disregards the fact + // the original variable is a reference. + QualType AddrQTy = getContext().getPointerType( + OrigVD->getType().getNonReferenceType()); + llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy); + Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy); + setAddrOfLocalVar(InitVD, InitAddr); + + // Emit private declaration, it will be initialized by the value we + // declaration we just added to the local declarations map. + EmitDecl(*PvtVD); + + // The initialization variables reached its purpose in the emission + // of the previous declaration, so we don't need it anymore. + LocalDeclMap.erase(InitVD); + + // Return the address of the private variable. + return GetAddrOfLocalVar(PvtVD); + }); assert(IsRegistered && "firstprivate var already registered as private"); // Silence the warning about unused variable. (void)IsRegistered; @@ -6881,11 +6877,11 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { // TODO: Check if we should emit tied or untied task. Data.Tied = true; // Set scheduling for taskloop - if (const auto* Clause = S.getSingleClause()) { + if (const auto *Clause = S.getSingleClause()) { // grainsize clause Data.Schedule.setInt(/*IntVal=*/false); Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize())); - } else if (const auto* Clause = S.getSingleClause()) { + } else if (const auto *Clause = S.getSingleClause()) { // num_tasks clause Data.Schedule.setInt(/*IntVal=*/true); Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks())); diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index 21169e4ffaa78..d3fefb41ef88e 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -383,6 +383,9 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) { "__cyg_profile_func_exit"); } + if (ShouldSkipSanitizerInstrumentation()) + CurFn->addFnAttr(llvm::Attribute::DisableSanitizerInstrumentation); + // Emit debug descriptor for function end. if (CGDebugInfo *DI = getDebugInfo()) DI->EmitFunctionEnd(Builder, CurFn); @@ -486,11 +489,13 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) { // function. CurFn->addFnAttr("min-legal-vector-width", llvm::utostr(LargestVectorWidth)); - // Add vscale attribute if appropriate. - if (getLangOpts().ArmSveVectorBits) { - unsigned VScale = getLangOpts().ArmSveVectorBits / 128; - CurFn->addFnAttr(llvm::Attribute::getWithVScaleRangeArgs(getLLVMContext(), - VScale, VScale)); + // Add vscale_range attribute if appropriate. + Optional> VScaleRange = + getContext().getTargetInfo().getVScaleRange(getLangOpts()); + if (VScaleRange) { + CurFn->addFnAttr(llvm::Attribute::getWithVScaleRangeArgs( + getLLVMContext(), VScaleRange.getValue().first, + VScaleRange.getValue().second)); } // If we generated an unreachable return block, delete it now. 
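As a concrete illustration of the mapping above (not part of the patch): vscale counts 128-bit granules, so a fixed SVE vector length of 512 bits pins the range to (4, 4), which is emitted as the IR function attribute vscale_range(4,4). A minimal stand-alone sketch, with vscaleRangeForFixedSve standing in for the TargetInfo::getVScaleRange query:

#include <cstdio>
#include <utility>

// Hypothetical stand-in for the getVScaleRange() query: a fixed SVE vector
// length of SveBits pins vscale to SveBits/128 for both ends of the range.
static std::pair<unsigned, unsigned> vscaleRangeForFixedSve(unsigned SveBits) {
  unsigned VScale = SveBits / 128;
  return {VScale, VScale};
}

int main() {
  std::pair<unsigned, unsigned> R = vscaleRangeForFixedSve(512);
  std::printf("vscale_range(%u,%u)\n", R.first, R.second); // vscale_range(4,4)
  return 0;
}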
@@ -519,6 +524,12 @@ bool CodeGenFunction::ShouldInstrumentFunction() { return true; } +bool CodeGenFunction::ShouldSkipSanitizerInstrumentation() { + if (!CurFuncDecl) + return false; + return CurFuncDecl->hasAttr(); +} + /// ShouldXRayInstrument - Return true if the current function should be /// instrumented with XRay nop sleds. bool CodeGenFunction::ShouldXRayInstrumentFunction() const { diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 0c451aef8a950..6fca0eb6933e6 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -2286,6 +2286,10 @@ class CodeGenFunction : public CodeGenTypeCache { /// instrumented with __cyg_profile_func_* calls bool ShouldInstrumentFunction(); + /// ShouldSkipSanitizerInstrumentation - Return true if the current function + /// should not be instrumented with sanitizers. + bool ShouldSkipSanitizerInstrumentation(); + /// ShouldXRayInstrument - Return true if the current function should be /// instrumented with XRay nop sleds. bool ShouldXRayInstrumentFunction() const; @@ -4126,30 +4130,30 @@ class CodeGenFunction : public CodeGenTypeCache { /// SVEBuiltinMemEltTy - Returns the memory element type for this memory /// access builtin. Only required if it can't be inferred from the base /// pointer operand. - llvm::Type *SVEBuiltinMemEltTy(SVETypeFlags TypeFlags); + llvm::Type *SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags); - SmallVector getSVEOverloadTypes(SVETypeFlags TypeFlags, - llvm::Type *ReturnType, - ArrayRef Ops); - llvm::Type *getEltType(SVETypeFlags TypeFlags); + SmallVector + getSVEOverloadTypes(const SVETypeFlags &TypeFlags, llvm::Type *ReturnType, + ArrayRef Ops); + llvm::Type *getEltType(const SVETypeFlags &TypeFlags); llvm::ScalableVectorType *getSVEType(const SVETypeFlags &TypeFlags); - llvm::ScalableVectorType *getSVEPredType(SVETypeFlags TypeFlags); - llvm::Value *EmitSVEAllTruePred(SVETypeFlags TypeFlags); + llvm::ScalableVectorType *getSVEPredType(const SVETypeFlags &TypeFlags); + llvm::Value *EmitSVEAllTruePred(const SVETypeFlags &TypeFlags); llvm::Value *EmitSVEDupX(llvm::Value *Scalar); llvm::Value *EmitSVEDupX(llvm::Value *Scalar, llvm::Type *Ty); llvm::Value *EmitSVEReinterpret(llvm::Value *Val, llvm::Type *Ty); - llvm::Value *EmitSVEPMull(SVETypeFlags TypeFlags, + llvm::Value *EmitSVEPMull(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl &Ops, unsigned BuiltinID); - llvm::Value *EmitSVEMovl(SVETypeFlags TypeFlags, + llvm::Value *EmitSVEMovl(const SVETypeFlags &TypeFlags, llvm::ArrayRef Ops, unsigned BuiltinID); llvm::Value *EmitSVEPredicateCast(llvm::Value *Pred, llvm::ScalableVectorType *VTy); - llvm::Value *EmitSVEGatherLoad(SVETypeFlags TypeFlags, + llvm::Value *EmitSVEGatherLoad(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl &Ops, unsigned IntID); - llvm::Value *EmitSVEScatterStore(SVETypeFlags TypeFlags, + llvm::Value *EmitSVEScatterStore(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl &Ops, unsigned IntID); llvm::Value *EmitSVEMaskedLoad(const CallExpr *, llvm::Type *ReturnTy, @@ -4158,15 +4162,16 @@ class CodeGenFunction : public CodeGenTypeCache { llvm::Value *EmitSVEMaskedStore(const CallExpr *, SmallVectorImpl &Ops, unsigned BuiltinID); - llvm::Value *EmitSVEPrefetchLoad(SVETypeFlags TypeFlags, + llvm::Value *EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned BuiltinID); - llvm::Value *EmitSVEGatherPrefetch(SVETypeFlags TypeFlags, + llvm::Value *EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags, 
SmallVectorImpl &Ops, unsigned IntID); - llvm::Value *EmitSVEStructLoad(SVETypeFlags TypeFlags, - SmallVectorImpl &Ops, unsigned IntID); - llvm::Value *EmitSVEStructStore(SVETypeFlags TypeFlags, + llvm::Value *EmitSVEStructLoad(const SVETypeFlags &TypeFlags, + SmallVectorImpl &Ops, + unsigned IntID); + llvm::Value *EmitSVEStructStore(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned IntID); llvm::Value *EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E); diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 0632e79324652..82cef26a49fe6 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -1848,7 +1848,7 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, CodeGenOpts.getInlining() == CodeGenOptions::OnlyAlwaysInlining) B.addAttribute(llvm::Attribute::NoInline); - F->addAttributes(llvm::AttributeList::FunctionIndex, B); + F->addFnAttrs(B); return; } @@ -1935,7 +1935,7 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, B.addAttribute(llvm::Attribute::MinSize); } - F->addAttributes(llvm::AttributeList::FunctionIndex, B); + F->addFnAttrs(B); unsigned alignment = D->getMaxAlignment() / Context.getCharWidth(); if (alignment) @@ -1985,7 +1985,7 @@ void CodeGenModule::setLLVMFunctionFEnvAttributes(const FunctionDecl *D, if (D->hasAttr()) { llvm::AttrBuilder FuncAttrs; FuncAttrs.addAttribute("strictfp"); - F->addAttributes(llvm::AttributeList::FunctionIndex, FuncAttrs); + F->addFnAttrs(FuncAttrs); } } @@ -2101,8 +2101,8 @@ void CodeGenModule::setNonAliasAttributes(GlobalDecl GD, RemoveAttrs.addAttribute("target-cpu"); RemoveAttrs.addAttribute("target-features"); RemoveAttrs.addAttribute("tune-cpu"); - F->removeAttributes(llvm::AttributeList::FunctionIndex, RemoveAttrs); - F->addAttributes(llvm::AttributeList::FunctionIndex, Attrs); + F->removeFnAttrs(RemoveAttrs); + F->addFnAttrs(Attrs); } } @@ -2185,7 +2185,7 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F, F->arg_begin()->getType() ->canLosslesslyBitCastTo(F->getReturnType()) && "unexpected this return"); - F->addAttribute(1, llvm::Attribute::Returned); + F->addParamAttr(0, llvm::Attribute::Returned); } // Only a few attributes are set on declarations; these may later be @@ -2211,15 +2211,13 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F, assert(HasBody && "Inline builtin declarations should always have an " "available body!"); if (shouldEmitFunction(FDBody)) - F->addAttribute(llvm::AttributeList::FunctionIndex, - llvm::Attribute::NoBuiltin); + F->addFnAttr(llvm::Attribute::NoBuiltin); } if (FD->isReplaceableGlobalAllocationFunction()) { // A replaceable global allocation function does not act like a builtin by // default, only if it is invoked by a new-expression or delete-expression. 
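For context, the "replaceable global allocation function" mentioned above is an ordinary user replacement of the global operator new/delete; a minimal illustrative example (not part of the patch):

#include <cstdlib>
#include <new>

// Definitions like these replace the global allocation functions. Per the
// comment above they do not act like builtins by default, so direct calls
// reach these bodies; only new-/delete-expressions may apply builtin
// semantics.
void *operator new(std::size_t Size) {
  if (void *P = std::malloc(Size))
    return P;
  throw std::bad_alloc();
}

void operator delete(void *P) noexcept { std::free(P); }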
- F->addAttribute(llvm::AttributeList::FunctionIndex, - llvm::Attribute::NoBuiltin); + F->addFnAttr(llvm::Attribute::NoBuiltin); } if (isa(FD) || isa(FD)) @@ -3736,9 +3734,9 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction( assert(F->getName() == MangledName && "name was uniqued!"); if (D) SetFunctionAttributes(GD, F, IsIncompleteFunction, IsThunk); - if (ExtraAttrs.hasAttributes(llvm::AttributeList::FunctionIndex)) { + if (ExtraAttrs.hasFnAttrs()) { llvm::AttrBuilder B(ExtraAttrs, llvm::AttributeList::FunctionIndex); - F->addAttributes(llvm::AttributeList::FunctionIndex, B); + F->addFnAttrs(B); } if (!DontDefer) { @@ -3884,8 +3882,7 @@ CodeGenModule::CreateRuntimeFunction(llvm::FunctionType *FTy, StringRef Name, bool AssumeConvergent) { if (AssumeConvergent) { ExtraAttrs = - ExtraAttrs.addAttribute(VMContext, llvm::AttributeList::FunctionIndex, - llvm::Attribute::Convergent); + ExtraAttrs.addFnAttribute(VMContext, llvm::Attribute::Convergent); } llvm::Constant *C = @@ -5034,7 +5031,7 @@ static void replaceUsesOfNonProtoConstant(llvm::Constant *old, } // Add any parameter attributes. - newArgAttrs.push_back(oldAttrs.getParamAttributes(argNo)); + newArgAttrs.push_back(oldAttrs.getParamAttrs(argNo)); argNo++; } if (dontTransform) @@ -5062,9 +5059,9 @@ static void replaceUsesOfNonProtoConstant(llvm::Constant *old, if (!newCall->getType()->isVoidTy()) newCall->takeName(callSite); - newCall->setAttributes(llvm::AttributeList::get( - newFn->getContext(), oldAttrs.getFnAttributes(), - oldAttrs.getRetAttributes(), newArgAttrs)); + newCall->setAttributes( + llvm::AttributeList::get(newFn->getContext(), oldAttrs.getFnAttrs(), + oldAttrs.getRetAttrs(), newArgAttrs)); newCall->setCallingConv(callSite->getCallingConv()); // Finally, remove the old call, replacing any uses with the new one. diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index f752347a90ac9..4005c5c731ed6 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -827,19 +827,19 @@ class WebAssemblyTargetCodeGenInfo final : public TargetCodeGenInfo { llvm::Function *Fn = cast(GV); llvm::AttrBuilder B; B.addAttribute("wasm-import-module", Attr->getImportModule()); - Fn->addAttributes(llvm::AttributeList::FunctionIndex, B); + Fn->addFnAttrs(B); } if (const auto *Attr = FD->getAttr()) { llvm::Function *Fn = cast(GV); llvm::AttrBuilder B; B.addAttribute("wasm-import-name", Attr->getImportName()); - Fn->addAttributes(llvm::AttributeList::FunctionIndex, B); + Fn->addFnAttrs(B); } if (const auto *Attr = FD->getAttr()) { llvm::Function *Fn = cast(GV); llvm::AttrBuilder B; B.addAttribute("wasm-export-name", Attr->getExportName()); - Fn->addAttributes(llvm::AttributeList::FunctionIndex, B); + Fn->addFnAttrs(B); } } @@ -1524,6 +1524,14 @@ ABIArgInfo X86_32ABIInfo::classifyReturnType(QualType RetTy, if (isEmptyRecord(getContext(), RetTy, true)) return ABIArgInfo::getIgnore(); + // Return complex of _Float16 as <2 x half> so the backend will use xmm0. + if (const ComplexType *CT = RetTy->getAs()) { + QualType ET = getContext().getCanonicalType(CT->getElementType()); + if (ET->isFloat16Type()) + return ABIArgInfo::getDirect(llvm::FixedVectorType::get( + llvm::Type::getHalfTy(getVMContext()), 2)); + } + // Small structures which are register sized are generally returned // in a register. 
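A minimal example of the return type targeted by the new _Float16 handling above (illustrative only; assumes a target where _Float16 is available):

// With the change above, on 32-bit x86 this _Complex _Float16 return value is
// classified as a direct <2 x half>, so the backend uses %xmm0 for it.
_Float16 _Complex make_cf16(_Float16 Re, _Float16 Im) {
  return __builtin_complex(Re, Im);
}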
if (shouldReturnTypeInRegister(RetTy, getContext())) { @@ -6393,7 +6401,7 @@ class ARMTargetCodeGenInfo : public TargetCodeGenInfo { // the backend to perform a realignment as part of the function prologue. llvm::AttrBuilder B; B.addStackAlignmentAttr(8); - Fn->addAttributes(llvm::AttributeList::FunctionIndex, B); + Fn->addFnAttrs(B); } }; diff --git a/clang/lib/Driver/Distro.cpp b/clang/lib/Driver/Distro.cpp index c4cf4e48b5b8d..cdb5a1725750f 100644 --- a/clang/lib/Driver/Distro.cpp +++ b/clang/lib/Driver/Distro.cpp @@ -150,6 +150,8 @@ static Distro::DistroType DetectDistro(llvm::vfs::FileSystem &VFS) { return Distro::DebianBuster; case 11: return Distro::DebianBullseye; + case 12: + return Distro::DebianBookworm; default: return Distro::UnknownDistro; } @@ -161,6 +163,7 @@ static Distro::DistroType DetectDistro(llvm::vfs::FileSystem &VFS) { .Case("stretch/sid", Distro::DebianStretch) .Case("buster/sid", Distro::DebianBuster) .Case("bullseye/sid", Distro::DebianBullseye) + .Case("bookworm/sid", Distro::DebianBookworm) .Default(Distro::UnknownDistro); } diff --git a/clang/lib/Driver/ToolChains/AVR.cpp b/clang/lib/Driver/ToolChains/AVR.cpp index cebf9d13a4ce0..5a12406a51cc6 100644 --- a/clang/lib/Driver/ToolChains/AVR.cpp +++ b/clang/lib/Driver/ToolChains/AVR.cpp @@ -453,11 +453,21 @@ void AVR::Linker::ConstructJob(Compilation &C, const JobAction &JA, } llvm::Optional AVRToolChain::findAVRLibcInstallation() const { + // Search avr-libc installation according to avr-gcc installation. + std::string GCCParent(GCCInstallation.getParentLibPath()); + std::string Path(GCCParent + "/avr"); + if (llvm::sys::fs::is_directory(Path)) + return Path; + Path = GCCParent + "/../avr"; + if (llvm::sys::fs::is_directory(Path)) + return Path; + + // Search avr-libc installation from possible locations, and return the first + // one that exists, if there is no avr-gcc installed. for (StringRef PossiblePath : PossibleAVRLibcLocations) { std::string Path = getDriver().SysRoot + PossiblePath.str(); - // Return the first avr-libc installation that exists. 
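The resulting avr-libc lookup order can be summarized by the sketch below; findAVRLibc is a hypothetical free function mirroring the logic above, not an existing helper:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/FileSystem.h"
#include <string>

// Hypothetical summary of the lookup order: candidates derived from the
// detected avr-gcc installation win; otherwise fall back to the sysroot-based
// list and return the first directory that exists.
static llvm::Optional<std::string>
findAVRLibc(llvm::StringRef GCCParentLibPath, llvm::StringRef SysRoot,
            llvm::ArrayRef<llvm::StringRef> Candidates) {
  for (const std::string &P : {GCCParentLibPath.str() + "/avr",
                               GCCParentLibPath.str() + "/../avr"})
    if (llvm::sys::fs::is_directory(P))
      return P;
  for (llvm::StringRef C : Candidates) {
    std::string P = SysRoot.str() + C.str();
    if (llvm::sys::fs::is_directory(P))
      return P;
  }
  return llvm::None;
}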
if (llvm::sys::fs::is_directory(Path)) - return Optional(Path); + return Path; } return llvm::None; diff --git a/clang/lib/Driver/ToolChains/Hexagon.cpp b/clang/lib/Driver/ToolChains/Hexagon.cpp index 314d0efce4414..5f5964ec982b6 100644 --- a/clang/lib/Driver/ToolChains/Hexagon.cpp +++ b/clang/lib/Driver/ToolChains/Hexagon.cpp @@ -146,6 +146,8 @@ void hexagon::Assembler::ConstructJob(Compilation &C, const JobAction &JA, "-mcpu=hexagon" + toolchains::HexagonToolChain::GetTargetCPUVersion(Args))); + addSanitizerRuntimes(HTC, Args, CmdArgs); + if (Output.isFilename()) { CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); @@ -223,6 +225,8 @@ constructHexagonLinkArgs(Compilation &C, const JobAction &JA, bool UseShared = IsShared && !IsStatic; StringRef CpuVer = toolchains::HexagonToolChain::GetTargetCPUVersion(Args); + bool NeedsSanitizerDeps = addSanitizerRuntimes(HTC, Args, CmdArgs); + //---------------------------------------------------------------------------- // Silence warnings for various options //---------------------------------------------------------------------------- @@ -288,6 +292,12 @@ constructHexagonLinkArgs(Compilation &C, const JobAction &JA, AddLinkerInputs(HTC, Inputs, Args, CmdArgs, JA); if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { + if (NeedsSanitizerDeps) { + linkSanitizerRuntimeDeps(HTC, CmdArgs); + + CmdArgs.push_back("-lunwind"); + } + CmdArgs.push_back("-lclang_rt.builtins-hexagon"); CmdArgs.push_back("-lc"); } @@ -450,6 +460,13 @@ Optional HexagonToolChain::getSmallDataThreshold( return None; } +std::string HexagonToolChain::getCompilerRTPath() const { + SmallString<128> Dir(getDriver().SysRoot); + llvm::sys::path::append(Dir, "usr", "lib"); + Dir += SelectedMultilib.gccSuffix(); + return std::string(Dir.str()); +} + void HexagonToolChain::getHexagonLibraryPaths(const ArgList &Args, ToolChain::path_list &LibPaths) const { const Driver &D = getDriver(); diff --git a/clang/lib/Driver/ToolChains/Hexagon.h b/clang/lib/Driver/ToolChains/Hexagon.h index c32cb7f09591a..9dc9b3ceddde1 100644 --- a/clang/lib/Driver/ToolChains/Hexagon.h +++ b/clang/lib/Driver/ToolChains/Hexagon.h @@ -104,6 +104,8 @@ class LLVM_LIBRARY_VISIBILITY HexagonToolChain : public Linux { void getHexagonLibraryPaths(const llvm::opt::ArgList &Args, ToolChain::path_list &LibPaths) const; + std::string getCompilerRTPath() const override; + static bool isAutoHVXEnabled(const llvm::opt::ArgList &Args); static const StringRef GetDefaultCPU(); static const StringRef GetTargetCPUVersion(const llvm::opt::ArgList &Args); diff --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp index 7d4b67a611e7b..0bae9c759501e 100644 --- a/clang/lib/Driver/ToolChains/Linux.cpp +++ b/clang/lib/Driver/ToolChains/Linux.cpp @@ -699,6 +699,7 @@ SanitizerMask Linux::getSupportedSanitizers() const { getTriple().getArch() == llvm::Triple::thumbeb; const bool IsRISCV64 = getTriple().getArch() == llvm::Triple::riscv64; const bool IsSystemZ = getTriple().getArch() == llvm::Triple::systemz; + const bool IsHexagon = getTriple().getArch() == llvm::Triple::hexagon; SanitizerMask Res = ToolChain::getSupportedSanitizers(); Res |= SanitizerKind::Address; Res |= SanitizerKind::PointerCompare; @@ -712,7 +713,7 @@ SanitizerMask Linux::getSupportedSanitizers() const { if (IsX86_64 || IsMIPS64 || IsAArch64) Res |= SanitizerKind::DataFlow; if (IsX86_64 || IsMIPS64 || IsAArch64 || IsX86 || IsArmArch || IsPowerPC64 || - IsRISCV64 || IsSystemZ) + IsRISCV64 || IsSystemZ || 
IsHexagon) Res |= SanitizerKind::Leak; if (IsX86_64 || IsMIPS64 || IsAArch64 || IsPowerPC64 || IsSystemZ) Res |= SanitizerKind::Thread; @@ -721,7 +722,7 @@ SanitizerMask Linux::getSupportedSanitizers() const { if (IsX86 || IsX86_64) Res |= SanitizerKind::Function; if (IsX86_64 || IsMIPS64 || IsAArch64 || IsX86 || IsMIPS || IsArmArch || - IsPowerPC64) + IsPowerPC64 || IsHexagon) Res |= SanitizerKind::Scudo; if (IsX86_64 || IsAArch64) { Res |= SanitizerKind::HWAddress; diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index 0c4cacab50506..8487875064aa8 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -14,7 +14,6 @@ #include "UnwrappedLineParser.h" #include "FormatToken.h" -#include "clang/Basic/TokenKinds.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -995,6 +994,13 @@ static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, Keywords.kw_import, tok::kw_export); } +// Checks whether a token is a type in K&R C (aka C78). +static bool isC78Type(const FormatToken &Tok) { + return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long, + tok::kw_unsigned, tok::kw_float, tok::kw_double, + tok::identifier); +} + // This function checks whether a token starts the first parameter declaration // in a K&R C (aka C78) function definition, e.g.: // int f(a, b) @@ -1002,13 +1008,24 @@ static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, // { // return a + b; // } -static bool isC78ParameterDecl(const FormatToken *Tok) { - if (!Tok) +static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next, + const FormatToken *FuncName) { + assert(Tok); + assert(Next); + assert(FuncName); + + if (FuncName->isNot(tok::identifier)) return false; - if (!Tok->isOneOf(tok::kw_int, tok::kw_char, tok::kw_float, tok::kw_double, - tok::kw_struct, tok::kw_union, tok::kw_long, tok::kw_short, - tok::kw_unsigned, tok::kw_register)) + const FormatToken *Prev = FuncName->Previous; + if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev))) + return false; + + if (!isC78Type(*Tok) && + !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) + return false; + + if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo()) return false; Tok = Tok->Previous; @@ -1369,22 +1386,20 @@ void UnwrappedLineParser::parseStructuralElement(bool IsTopLevel) { case tok::r_brace: addUnwrappedLine(); return; - case tok::l_paren: + case tok::l_paren: { parseParens(); // Break the unwrapped line if a K&R C function definition has a parameter // declaration. 
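Beyond the int f(a, b) example already quoted in the comment above, the tightened heuristic is also meant to recognize C78 definitions like the following (illustrative K&R C, not part of the patch):

/* Illustrative C78-style definition: the function name is preceded by '*',
 * and the tokens following the parameter list are 'register', a C78 type,
 * and an identifier, so clang-format breaks the parameter declarations onto
 * their own lines. */
char *advance(s, n)
register char *s;
unsigned n;
{
  return s + n;
}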
- if (!IsTopLevel || !Style.isCpp()) - break; - if (!Previous || Previous->isNot(tok::identifier)) + if (!IsTopLevel || !Style.isCpp() || !Previous || FormatTok->is(tok::eof)) break; - if (Previous->Previous && Previous->Previous->is(tok::at)) - break; - if (!Line->Tokens.begin()->Tok->is(tok::kw_typedef) && - isC78ParameterDecl(FormatTok)) { + const unsigned Position = Tokens->getPosition() + 1; + assert(Position < AllTokens.size()); + if (isC78ParameterDecl(FormatTok, AllTokens[Position], Previous)) { addUnwrappedLine(); return; } break; + } case tok::kw_operator: nextToken(); if (FormatTok->isBinaryOperator()) diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 9975504624fcd..5ff6fdb929405 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -3171,6 +3171,8 @@ void CompilerInvocation::setLangDefaults(LangOptions &Opts, InputKind IK, Opts.OpenCLVersion = 300; else if (LangStd == LangStandard::lang_openclcpp10) Opts.OpenCLCPlusPlusVersion = 100; + else if (LangStd == LangStandard::lang_openclcpp2021) + Opts.OpenCLCPlusPlusVersion = 202100; // OpenCL has some additional defaults. if (Opts.OpenCL) { @@ -3320,6 +3322,7 @@ void CompilerInvocation::GenerateLangArgs(const LangOptions &Opts, case LangStandard::lang_opencl20: case LangStandard::lang_opencl30: case LangStandard::lang_openclcpp10: + case LangStandard::lang_openclcpp2021: StdOpt = OPT_cl_std_EQ; break; default: @@ -3647,6 +3650,7 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args, .Cases("cl3.0", "CL3.0", LangStandard::lang_opencl30) .Cases("clc++", "CLC++", LangStandard::lang_openclcpp10) .Cases("clc++1.0", "CLC++1.0", LangStandard::lang_openclcpp10) + .Cases("clc++2021", "CLC++2021", LangStandard::lang_openclcpp2021) .Default(LangStandard::lang_unspecified); if (OpenCLLangStd == LangStandard::lang_unspecified) { diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index 84e153ae12304..68f7249e6b916 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -433,11 +433,18 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI, // OpenCL v1.0/1.1 s6.9, v1.2/2.0 s6.10: Preprocessor Directives and Macros. if (LangOpts.OpenCL) { if (LangOpts.CPlusPlus) { - if (LangOpts.OpenCLCPlusPlusVersion == 100) + switch (LangOpts.OpenCLCPlusPlusVersion) { + case 100: Builder.defineMacro("__OPENCL_CPP_VERSION__", "100"); - else + break; + case 202100: + Builder.defineMacro("__OPENCL_CPP_VERSION__", "202100"); + break; + default: llvm_unreachable("Unsupported C++ version for OpenCL"); + } Builder.defineMacro("__CL_CPP_VERSION_1_0__", "100"); + Builder.defineMacro("__CL_CPP_VERSION_2021__", "202100"); } else { // OpenCL v1.0 and v1.1 do not have a predefined macro to indicate the // language standard with which the program is compiled. __OPENCL_VERSION__ diff --git a/clang/lib/Frontend/PrintPreprocessedOutput.cpp b/clang/lib/Frontend/PrintPreprocessedOutput.cpp index 1a820ad985a4c..5c5fc751179d1 100644 --- a/clang/lib/Frontend/PrintPreprocessedOutput.cpp +++ b/clang/lib/Frontend/PrintPreprocessedOutput.cpp @@ -182,7 +182,7 @@ class PrintPPOutputPPCallbacks : public PPCallbacks { /// implicitly when at the beginning of the file. /// /// @param Tok Token where to move to. 
- /// @param RequiresStartOfLine Whether the next line depends on being in the + /// @param RequireStartOfLine Whether the next line depends on being in the /// first column, such as a directive. /// /// @return Whether column adjustments are necessary. diff --git a/clang/lib/Headers/__clang_hip_math.h b/clang/lib/Headers/__clang_hip_math.h index 9effaa18d3e8c..ef7e087b832ca 100644 --- a/clang/lib/Headers/__clang_hip_math.h +++ b/clang/lib/Headers/__clang_hip_math.h @@ -19,6 +19,9 @@ #endif #include #include +#ifdef __OPENMP_AMDGCN__ +#include +#endif #endif // !defined(__HIPCC_RTC__) #pragma push_macro("__DEVICE__") @@ -258,6 +261,9 @@ float fmodf(float __x, float __y) { return __ocml_fmod_f32(__x, __y); } __DEVICE__ float frexpf(float __x, int *__nptr) { int __tmp; +#ifdef __OPENMP_AMDGCN__ +#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) +#endif float __r = __ocml_frexp_f32(__x, (__attribute__((address_space(5))) int *)&__tmp); *__nptr = __tmp; @@ -343,6 +349,9 @@ long int lroundf(float __x) { return __ocml_round_f32(__x); } __DEVICE__ float modff(float __x, float *__iptr) { float __tmp; +#ifdef __OPENMP_AMDGCN__ +#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) +#endif float __r = __ocml_modf_f32(__x, (__attribute__((address_space(5))) float *)&__tmp); *__iptr = __tmp; @@ -423,6 +432,9 @@ float remainderf(float __x, float __y) { __DEVICE__ float remquof(float __x, float __y, int *__quo) { int __tmp; +#ifdef __OPENMP_AMDGCN__ +#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) +#endif float __r = __ocml_remquo_f32( __x, __y, (__attribute__((address_space(5))) int *)&__tmp); *__quo = __tmp; @@ -479,6 +491,9 @@ __RETURN_TYPE __signbitf(float __x) { return __ocml_signbit_f32(__x); } __DEVICE__ void sincosf(float __x, float *__sinptr, float *__cosptr) { float __tmp; +#ifdef __OPENMP_AMDGCN__ +#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) +#endif *__sinptr = __ocml_sincos_f32(__x, (__attribute__((address_space(5))) float *)&__tmp); *__cosptr = __tmp; @@ -487,6 +502,9 @@ void sincosf(float __x, float *__sinptr, float *__cosptr) { __DEVICE__ void sincospif(float __x, float *__sinptr, float *__cosptr) { float __tmp; +#ifdef __OPENMP_AMDGCN__ +#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) +#endif *__sinptr = __ocml_sincospi_f32( __x, (__attribute__((address_space(5))) float *)&__tmp); *__cosptr = __tmp; @@ -799,6 +817,9 @@ double fmod(double __x, double __y) { return __ocml_fmod_f64(__x, __y); } __DEVICE__ double frexp(double __x, int *__nptr) { int __tmp; +#ifdef __OPENMP_AMDGCN__ +#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) +#endif double __r = __ocml_frexp_f64(__x, (__attribute__((address_space(5))) int *)&__tmp); *__nptr = __tmp; @@ -883,6 +904,9 @@ long int lround(double __x) { return __ocml_round_f64(__x); } __DEVICE__ double modf(double __x, double *__iptr) { double __tmp; +#ifdef __OPENMP_AMDGCN__ +#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) +#endif double __r = __ocml_modf_f64(__x, (__attribute__((address_space(5))) double *)&__tmp); *__iptr = __tmp; @@ -971,6 +995,9 @@ double remainder(double __x, double __y) { __DEVICE__ double remquo(double __x, double __y, int *__quo) { int __tmp; +#ifdef __OPENMP_AMDGCN__ +#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) +#endif double __r = __ocml_remquo_f64( __x, __y, (__attribute__((address_space(5))) int *)&__tmp); *__quo = __tmp; @@ -1029,6 +1056,9 @@ double sin(double __x) { return __ocml_sin_f64(__x); } __DEVICE__ void sincos(double __x, 
double *__sinptr, double *__cosptr) { double __tmp; +#ifdef __OPENMP_AMDGCN__ +#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) +#endif *__sinptr = __ocml_sincos_f64( __x, (__attribute__((address_space(5))) double *)&__tmp); *__cosptr = __tmp; @@ -1037,6 +1067,9 @@ void sincos(double __x, double *__sinptr, double *__cosptr) { __DEVICE__ void sincospi(double __x, double *__sinptr, double *__cosptr) { double __tmp; +#ifdef __OPENMP_AMDGCN__ +#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) +#endif *__sinptr = __ocml_sincospi_f64( __x, (__attribute__((address_space(5))) double *)&__tmp); *__cosptr = __tmp; diff --git a/clang/lib/Headers/amxintrin.h b/clang/lib/Headers/amxintrin.h index ec601a58e7c34..4940666e80836 100644 --- a/clang/lib/Headers/amxintrin.h +++ b/clang/lib/Headers/amxintrin.h @@ -314,8 +314,8 @@ typedef struct __tile1024i_str { /// \param stride /// The stride between the rows' data to be loaded in memory. __DEFAULT_FN_ATTRS_TILE -static void __tile_loadd(__tile1024i *dst, const void *base, - __SIZE_TYPE__ stride) { +static __inline__ void __tile_loadd(__tile1024i *dst, const void *base, + __SIZE_TYPE__ stride) { dst->tile = _tile_loadd_internal(dst->row, dst->col, base, stride); } @@ -335,8 +335,8 @@ static void __tile_loadd(__tile1024i *dst, const void *base, /// \param stride /// The stride between the rows' data to be loaded in memory. __DEFAULT_FN_ATTRS_TILE -static void __tile_stream_loadd(__tile1024i *dst, const void *base, - __SIZE_TYPE__ stride) { +static __inline__ void __tile_stream_loadd(__tile1024i *dst, const void *base, + __SIZE_TYPE__ stride) { dst->tile = _tile_loaddt1_internal(dst->row, dst->col, base, stride); } @@ -357,8 +357,8 @@ static void __tile_stream_loadd(__tile1024i *dst, const void *base, /// \param src1 /// The 2nd source tile. Max size is 1024 Bytes. __DEFAULT_FN_ATTRS_INT8 -static void __tile_dpbssd(__tile1024i *dst, __tile1024i src0, - __tile1024i src1) { +static __inline__ void __tile_dpbssd(__tile1024i *dst, __tile1024i src0, + __tile1024i src1) { dst->tile = _tile_dpbssd_internal(src0.row, src1.col, src0.col, dst->tile, src0.tile, src1.tile); } @@ -380,8 +380,8 @@ static void __tile_dpbssd(__tile1024i *dst, __tile1024i src0, /// \param src1 /// The 2nd source tile. Max size is 1024 Bytes. __DEFAULT_FN_ATTRS_INT8 -static void __tile_dpbsud(__tile1024i *dst, __tile1024i src0, - __tile1024i src1) { +static __inline__ void __tile_dpbsud(__tile1024i *dst, __tile1024i src0, + __tile1024i src1) { dst->tile = _tile_dpbsud_internal(src0.row, src1.col, src0.col, dst->tile, src0.tile, src1.tile); } @@ -403,8 +403,8 @@ static void __tile_dpbsud(__tile1024i *dst, __tile1024i src0, /// \param src1 /// The 2nd source tile. Max size is 1024 Bytes. __DEFAULT_FN_ATTRS_INT8 -static void __tile_dpbusd(__tile1024i *dst, __tile1024i src0, - __tile1024i src1) { +static __inline__ void __tile_dpbusd(__tile1024i *dst, __tile1024i src0, + __tile1024i src1) { dst->tile = _tile_dpbusd_internal(src0.row, src1.col, src0.col, dst->tile, src0.tile, src1.tile); } @@ -426,8 +426,8 @@ static void __tile_dpbusd(__tile1024i *dst, __tile1024i src0, /// \param src1 /// The 2nd source tile. Max size is 1024 Bytes. 
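Since this hunk touches the whole __tile_* helper API (only to mark the functions __inline__), a minimal usage sketch may be helpful; it is illustrative only and assumes compilation with -mamx-tile -mamx-int8 on an AMX-capable target:

#include <immintrin.h>

/* Illustrative use of the helpers above: load two int8 tiles, accumulate
 * their dot product into an int32 tile, and store it. Row/column counts must
 * respect the AMX limits (at most 16 rows and 64 bytes per row). */
void dpbssd_16x16(const void *A, const void *B, void *C,
                  __SIZE_TYPE__ StrideA, __SIZE_TYPE__ StrideB,
                  __SIZE_TYPE__ StrideC) {
  __tile1024i TA = {16, 64}; /* 16 rows x 64 bytes of int8 */
  __tile1024i TB = {16, 64};
  __tile1024i TC = {16, 64}; /* 16 x 16 int32 accumulators */
  __tile_loadd(&TA, A, StrideA);
  __tile_loadd(&TB, B, StrideB);
  __tile_zero(&TC);
  __tile_dpbssd(&TC, TA, TB);
  __tile_stored(C, StrideC, TC);
}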
__DEFAULT_FN_ATTRS_INT8 -static void __tile_dpbuud(__tile1024i *dst, __tile1024i src0, - __tile1024i src1) { +static __inline__ void __tile_dpbuud(__tile1024i *dst, __tile1024i src0, + __tile1024i src1) { dst->tile = _tile_dpbuud_internal(src0.row, src1.col, src0.col, dst->tile, src0.tile, src1.tile); } @@ -446,7 +446,8 @@ static void __tile_dpbuud(__tile1024i *dst, __tile1024i src0, /// \param stride /// The stride between the rows' data to be stored in memory. __DEFAULT_FN_ATTRS_TILE -static void __tile_stored(void *base, __SIZE_TYPE__ stride, __tile1024i src) { +static __inline__ void __tile_stored(void *base, __SIZE_TYPE__ stride, + __tile1024i src) { _tile_stored_internal(src.row, src.col, base, stride, src.tile); } @@ -459,7 +460,7 @@ static void __tile_stored(void *base, __SIZE_TYPE__ stride, __tile1024i src) { /// \param dst /// The destination tile to be zero. Max size is 1024 Bytes. __DEFAULT_FN_ATTRS_TILE -static void __tile_zero(__tile1024i *dst) { +static __inline__ void __tile_zero(__tile1024i *dst) { dst->tile = __builtin_ia32_tilezero_internal(dst->row, dst->col); } @@ -479,8 +480,8 @@ static void __tile_zero(__tile1024i *dst) { /// \param src1 /// The 2nd source tile. Max size is 1024 Bytes. __DEFAULT_FN_ATTRS_BF16 -static void __tile_dpbf16ps(__tile1024i *dst, __tile1024i src0, - __tile1024i src1) { +static __inline__ void __tile_dpbf16ps(__tile1024i *dst, __tile1024i src0, + __tile1024i src1) { dst->tile = _tile_dpbf16ps_internal(src0.row, src1.col, src0.col, dst->tile, src0.tile, src1.tile); } diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index 4281a33d375c2..6aee8aed84871 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -178,16 +178,16 @@ _kadd_mask64(__mmask64 __A, __mmask64 __B) } #define _kshiftli_mask32(A, I) \ - (__mmask32)__builtin_ia32_kshiftlisi((__mmask32)(A), (unsigned int)(I)) + ((__mmask32)__builtin_ia32_kshiftlisi((__mmask32)(A), (unsigned int)(I))) #define _kshiftri_mask32(A, I) \ - (__mmask32)__builtin_ia32_kshiftrisi((__mmask32)(A), (unsigned int)(I)) + ((__mmask32)__builtin_ia32_kshiftrisi((__mmask32)(A), (unsigned int)(I))) #define _kshiftli_mask64(A, I) \ - (__mmask64)__builtin_ia32_kshiftlidi((__mmask64)(A), (unsigned int)(I)) + ((__mmask64)__builtin_ia32_kshiftlidi((__mmask64)(A), (unsigned int)(I))) #define _kshiftri_mask64(A, I) \ - (__mmask64)__builtin_ia32_kshiftridi((__mmask64)(A), (unsigned int)(I)) + ((__mmask64)__builtin_ia32_kshiftridi((__mmask64)(A), (unsigned int)(I))) static __inline__ unsigned int __DEFAULT_FN_ATTRS _cvtmask32_u32(__mmask32 __A) { @@ -232,44 +232,44 @@ _store_mask64(__mmask64 *__A, __mmask64 __B) { /* Integer compare */ #define _mm512_cmp_epi8_mask(a, b, p) \ - (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \ - (__v64qi)(__m512i)(b), (int)(p), \ - (__mmask64)-1) + ((__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \ + (__v64qi)(__m512i)(b), (int)(p), \ + (__mmask64)-1)) #define _mm512_mask_cmp_epi8_mask(m, a, b, p) \ - (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \ - (__v64qi)(__m512i)(b), (int)(p), \ - (__mmask64)(m)) + ((__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \ + (__v64qi)(__m512i)(b), (int)(p), \ + (__mmask64)(m))) #define _mm512_cmp_epu8_mask(a, b, p) \ - (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \ - (__v64qi)(__m512i)(b), (int)(p), \ - (__mmask64)-1) + ((__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \ + (__v64qi)(__m512i)(b), 
(int)(p), \ + (__mmask64)-1)) #define _mm512_mask_cmp_epu8_mask(m, a, b, p) \ - (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \ - (__v64qi)(__m512i)(b), (int)(p), \ - (__mmask64)(m)) + ((__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \ + (__v64qi)(__m512i)(b), (int)(p), \ + (__mmask64)(m))) #define _mm512_cmp_epi16_mask(a, b, p) \ - (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \ - (__v32hi)(__m512i)(b), (int)(p), \ - (__mmask32)-1) + ((__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \ + (__v32hi)(__m512i)(b), (int)(p), \ + (__mmask32)-1)) #define _mm512_mask_cmp_epi16_mask(m, a, b, p) \ - (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \ - (__v32hi)(__m512i)(b), (int)(p), \ - (__mmask32)(m)) + ((__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \ + (__v32hi)(__m512i)(b), (int)(p), \ + (__mmask32)(m))) #define _mm512_cmp_epu16_mask(a, b, p) \ - (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \ - (__v32hi)(__m512i)(b), (int)(p), \ - (__mmask32)-1) + ((__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \ + (__v32hi)(__m512i)(b), (int)(p), \ + (__mmask32)-1)) #define _mm512_mask_cmp_epu16_mask(m, a, b, p) \ - (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \ - (__v32hi)(__m512i)(b), (int)(p), \ - (__mmask32)(m)) + ((__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \ + (__v32hi)(__m512i)(b), (int)(p), \ + (__mmask32)(m))) #define _mm512_cmpeq_epi8_mask(A, B) \ _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ) @@ -1428,36 +1428,36 @@ _mm512_maskz_cvtepu8_epi16(__mmask32 __U, __m256i __A) #define _mm512_shufflehi_epi16(A, imm) \ - (__m512i)__builtin_ia32_pshufhw512((__v32hi)(__m512i)(A), (int)(imm)) + ((__m512i)__builtin_ia32_pshufhw512((__v32hi)(__m512i)(A), (int)(imm))) #define _mm512_mask_shufflehi_epi16(W, U, A, imm) \ - (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ - (__v32hi)_mm512_shufflehi_epi16((A), \ - (imm)), \ - (__v32hi)(__m512i)(W)) + ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ + (__v32hi)_mm512_shufflehi_epi16((A), \ + (imm)), \ + (__v32hi)(__m512i)(W))) #define _mm512_maskz_shufflehi_epi16(U, A, imm) \ - (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ - (__v32hi)_mm512_shufflehi_epi16((A), \ - (imm)), \ - (__v32hi)_mm512_setzero_si512()) + ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ + (__v32hi)_mm512_shufflehi_epi16((A), \ + (imm)), \ + (__v32hi)_mm512_setzero_si512())) #define _mm512_shufflelo_epi16(A, imm) \ - (__m512i)__builtin_ia32_pshuflw512((__v32hi)(__m512i)(A), (int)(imm)) + ((__m512i)__builtin_ia32_pshuflw512((__v32hi)(__m512i)(A), (int)(imm))) #define _mm512_mask_shufflelo_epi16(W, U, A, imm) \ - (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ - (__v32hi)_mm512_shufflelo_epi16((A), \ - (imm)), \ - (__v32hi)(__m512i)(W)) + ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ + (__v32hi)_mm512_shufflelo_epi16((A), \ + (imm)), \ + (__v32hi)(__m512i)(W))) #define _mm512_maskz_shufflelo_epi16(U, A, imm) \ - (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ - (__v32hi)_mm512_shufflelo_epi16((A), \ - (imm)), \ - (__v32hi)_mm512_setzero_si512()) + ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ + (__v32hi)_mm512_shufflelo_epi16((A), \ + (imm)), \ + (__v32hi)_mm512_setzero_si512())) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sllv_epi16(__m512i __A, __m512i __B) @@ -1527,7 +1527,7 @@ _mm512_maskz_slli_epi16(__mmask32 __U, __m512i __A, unsigned int __B) } #define 
_mm512_bslli_epi128(a, imm) \ - (__m512i)__builtin_ia32_pslldqi512_byteshift((__v8di)(__m512i)(a), (int)(imm)) + ((__m512i)__builtin_ia32_pslldqi512_byteshift((__v8di)(__m512i)(a), (int)(imm))) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srlv_epi16(__m512i __A, __m512i __B) @@ -1664,7 +1664,7 @@ _mm512_maskz_srli_epi16(__mmask32 __U, __m512i __A, int __B) } #define _mm512_bsrli_epi128(a, imm) \ - (__m512i)__builtin_ia32_psrldqi512_byteshift((__v8di)(__m512i)(a), (int)(imm)) + ((__m512i)__builtin_ia32_psrldqi512_byteshift((__v8di)(__m512i)(a), (int)(imm))) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mov_epi16 (__m512i __W, __mmask32 __U, __m512i __A) @@ -1984,32 +1984,32 @@ _mm512_mask_permutexvar_epi16 (__m512i __W, __mmask32 __M, __m512i __A, } #define _mm512_alignr_epi8(A, B, N) \ - (__m512i)__builtin_ia32_palignr512((__v64qi)(__m512i)(A), \ - (__v64qi)(__m512i)(B), (int)(N)) + ((__m512i)__builtin_ia32_palignr512((__v64qi)(__m512i)(A), \ + (__v64qi)(__m512i)(B), (int)(N))) #define _mm512_mask_alignr_epi8(W, U, A, B, N) \ - (__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \ - (__v64qi)_mm512_alignr_epi8((A), (B), (int)(N)), \ - (__v64qi)(__m512i)(W)) + ((__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \ + (__v64qi)_mm512_alignr_epi8((A), (B), (int)(N)), \ + (__v64qi)(__m512i)(W))) #define _mm512_maskz_alignr_epi8(U, A, B, N) \ - (__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \ + ((__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \ (__v64qi)_mm512_alignr_epi8((A), (B), (int)(N)), \ - (__v64qi)(__m512i)_mm512_setzero_si512()) + (__v64qi)(__m512i)_mm512_setzero_si512())) #define _mm512_dbsad_epu8(A, B, imm) \ - (__m512i)__builtin_ia32_dbpsadbw512((__v64qi)(__m512i)(A), \ - (__v64qi)(__m512i)(B), (int)(imm)) + ((__m512i)__builtin_ia32_dbpsadbw512((__v64qi)(__m512i)(A), \ + (__v64qi)(__m512i)(B), (int)(imm))) #define _mm512_mask_dbsad_epu8(W, U, A, B, imm) \ - (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ + ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ (__v32hi)_mm512_dbsad_epu8((A), (B), (imm)), \ - (__v32hi)(__m512i)(W)) + (__v32hi)(__m512i)(W))) #define _mm512_maskz_dbsad_epu8(U, A, B, imm) \ - (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ + ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ (__v32hi)_mm512_dbsad_epu8((A), (B), (imm)), \ - (__v32hi)_mm512_setzero_si512()) + (__v32hi)_mm512_setzero_si512())) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sad_epu8 (__m512i __A, __m512i __B) diff --git a/clang/lib/Headers/avx512dqintrin.h b/clang/lib/Headers/avx512dqintrin.h index 337256c50f50d..3ba0a0cfd5fdf 100644 --- a/clang/lib/Headers/avx512dqintrin.h +++ b/clang/lib/Headers/avx512dqintrin.h @@ -121,10 +121,10 @@ _kadd_mask16(__mmask16 __A, __mmask16 __B) } #define _kshiftli_mask8(A, I) \ - (__mmask8)__builtin_ia32_kshiftliqi((__mmask8)(A), (unsigned int)(I)) + ((__mmask8)__builtin_ia32_kshiftliqi((__mmask8)(A), (unsigned int)(I))) #define _kshiftri_mask8(A, I) \ - (__mmask8)__builtin_ia32_kshiftriqi((__mmask8)(A), (unsigned int)(I)) + ((__mmask8)__builtin_ia32_kshiftriqi((__mmask8)(A), (unsigned int)(I))) static __inline__ unsigned int __DEFAULT_FN_ATTRS _cvtmask8_u32(__mmask8 __A) { @@ -342,19 +342,19 @@ _mm512_maskz_cvtpd_epi64 (__mmask8 __U, __m512d __A) { } #define _mm512_cvt_roundpd_epi64(A, R) \ - (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \ - (__v8di)_mm512_setzero_si512(), \ - (__mmask8)-1, (int)(R)) + ((__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \ + 
(__v8di)_mm512_setzero_si512(), \ + (__mmask8)-1, (int)(R))) #define _mm512_mask_cvt_roundpd_epi64(W, U, A, R) \ - (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \ - (__v8di)(__m512i)(W), \ - (__mmask8)(U), (int)(R)) + ((__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \ + (__v8di)(__m512i)(W), \ + (__mmask8)(U), (int)(R))) #define _mm512_maskz_cvt_roundpd_epi64(U, A, R) \ - (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \ - (__v8di)_mm512_setzero_si512(), \ - (__mmask8)(U), (int)(R)) + ((__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \ + (__v8di)_mm512_setzero_si512(), \ + (__mmask8)(U), (int)(R))) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epu64 (__m512d __A) { @@ -381,19 +381,19 @@ _mm512_maskz_cvtpd_epu64 (__mmask8 __U, __m512d __A) { } #define _mm512_cvt_roundpd_epu64(A, R) \ - (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \ - (__v8di)_mm512_setzero_si512(), \ - (__mmask8)-1, (int)(R)) + ((__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \ + (__v8di)_mm512_setzero_si512(), \ + (__mmask8)-1, (int)(R))) #define _mm512_mask_cvt_roundpd_epu64(W, U, A, R) \ - (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \ - (__v8di)(__m512i)(W), \ - (__mmask8)(U), (int)(R)) + ((__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \ + (__v8di)(__m512i)(W), \ + (__mmask8)(U), (int)(R))) #define _mm512_maskz_cvt_roundpd_epu64(U, A, R) \ - (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \ - (__v8di)_mm512_setzero_si512(), \ - (__mmask8)(U), (int)(R)) + ((__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \ + (__v8di)_mm512_setzero_si512(), \ + (__mmask8)(U), (int)(R))) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epi64 (__m256 __A) { @@ -420,19 +420,19 @@ _mm512_maskz_cvtps_epi64 (__mmask8 __U, __m256 __A) { } #define _mm512_cvt_roundps_epi64(A, R) \ - (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \ - (__v8di)_mm512_setzero_si512(), \ - (__mmask8)-1, (int)(R)) + ((__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \ + (__v8di)_mm512_setzero_si512(), \ + (__mmask8)-1, (int)(R))) #define _mm512_mask_cvt_roundps_epi64(W, U, A, R) \ - (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \ - (__v8di)(__m512i)(W), \ - (__mmask8)(U), (int)(R)) + ((__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \ + (__v8di)(__m512i)(W), \ + (__mmask8)(U), (int)(R))) #define _mm512_maskz_cvt_roundps_epi64(U, A, R) \ - (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \ - (__v8di)_mm512_setzero_si512(), \ - (__mmask8)(U), (int)(R)) + ((__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \ + (__v8di)_mm512_setzero_si512(), \ + (__mmask8)(U), (int)(R))) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epu64 (__m256 __A) { @@ -459,19 +459,19 @@ _mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A) { } #define _mm512_cvt_roundps_epu64(A, R) \ - (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \ - (__v8di)_mm512_setzero_si512(), \ - (__mmask8)-1, (int)(R)) + ((__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \ + (__v8di)_mm512_setzero_si512(), \ + (__mmask8)-1, (int)(R))) #define _mm512_mask_cvt_roundps_epu64(W, U, A, R) \ - (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \ - (__v8di)(__m512i)(W), \ - (__mmask8)(U), (int)(R)) + ((__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \ + (__v8di)(__m512i)(W), \ + (__mmask8)(U), 
(int)(R))) #define _mm512_maskz_cvt_roundps_epu64(U, A, R) \ - (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \ - (__v8di)_mm512_setzero_si512(), \ - (__mmask8)(U), (int)(R)) + ((__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \ + (__v8di)_mm512_setzero_si512(), \ + (__mmask8)(U), (int)(R))) static __inline__ __m512d __DEFAULT_FN_ATTRS512 @@ -494,19 +494,19 @@ _mm512_maskz_cvtepi64_pd (__mmask8 __U, __m512i __A) { } #define _mm512_cvt_roundepi64_pd(A, R) \ - (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \ - (__v8df)_mm512_setzero_pd(), \ - (__mmask8)-1, (int)(R)) + ((__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \ + (__v8df)_mm512_setzero_pd(), \ + (__mmask8)-1, (int)(R))) #define _mm512_mask_cvt_roundepi64_pd(W, U, A, R) \ - (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \ - (__v8df)(__m512d)(W), \ - (__mmask8)(U), (int)(R)) + ((__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \ + (__v8df)(__m512d)(W), \ + (__mmask8)(U), (int)(R))) #define _mm512_maskz_cvt_roundepi64_pd(U, A, R) \ - (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \ - (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(U), (int)(R)) + ((__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \ + (__v8df)_mm512_setzero_pd(), \ + (__mmask8)(U), (int)(R))) static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_ps (__m512i __A) { @@ -533,19 +533,19 @@ _mm512_maskz_cvtepi64_ps (__mmask8 __U, __m512i __A) { } #define _mm512_cvt_roundepi64_ps(A, R) \ - (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \ - (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)-1, (int)(R)) + ((__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \ + (__v8sf)_mm256_setzero_ps(), \ + (__mmask8)-1, (int)(R))) #define _mm512_mask_cvt_roundepi64_ps(W, U, A, R) \ - (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \ - (__v8sf)(__m256)(W), (__mmask8)(U), \ - (int)(R)) + ((__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \ + (__v8sf)(__m256)(W), (__mmask8)(U), \ + (int)(R))) #define _mm512_maskz_cvt_roundepi64_ps(U, A, R) \ - (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \ - (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)(U), (int)(R)) + ((__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \ + (__v8sf)_mm256_setzero_ps(), \ + (__mmask8)(U), (int)(R))) static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -573,19 +573,19 @@ _mm512_maskz_cvttpd_epi64 (__mmask8 __U, __m512d __A) { } #define _mm512_cvtt_roundpd_epi64(A, R) \ - (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \ - (__v8di)_mm512_setzero_si512(), \ - (__mmask8)-1, (int)(R)) + ((__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \ + (__v8di)_mm512_setzero_si512(), \ + (__mmask8)-1, (int)(R))) #define _mm512_mask_cvtt_roundpd_epi64(W, U, A, R) \ - (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \ - (__v8di)(__m512i)(W), \ - (__mmask8)(U), (int)(R)) + ((__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \ + (__v8di)(__m512i)(W), \ + (__mmask8)(U), (int)(R))) #define _mm512_maskz_cvtt_roundpd_epi64(U, A, R) \ - (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \ - (__v8di)_mm512_setzero_si512(), \ - (__mmask8)(U), (int)(R)) + ((__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \ + (__v8di)_mm512_setzero_si512(), \ + (__mmask8)(U), (int)(R))) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epu64 (__m512d __A) { @@ -612,19 +612,19 @@ 
_mm512_maskz_cvttpd_epu64 (__mmask8 __U, __m512d __A) { } #define _mm512_cvtt_roundpd_epu64(A, R) \ - (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \ - (__v8di)_mm512_setzero_si512(), \ - (__mmask8)-1, (int)(R)) + ((__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \ + (__v8di)_mm512_setzero_si512(), \ + (__mmask8)-1, (int)(R))) #define _mm512_mask_cvtt_roundpd_epu64(W, U, A, R) \ - (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \ - (__v8di)(__m512i)(W), \ - (__mmask8)(U), (int)(R)) + ((__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \ + (__v8di)(__m512i)(W), \ + (__mmask8)(U), (int)(R))) #define _mm512_maskz_cvtt_roundpd_epu64(U, A, R) \ - (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \ - (__v8di)_mm512_setzero_si512(), \ - (__mmask8)(U), (int)(R)) + ((__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \ + (__v8di)_mm512_setzero_si512(), \ + (__mmask8)(U), (int)(R))) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epi64 (__m256 __A) { @@ -651,19 +651,19 @@ _mm512_maskz_cvttps_epi64 (__mmask8 __U, __m256 __A) { } #define _mm512_cvtt_roundps_epi64(A, R) \ - (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \ - (__v8di)_mm512_setzero_si512(), \ - (__mmask8)-1, (int)(R)) + ((__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \ + (__v8di)_mm512_setzero_si512(), \ + (__mmask8)-1, (int)(R))) #define _mm512_mask_cvtt_roundps_epi64(W, U, A, R) \ - (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \ - (__v8di)(__m512i)(W), \ - (__mmask8)(U), (int)(R)) + ((__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \ + (__v8di)(__m512i)(W), \ + (__mmask8)(U), (int)(R))) #define _mm512_maskz_cvtt_roundps_epi64(U, A, R) \ - (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \ - (__v8di)_mm512_setzero_si512(), \ - (__mmask8)(U), (int)(R)) + ((__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \ + (__v8di)_mm512_setzero_si512(), \ + (__mmask8)(U), (int)(R))) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epu64 (__m256 __A) { @@ -690,19 +690,19 @@ _mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A) { } #define _mm512_cvtt_roundps_epu64(A, R) \ - (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \ - (__v8di)_mm512_setzero_si512(), \ - (__mmask8)-1, (int)(R)) + ((__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \ + (__v8di)_mm512_setzero_si512(), \ + (__mmask8)-1, (int)(R))) #define _mm512_mask_cvtt_roundps_epu64(W, U, A, R) \ - (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \ - (__v8di)(__m512i)(W), \ - (__mmask8)(U), (int)(R)) + ((__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \ + (__v8di)(__m512i)(W), \ + (__mmask8)(U), (int)(R))) #define _mm512_maskz_cvtt_roundps_epu64(U, A, R) \ - (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \ - (__v8di)_mm512_setzero_si512(), \ - (__mmask8)(U), (int)(R)) + ((__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \ + (__v8di)_mm512_setzero_si512(), \ + (__mmask8)(U), (int)(R))) static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepu64_pd (__m512i __A) { @@ -724,20 +724,20 @@ _mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A) { } #define _mm512_cvt_roundepu64_pd(A, R) \ - (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \ - (__v8df)_mm512_setzero_pd(), \ - (__mmask8)-1, (int)(R)) + ((__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \ + 
(__v8df)_mm512_setzero_pd(), \ + (__mmask8)-1, (int)(R))) #define _mm512_mask_cvt_roundepu64_pd(W, U, A, R) \ - (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \ - (__v8df)(__m512d)(W), \ - (__mmask8)(U), (int)(R)) + ((__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \ + (__v8df)(__m512d)(W), \ + (__mmask8)(U), (int)(R))) #define _mm512_maskz_cvt_roundepu64_pd(U, A, R) \ - (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \ - (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(U), (int)(R)) + ((__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \ + (__v8df)_mm512_setzero_pd(), \ + (__mmask8)(U), (int)(R))) static __inline__ __m256 __DEFAULT_FN_ATTRS512 @@ -765,290 +765,290 @@ _mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A) { } #define _mm512_cvt_roundepu64_ps(A, R) \ - (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \ - (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)-1, (int)(R)) + ((__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \ + (__v8sf)_mm256_setzero_ps(), \ + (__mmask8)-1, (int)(R))) #define _mm512_mask_cvt_roundepu64_ps(W, U, A, R) \ - (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \ - (__v8sf)(__m256)(W), (__mmask8)(U), \ - (int)(R)) + ((__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \ + (__v8sf)(__m256)(W), (__mmask8)(U), \ + (int)(R))) #define _mm512_maskz_cvt_roundepu64_ps(U, A, R) \ - (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \ - (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)(U), (int)(R)) + ((__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \ + (__v8sf)_mm256_setzero_ps(), \ + (__mmask8)(U), (int)(R))) #define _mm512_range_pd(A, B, C) \ - (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), (int)(C), \ - (__v8df)_mm512_setzero_pd(), \ - (__mmask8)-1, \ - _MM_FROUND_CUR_DIRECTION) + ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), (int)(C), \ + (__v8df)_mm512_setzero_pd(), \ + (__mmask8)-1, \ + _MM_FROUND_CUR_DIRECTION)) #define _mm512_mask_range_pd(W, U, A, B, C) \ - (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), (int)(C), \ - (__v8df)(__m512d)(W), (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION) + ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), (int)(C), \ + (__v8df)(__m512d)(W), (__mmask8)(U), \ + _MM_FROUND_CUR_DIRECTION)) #define _mm512_maskz_range_pd(U, A, B, C) \ - (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), (int)(C), \ - (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION) + ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), (int)(C), \ + (__v8df)_mm512_setzero_pd(), \ + (__mmask8)(U), \ + _MM_FROUND_CUR_DIRECTION)) #define _mm512_range_round_pd(A, B, C, R) \ - (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), (int)(C), \ - (__v8df)_mm512_setzero_pd(), \ - (__mmask8)-1, (int)(R)) + ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), (int)(C), \ + (__v8df)_mm512_setzero_pd(), \ + (__mmask8)-1, (int)(R))) #define _mm512_mask_range_round_pd(W, U, A, B, C, R) \ - (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), (int)(C), \ - (__v8df)(__m512d)(W), (__mmask8)(U), \ - (int)(R)) + ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), 
(int)(C), \ + (__v8df)(__m512d)(W), (__mmask8)(U), \ + (int)(R))) #define _mm512_maskz_range_round_pd(U, A, B, C, R) \ - (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), (int)(C), \ - (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(U), (int)(R)) + ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), (int)(C), \ + (__v8df)_mm512_setzero_pd(), \ + (__mmask8)(U), (int)(R))) #define _mm512_range_ps(A, B, C) \ - (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), (int)(C), \ - (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)-1, \ - _MM_FROUND_CUR_DIRECTION) + ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), (int)(C), \ + (__v16sf)_mm512_setzero_ps(), \ + (__mmask16)-1, \ + _MM_FROUND_CUR_DIRECTION)) #define _mm512_mask_range_ps(W, U, A, B, C) \ - (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), (int)(C), \ - (__v16sf)(__m512)(W), (__mmask16)(U), \ - _MM_FROUND_CUR_DIRECTION) + ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), (int)(C), \ + (__v16sf)(__m512)(W), (__mmask16)(U), \ + _MM_FROUND_CUR_DIRECTION)) #define _mm512_maskz_range_ps(U, A, B, C) \ - (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), (int)(C), \ - (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(U), \ - _MM_FROUND_CUR_DIRECTION) + ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), (int)(C), \ + (__v16sf)_mm512_setzero_ps(), \ + (__mmask16)(U), \ + _MM_FROUND_CUR_DIRECTION)) #define _mm512_range_round_ps(A, B, C, R) \ - (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), (int)(C), \ - (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)-1, (int)(R)) + ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), (int)(C), \ + (__v16sf)_mm512_setzero_ps(), \ + (__mmask16)-1, (int)(R))) #define _mm512_mask_range_round_ps(W, U, A, B, C, R) \ - (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), (int)(C), \ - (__v16sf)(__m512)(W), (__mmask16)(U), \ - (int)(R)) + ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), (int)(C), \ + (__v16sf)(__m512)(W), (__mmask16)(U), \ + (int)(R))) #define _mm512_maskz_range_round_ps(U, A, B, C, R) \ - (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), (int)(C), \ - (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(U), (int)(R)) + ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), (int)(C), \ + (__v16sf)_mm512_setzero_ps(), \ + (__mmask16)(U), (int)(R))) #define _mm_range_round_ss(A, B, C, R) \ - (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8) -1, (int)(C),\ - (int)(R)) + ((__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8) -1, (int)(C),\ + (int)(R))) #define _mm_range_ss(A ,B , C) _mm_range_round_ss(A, B, C ,_MM_FROUND_CUR_DIRECTION) #define _mm_mask_range_round_ss(W, U, A, B, C, R) \ - (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)(__m128)(W),\ - (__mmask8)(U), (int)(C),\ - (int)(R)) + ((__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)(__m128)(W),\ 
+ (__mmask8)(U), (int)(C),\ + (int)(R))) #define _mm_mask_range_ss(W , U, A, B, C) _mm_mask_range_round_ss(W, U, A, B, C , _MM_FROUND_CUR_DIRECTION) #define _mm_maskz_range_round_ss(U, A, B, C, R) \ - (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U), (int)(C),\ - (int)(R)) + ((__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)(U), (int)(C),\ + (int)(R))) #define _mm_maskz_range_ss(U, A ,B , C) _mm_maskz_range_round_ss(U, A, B, C ,_MM_FROUND_CUR_DIRECTION) #define _mm_range_round_sd(A, B, C, R) \ - (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8) -1, (int)(C),\ - (int)(R)) + ((__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8) -1, (int)(C),\ + (int)(R))) #define _mm_range_sd(A ,B , C) _mm_range_round_sd(A, B, C ,_MM_FROUND_CUR_DIRECTION) #define _mm_mask_range_round_sd(W, U, A, B, C, R) \ - (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)(__m128d)(W),\ - (__mmask8)(U), (int)(C),\ - (int)(R)) + ((__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)(__m128d)(W),\ + (__mmask8)(U), (int)(C),\ + (int)(R))) #define _mm_mask_range_sd(W, U, A, B, C) _mm_mask_range_round_sd(W, U, A, B, C ,_MM_FROUND_CUR_DIRECTION) #define _mm_maskz_range_round_sd(U, A, B, C, R) \ - (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U), (int)(C),\ - (int)(R)) + ((__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)(U), (int)(C),\ + (int)(R))) #define _mm_maskz_range_sd(U, A, B, C) _mm_maskz_range_round_sd(U, A, B, C ,_MM_FROUND_CUR_DIRECTION) #define _mm512_reduce_pd(A, B) \ - (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \ - (__v8df)_mm512_setzero_pd(), \ - (__mmask8)-1, \ - _MM_FROUND_CUR_DIRECTION) + ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \ + (__v8df)_mm512_setzero_pd(), \ + (__mmask8)-1, \ + _MM_FROUND_CUR_DIRECTION)) #define _mm512_mask_reduce_pd(W, U, A, B) \ - (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \ - (__v8df)(__m512d)(W), \ - (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION) + ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \ + (__v8df)(__m512d)(W), \ + (__mmask8)(U), \ + _MM_FROUND_CUR_DIRECTION)) #define _mm512_maskz_reduce_pd(U, A, B) \ - (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \ - (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION) + ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \ + (__v8df)_mm512_setzero_pd(), \ + (__mmask8)(U), \ + _MM_FROUND_CUR_DIRECTION)) #define _mm512_reduce_ps(A, B) \ - (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \ - (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)-1, \ - _MM_FROUND_CUR_DIRECTION) + ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \ + (__v16sf)_mm512_setzero_ps(), \ + (__mmask16)-1, \ + _MM_FROUND_CUR_DIRECTION)) #define _mm512_mask_reduce_ps(W, U, A, B) \ - 
(__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \ - (__v16sf)(__m512)(W), \ - (__mmask16)(U), \ - _MM_FROUND_CUR_DIRECTION) + ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \ + (__v16sf)(__m512)(W), \ + (__mmask16)(U), \ + _MM_FROUND_CUR_DIRECTION)) #define _mm512_maskz_reduce_ps(U, A, B) \ - (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \ - (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(U), \ - _MM_FROUND_CUR_DIRECTION) + ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \ + (__v16sf)_mm512_setzero_ps(), \ + (__mmask16)(U), \ + _MM_FROUND_CUR_DIRECTION)) #define _mm512_reduce_round_pd(A, B, R) \ - (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \ - (__v8df)_mm512_setzero_pd(), \ - (__mmask8)-1, (int)(R)) + ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \ + (__v8df)_mm512_setzero_pd(), \ + (__mmask8)-1, (int)(R))) #define _mm512_mask_reduce_round_pd(W, U, A, B, R) \ - (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \ - (__v8df)(__m512d)(W), \ - (__mmask8)(U), (int)(R)) + ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \ + (__v8df)(__m512d)(W), \ + (__mmask8)(U), (int)(R))) #define _mm512_maskz_reduce_round_pd(U, A, B, R) \ - (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \ - (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(U), (int)(R)) + ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \ + (__v8df)_mm512_setzero_pd(), \ + (__mmask8)(U), (int)(R))) #define _mm512_reduce_round_ps(A, B, R) \ - (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \ - (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)-1, (int)(R)) + ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \ + (__v16sf)_mm512_setzero_ps(), \ + (__mmask16)-1, (int)(R))) #define _mm512_mask_reduce_round_ps(W, U, A, B, R) \ - (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \ - (__v16sf)(__m512)(W), \ - (__mmask16)(U), (int)(R)) + ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \ + (__v16sf)(__m512)(W), \ + (__mmask16)(U), (int)(R))) #define _mm512_maskz_reduce_round_ps(U, A, B, R) \ - (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \ - (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(U), (int)(R)) + ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \ + (__v16sf)_mm512_setzero_ps(), \ + (__mmask16)(U), (int)(R))) #define _mm_reduce_ss(A, B, C) \ - (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \ - (int)(C), _MM_FROUND_CUR_DIRECTION) + ((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \ + (int)(C), _MM_FROUND_CUR_DIRECTION)) #define _mm_mask_reduce_ss(W, U, A, B, C) \ - (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)(__m128)(W), (__mmask8)(U), \ - (int)(C), _MM_FROUND_CUR_DIRECTION) + ((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)(__m128)(W), (__mmask8)(U), \ + (int)(C), _MM_FROUND_CUR_DIRECTION)) #define _mm_maskz_reduce_ss(U, A, B, C) \ - (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U), (int)(C), \ - _MM_FROUND_CUR_DIRECTION) + 
((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)(U), (int)(C), \ + _MM_FROUND_CUR_DIRECTION)) #define _mm_reduce_round_ss(A, B, C, R) \ - (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \ - (int)(C), (int)(R)) + ((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \ + (int)(C), (int)(R))) #define _mm_mask_reduce_round_ss(W, U, A, B, C, R) \ - (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)(__m128)(W), (__mmask8)(U), \ - (int)(C), (int)(R)) + ((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)(__m128)(W), (__mmask8)(U), \ + (int)(C), (int)(R))) #define _mm_maskz_reduce_round_ss(U, A, B, C, R) \ - (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U), (int)(C), (int)(R)) + ((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)(U), (int)(C), (int)(R))) #define _mm_reduce_sd(A, B, C) \ - (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1, (int)(C), \ - _MM_FROUND_CUR_DIRECTION) + ((__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)-1, (int)(C), \ + _MM_FROUND_CUR_DIRECTION)) #define _mm_mask_reduce_sd(W, U, A, B, C) \ - (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)(__m128d)(W), (__mmask8)(U), \ - (int)(C), _MM_FROUND_CUR_DIRECTION) + ((__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)(__m128d)(W), (__mmask8)(U), \ + (int)(C), _MM_FROUND_CUR_DIRECTION)) #define _mm_maskz_reduce_sd(U, A, B, C) \ - (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U), (int)(C), \ - _MM_FROUND_CUR_DIRECTION) + ((__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)(U), (int)(C), \ + _MM_FROUND_CUR_DIRECTION)) #define _mm_reduce_round_sd(A, B, C, R) \ - (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1, (int)(C), (int)(R)) + ((__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)-1, (int)(C), (int)(R))) #define _mm_mask_reduce_round_sd(W, U, A, B, C, R) \ - (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)(__m128d)(W), (__mmask8)(U), \ - (int)(C), (int)(R)) + ((__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)(__m128d)(W), (__mmask8)(U), \ + (int)(C), (int)(R))) #define _mm_maskz_reduce_round_sd(U, A, B, C, R) \ - (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U), (int)(C), (int)(R)) + ((__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)(U), (int)(C), (int)(R))) static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 
_mm512_movepi32_mask (__m512i __A) @@ -1218,158 +1218,158 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) } #define _mm512_extractf32x8_ps(A, imm) \ - (__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \ - (__v8sf)_mm256_undefined_ps(), \ - (__mmask8)-1) + ((__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \ + (__v8sf)_mm256_undefined_ps(), \ + (__mmask8)-1)) #define _mm512_mask_extractf32x8_ps(W, U, A, imm) \ - (__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \ - (__v8sf)(__m256)(W), \ - (__mmask8)(U)) + ((__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \ + (__v8sf)(__m256)(W), \ + (__mmask8)(U))) #define _mm512_maskz_extractf32x8_ps(U, A, imm) \ - (__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \ - (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)(U)) + ((__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \ + (__v8sf)_mm256_setzero_ps(), \ + (__mmask8)(U))) #define _mm512_extractf64x2_pd(A, imm) \ - (__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \ - (int)(imm), \ - (__v2df)_mm_undefined_pd(), \ - (__mmask8)-1) + ((__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \ + (int)(imm), \ + (__v2df)_mm_undefined_pd(), \ + (__mmask8)-1)) #define _mm512_mask_extractf64x2_pd(W, U, A, imm) \ - (__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \ - (int)(imm), \ - (__v2df)(__m128d)(W), \ - (__mmask8)(U)) + ((__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \ + (int)(imm), \ + (__v2df)(__m128d)(W), \ + (__mmask8)(U))) #define _mm512_maskz_extractf64x2_pd(U, A, imm) \ - (__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \ - (int)(imm), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U)) + ((__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \ + (int)(imm), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)(U))) #define _mm512_extracti32x8_epi32(A, imm) \ - (__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \ - (__v8si)_mm256_undefined_si256(), \ - (__mmask8)-1) + ((__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \ + (__v8si)_mm256_undefined_si256(), \ + (__mmask8)-1)) #define _mm512_mask_extracti32x8_epi32(W, U, A, imm) \ - (__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \ - (__v8si)(__m256i)(W), \ - (__mmask8)(U)) + ((__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \ + (__v8si)(__m256i)(W), \ + (__mmask8)(U))) #define _mm512_maskz_extracti32x8_epi32(U, A, imm) \ - (__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \ - (__v8si)_mm256_setzero_si256(), \ - (__mmask8)(U)) + ((__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \ + (__v8si)_mm256_setzero_si256(), \ + (__mmask8)(U))) #define _mm512_extracti64x2_epi64(A, imm) \ - (__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \ + ((__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \ (int)(imm), \ (__v2di)_mm_undefined_si128(), \ - (__mmask8)-1) + (__mmask8)-1)) #define _mm512_mask_extracti64x2_epi64(W, U, A, imm) \ - (__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \ - (int)(imm), \ - (__v2di)(__m128i)(W), \ - (__mmask8)(U)) + ((__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \ + (int)(imm), \ + (__v2di)(__m128i)(W), \ + (__mmask8)(U))) #define 
_mm512_maskz_extracti64x2_epi64(U, A, imm) \ - (__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \ - (int)(imm), \ - (__v2di)_mm_setzero_si128(), \ - (__mmask8)(U)) + ((__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \ + (int)(imm), \ + (__v2di)_mm_setzero_si128(), \ + (__mmask8)(U))) #define _mm512_insertf32x8(A, B, imm) \ - (__m512)__builtin_ia32_insertf32x8((__v16sf)(__m512)(A), \ - (__v8sf)(__m256)(B), (int)(imm)) + ((__m512)__builtin_ia32_insertf32x8((__v16sf)(__m512)(A), \ + (__v8sf)(__m256)(B), (int)(imm))) #define _mm512_mask_insertf32x8(W, U, A, B, imm) \ - (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ + ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_insertf32x8((A), (B), (imm)), \ - (__v16sf)(__m512)(W)) + (__v16sf)(__m512)(W))) #define _mm512_maskz_insertf32x8(U, A, B, imm) \ - (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ + ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_insertf32x8((A), (B), (imm)), \ - (__v16sf)_mm512_setzero_ps()) + (__v16sf)_mm512_setzero_ps())) #define _mm512_insertf64x2(A, B, imm) \ - (__m512d)__builtin_ia32_insertf64x2_512((__v8df)(__m512d)(A), \ - (__v2df)(__m128d)(B), (int)(imm)) + ((__m512d)__builtin_ia32_insertf64x2_512((__v8df)(__m512d)(A), \ + (__v2df)(__m128d)(B), (int)(imm))) #define _mm512_mask_insertf64x2(W, U, A, B, imm) \ - (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ + ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_insertf64x2((A), (B), (imm)), \ - (__v8df)(__m512d)(W)) + (__v8df)(__m512d)(W))) #define _mm512_maskz_insertf64x2(U, A, B, imm) \ - (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ + ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_insertf64x2((A), (B), (imm)), \ - (__v8df)_mm512_setzero_pd()) + (__v8df)_mm512_setzero_pd())) #define _mm512_inserti32x8(A, B, imm) \ - (__m512i)__builtin_ia32_inserti32x8((__v16si)(__m512i)(A), \ - (__v8si)(__m256i)(B), (int)(imm)) + ((__m512i)__builtin_ia32_inserti32x8((__v16si)(__m512i)(A), \ + (__v8si)(__m256i)(B), (int)(imm))) #define _mm512_mask_inserti32x8(W, U, A, B, imm) \ - (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ + ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ (__v16si)_mm512_inserti32x8((A), (B), (imm)), \ - (__v16si)(__m512i)(W)) + (__v16si)(__m512i)(W))) #define _mm512_maskz_inserti32x8(U, A, B, imm) \ - (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ + ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ (__v16si)_mm512_inserti32x8((A), (B), (imm)), \ - (__v16si)_mm512_setzero_si512()) + (__v16si)_mm512_setzero_si512())) #define _mm512_inserti64x2(A, B, imm) \ - (__m512i)__builtin_ia32_inserti64x2_512((__v8di)(__m512i)(A), \ - (__v2di)(__m128i)(B), (int)(imm)) + ((__m512i)__builtin_ia32_inserti64x2_512((__v8di)(__m512i)(A), \ + (__v2di)(__m128i)(B), (int)(imm))) #define _mm512_mask_inserti64x2(W, U, A, B, imm) \ - (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ + ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ (__v8di)_mm512_inserti64x2((A), (B), (imm)), \ - (__v8di)(__m512i)(W)) + (__v8di)(__m512i)(W))) #define _mm512_maskz_inserti64x2(U, A, B, imm) \ - (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ + ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ (__v8di)_mm512_inserti64x2((A), (B), (imm)), \ - (__v8di)_mm512_setzero_si512()) + (__v8di)_mm512_setzero_si512())) #define _mm512_mask_fpclass_ps_mask(U, A, imm) \ - (__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \ 
- (int)(imm), (__mmask16)(U)) + ((__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \ + (int)(imm), (__mmask16)(U))) #define _mm512_fpclass_ps_mask(A, imm) \ - (__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \ - (int)(imm), (__mmask16)-1) + ((__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \ + (int)(imm), (__mmask16)-1)) #define _mm512_mask_fpclass_pd_mask(U, A, imm) \ - (__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \ - (__mmask8)(U)) + ((__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \ + (__mmask8)(U))) #define _mm512_fpclass_pd_mask(A, imm) \ - (__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \ - (__mmask8)-1) + ((__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \ + (__mmask8)-1)) #define _mm_fpclass_sd_mask(A, imm) \ - (__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \ - (__mmask8)-1) + ((__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \ + (__mmask8)-1)) #define _mm_mask_fpclass_sd_mask(U, A, imm) \ - (__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \ - (__mmask8)(U)) + ((__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \ + (__mmask8)(U))) #define _mm_fpclass_ss_mask(A, imm) \ - (__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \ - (__mmask8)-1) + ((__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \ + (__mmask8)-1)) #define _mm_mask_fpclass_ss_mask(U, A, imm) \ - (__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \ - (__mmask8)(U)) + ((__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \ + (__mmask8)(U))) #undef __DEFAULT_FN_ATTRS512 #undef __DEFAULT_FN_ATTRS diff --git a/clang/lib/Headers/avx512erintrin.h b/clang/lib/Headers/avx512erintrin.h index 8570061699068..1c5a2d2d208ff 100644 --- a/clang/lib/Headers/avx512erintrin.h +++ b/clang/lib/Headers/avx512erintrin.h @@ -15,19 +15,19 @@ /* exp2a23 */ #define _mm512_exp2a23_round_pd(A, R) \ - (__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \ - (__v8df)_mm512_setzero_pd(), \ - (__mmask8)-1, (int)(R)) + ((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \ + (__v8df)_mm512_setzero_pd(), \ + (__mmask8)-1, (int)(R))) #define _mm512_mask_exp2a23_round_pd(S, M, A, R) \ - (__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(S), (__mmask8)(M), \ - (int)(R)) + ((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(S), (__mmask8)(M), \ + (int)(R))) #define _mm512_maskz_exp2a23_round_pd(M, A, R) \ - (__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \ - (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(M), (int)(R)) + ((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \ + (__v8df)_mm512_setzero_pd(), \ + (__mmask8)(M), (int)(R))) #define _mm512_exp2a23_pd(A) \ _mm512_exp2a23_round_pd((A), _MM_FROUND_CUR_DIRECTION) @@ -39,19 +39,19 @@ _mm512_maskz_exp2a23_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION) #define _mm512_exp2a23_round_ps(A, R) \ - (__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \ - (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)-1, (int)(R)) + ((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \ + (__v16sf)_mm512_setzero_ps(), \ + (__mmask16)-1, (int)(R))) #define _mm512_mask_exp2a23_round_ps(S, M, A, R) \ - (__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(S), (__mmask16)(M), 
\ - (int)(R)) + ((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(S), (__mmask16)(M), \ + (int)(R))) #define _mm512_maskz_exp2a23_round_ps(M, A, R) \ - (__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \ - (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(M), (int)(R)) + ((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \ + (__v16sf)_mm512_setzero_ps(), \ + (__mmask16)(M), (int)(R))) #define _mm512_exp2a23_ps(A) \ _mm512_exp2a23_round_ps((A), _MM_FROUND_CUR_DIRECTION) @@ -64,19 +64,19 @@ /* rsqrt28 */ #define _mm512_rsqrt28_round_pd(A, R) \ - (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \ - (__v8df)_mm512_setzero_pd(), \ - (__mmask8)-1, (int)(R)) + ((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \ + (__v8df)_mm512_setzero_pd(), \ + (__mmask8)-1, (int)(R))) #define _mm512_mask_rsqrt28_round_pd(S, M, A, R) \ - (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(S), (__mmask8)(M), \ - (int)(R)) + ((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(S), (__mmask8)(M), \ + (int)(R))) #define _mm512_maskz_rsqrt28_round_pd(M, A, R) \ - (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \ - (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(M), (int)(R)) + ((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \ + (__v8df)_mm512_setzero_pd(), \ + (__mmask8)(M), (int)(R))) #define _mm512_rsqrt28_pd(A) \ _mm512_rsqrt28_round_pd((A), _MM_FROUND_CUR_DIRECTION) @@ -88,19 +88,19 @@ _mm512_maskz_rsqrt28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION) #define _mm512_rsqrt28_round_ps(A, R) \ - (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \ - (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)-1, (int)(R)) + ((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \ + (__v16sf)_mm512_setzero_ps(), \ + (__mmask16)-1, (int)(R))) #define _mm512_mask_rsqrt28_round_ps(S, M, A, R) \ - (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(S), (__mmask16)(M), \ - (int)(R)) + ((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(S), (__mmask16)(M), \ + (int)(R))) #define _mm512_maskz_rsqrt28_round_ps(M, A, R) \ - (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \ - (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(M), (int)(R)) + ((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \ + (__v16sf)_mm512_setzero_ps(), \ + (__mmask16)(M), (int)(R))) #define _mm512_rsqrt28_ps(A) \ _mm512_rsqrt28_round_ps((A), _MM_FROUND_CUR_DIRECTION) @@ -112,22 +112,22 @@ _mm512_maskz_rsqrt28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION) #define _mm_rsqrt28_round_ss(A, B, R) \ - (__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)-1, (int)(R)) + ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)-1, (int)(R))) #define _mm_mask_rsqrt28_round_ss(S, M, A, B, R) \ - (__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)(__m128)(S), \ - (__mmask8)(M), (int)(R)) + ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)(__m128)(S), \ + (__mmask8)(M), (int)(R))) #define _mm_maskz_rsqrt28_round_ss(M, A, B, R) \ - (__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(M), (int)(R)) + 
((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)(M), (int)(R))) #define _mm_rsqrt28_ss(A, B) \ _mm_rsqrt28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION) @@ -139,22 +139,22 @@ _mm_maskz_rsqrt28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION) #define _mm_rsqrt28_round_sd(A, B, R) \ - (__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1, (int)(R)) + ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)-1, (int)(R))) #define _mm_mask_rsqrt28_round_sd(S, M, A, B, R) \ - (__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)(__m128d)(S), \ - (__mmask8)(M), (int)(R)) + ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)(__m128d)(S), \ + (__mmask8)(M), (int)(R))) #define _mm_maskz_rsqrt28_round_sd(M, A, B, R) \ - (__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)(M), (int)(R)) + ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)(M), (int)(R))) #define _mm_rsqrt28_sd(A, B) \ _mm_rsqrt28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION) @@ -167,19 +167,19 @@ /* rcp28 */ #define _mm512_rcp28_round_pd(A, R) \ - (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \ - (__v8df)_mm512_setzero_pd(), \ - (__mmask8)-1, (int)(R)) + ((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \ + (__v8df)_mm512_setzero_pd(), \ + (__mmask8)-1, (int)(R))) #define _mm512_mask_rcp28_round_pd(S, M, A, R) \ - (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(S), (__mmask8)(M), \ - (int)(R)) + ((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(S), (__mmask8)(M), \ + (int)(R))) #define _mm512_maskz_rcp28_round_pd(M, A, R) \ - (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \ - (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(M), (int)(R)) + ((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \ + (__v8df)_mm512_setzero_pd(), \ + (__mmask8)(M), (int)(R))) #define _mm512_rcp28_pd(A) \ _mm512_rcp28_round_pd((A), _MM_FROUND_CUR_DIRECTION) @@ -191,19 +191,19 @@ _mm512_maskz_rcp28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION) #define _mm512_rcp28_round_ps(A, R) \ - (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \ - (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)-1, (int)(R)) + ((__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \ + (__v16sf)_mm512_setzero_ps(), \ + (__mmask16)-1, (int)(R))) #define _mm512_mask_rcp28_round_ps(S, M, A, R) \ - (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(S), (__mmask16)(M), \ - (int)(R)) + ((__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(S), (__mmask16)(M), \ + (int)(R))) #define _mm512_maskz_rcp28_round_ps(M, A, R) \ - (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \ - (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(M), (int)(R)) + ((__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \ + (__v16sf)_mm512_setzero_ps(), \ + (__mmask16)(M), (int)(R))) #define _mm512_rcp28_ps(A) \ _mm512_rcp28_round_ps((A), _MM_FROUND_CUR_DIRECTION) @@ -215,22 +215,22 @@ _mm512_maskz_rcp28_round_ps((M), (A), 
_MM_FROUND_CUR_DIRECTION) #define _mm_rcp28_round_ss(A, B, R) \ - (__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)-1, (int)(R)) + ((__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)-1, (int)(R))) #define _mm_mask_rcp28_round_ss(S, M, A, B, R) \ - (__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)(__m128)(S), \ - (__mmask8)(M), (int)(R)) + ((__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)(__m128)(S), \ + (__mmask8)(M), (int)(R))) #define _mm_maskz_rcp28_round_ss(M, A, B, R) \ - (__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(M), (int)(R)) + ((__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)(M), (int)(R))) #define _mm_rcp28_ss(A, B) \ _mm_rcp28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION) @@ -242,22 +242,22 @@ _mm_maskz_rcp28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION) #define _mm_rcp28_round_sd(A, B, R) \ - (__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1, (int)(R)) + ((__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)-1, (int)(R))) #define _mm_mask_rcp28_round_sd(S, M, A, B, R) \ - (__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)(__m128d)(S), \ - (__mmask8)(M), (int)(R)) + ((__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)(__m128d)(S), \ + (__mmask8)(M), (int)(R))) #define _mm_maskz_rcp28_round_sd(M, A, B, R) \ - (__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)(M), (int)(R)) + ((__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)(M), (int)(R))) #define _mm_rcp28_sd(A, B) \ _mm_rcp28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION) diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 010bcadab0195..df298640523b7 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -937,18 +937,18 @@ _mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B) } #define _mm512_max_round_pd(A, B, R) \ - (__m512d)__builtin_ia32_maxpd512((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), (int)(R)) + ((__m512d)__builtin_ia32_maxpd512((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), (int)(R))) #define _mm512_mask_max_round_pd(W, U, A, B, R) \ - (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ + ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_max_round_pd((A), (B), (R)), \ - (__v8df)(W)) + (__v8df)(W))) #define _mm512_maskz_max_round_pd(U, A, B, R) \ - (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ + ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_max_round_pd((A), (B), (R)), \ - (__v8df)_mm512_setzero_pd()) + (__v8df)_mm512_setzero_pd())) static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_max_pd(__m512d __A, __m512d __B) @@ -974,18 +974,18 @@ _mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B) } 
#define _mm512_max_round_ps(A, B, R) \ - (__m512)__builtin_ia32_maxps512((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), (int)(R)) + ((__m512)__builtin_ia32_maxps512((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), (int)(R))) #define _mm512_mask_max_round_ps(W, U, A, B, R) \ - (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ + ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_max_round_ps((A), (B), (R)), \ - (__v16sf)(W)) + (__v16sf)(W))) #define _mm512_maskz_max_round_ps(U, A, B, R) \ - (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ + ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_max_round_ps((A), (B), (R)), \ - (__v16sf)_mm512_setzero_ps()) + (__v16sf)_mm512_setzero_ps())) static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_max_ps(__m512 __A, __m512 __B) @@ -1029,22 +1029,22 @@ _mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) { } #define _mm_max_round_ss(A, B, R) \ - (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)-1, (int)(R)) + ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)-1, (int)(R))) #define _mm_mask_max_round_ss(W, U, A, B, R) \ - (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)(__m128)(W), (__mmask8)(U), \ - (int)(R)) + ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)(__m128)(W), (__mmask8)(U), \ + (int)(R))) #define _mm_maskz_max_round_ss(U, A, B, R) \ - (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U), (int)(R)) + ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)(U), (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { @@ -1065,22 +1065,22 @@ _mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) { } #define _mm_max_round_sd(A, B, R) \ - (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1, (int)(R)) + ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)-1, (int)(R))) #define _mm_mask_max_round_sd(W, U, A, B, R) \ - (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)(__m128d)(W), \ - (__mmask8)(U), (int)(R)) + ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)(__m128d)(W), \ + (__mmask8)(U), (int)(R))) #define _mm_maskz_max_round_sd(U, A, B, R) \ - (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U), (int)(R)) + ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)(U), (int)(R))) static __inline __m512i __DEFAULT_FN_ATTRS512 @@ -1172,18 +1172,18 @@ _mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B) } #define _mm512_min_round_pd(A, B, R) \ - (__m512d)__builtin_ia32_minpd512((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), (int)(R)) + ((__m512d)__builtin_ia32_minpd512((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), (int)(R))) #define _mm512_mask_min_round_pd(W, U, A, B, R) \ - 
(__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ + ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_min_round_pd((A), (B), (R)), \ - (__v8df)(W)) + (__v8df)(W))) #define _mm512_maskz_min_round_pd(U, A, B, R) \ - (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ + ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_min_round_pd((A), (B), (R)), \ - (__v8df)_mm512_setzero_pd()) + (__v8df)_mm512_setzero_pd())) static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_min_pd(__m512d __A, __m512d __B) @@ -1209,18 +1209,18 @@ _mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B) } #define _mm512_min_round_ps(A, B, R) \ - (__m512)__builtin_ia32_minps512((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), (int)(R)) + ((__m512)__builtin_ia32_minps512((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), (int)(R))) #define _mm512_mask_min_round_ps(W, U, A, B, R) \ - (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ + ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_min_round_ps((A), (B), (R)), \ - (__v16sf)(W)) + (__v16sf)(W))) #define _mm512_maskz_min_round_ps(U, A, B, R) \ - (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ + ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_min_round_ps((A), (B), (R)), \ - (__v16sf)_mm512_setzero_ps()) + (__v16sf)_mm512_setzero_ps())) static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_min_ps(__m512 __A, __m512 __B) @@ -1264,22 +1264,22 @@ _mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) { } #define _mm_min_round_ss(A, B, R) \ - (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)-1, (int)(R)) + ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)-1, (int)(R))) #define _mm_mask_min_round_ss(W, U, A, B, R) \ - (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)(__m128)(W), (__mmask8)(U), \ - (int)(R)) + ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)(__m128)(W), (__mmask8)(U), \ + (int)(R))) #define _mm_maskz_min_round_ss(U, A, B, R) \ - (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U), (int)(R)) + ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)(U), (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { @@ -1300,22 +1300,22 @@ _mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) { } #define _mm_min_round_sd(A, B, R) \ - (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1, (int)(R)) + ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)-1, (int)(R))) #define _mm_mask_min_round_sd(W, U, A, B, R) \ - (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)(__m128d)(W), \ - (__mmask8)(U), (int)(R)) + ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)(__m128d)(W), \ + (__mmask8)(U), (int)(R))) #define _mm_maskz_min_round_sd(U, A, B, R) \ - (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \ - 
(__v2df)(__m128d)(B), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U), (int)(R)) + ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)(U), (int)(R))) static __inline __m512i __DEFAULT_FN_ATTRS512 @@ -1485,17 +1485,17 @@ _mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { } #define _mm512_sqrt_round_pd(A, R) \ - (__m512d)__builtin_ia32_sqrtpd512((__v8df)(__m512d)(A), (int)(R)) + ((__m512d)__builtin_ia32_sqrtpd512((__v8df)(__m512d)(A), (int)(R))) #define _mm512_mask_sqrt_round_pd(W, U, A, R) \ - (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ + ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_sqrt_round_pd((A), (R)), \ - (__v8df)(__m512d)(W)) + (__v8df)(__m512d)(W))) #define _mm512_maskz_sqrt_round_pd(U, A, R) \ - (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ + ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_sqrt_round_pd((A), (R)), \ - (__v8df)_mm512_setzero_pd()) + (__v8df)_mm512_setzero_pd())) static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_sqrt_pd(__m512d __A) @@ -1521,17 +1521,17 @@ _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A) } #define _mm512_sqrt_round_ps(A, R) \ - (__m512)__builtin_ia32_sqrtps512((__v16sf)(__m512)(A), (int)(R)) + ((__m512)__builtin_ia32_sqrtps512((__v16sf)(__m512)(A), (int)(R))) #define _mm512_mask_sqrt_round_ps(W, U, A, R) \ - (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ + ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_sqrt_round_ps((A), (R)), \ - (__v16sf)(__m512)(W)) + (__v16sf)(__m512)(W))) #define _mm512_maskz_sqrt_round_ps(U, A, R) \ - (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ + ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_sqrt_round_ps((A), (R)), \ - (__v16sf)_mm512_setzero_ps()) + (__v16sf)_mm512_setzero_ps())) static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_sqrt_ps(__m512 __A) @@ -1900,22 +1900,22 @@ _mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) { } #define _mm_add_round_ss(A, B, R) \ - (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)-1, (int)(R)) + ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)-1, (int)(R))) #define _mm_mask_add_round_ss(W, U, A, B, R) \ - (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)(__m128)(W), (__mmask8)(U), \ - (int)(R)) + ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)(__m128)(W), (__mmask8)(U), \ + (int)(R))) #define _mm_maskz_add_round_ss(U, A, B, R) \ - (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U), (int)(R)) + ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)(U), (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { @@ -1929,22 +1929,22 @@ _mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) { return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd()); } #define _mm_add_round_sd(A, B, R) \ - (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1, (int)(R)) + 
((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)-1, (int)(R))) #define _mm_mask_add_round_sd(W, U, A, B, R) \ - (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)(__m128d)(W), \ - (__mmask8)(U), (int)(R)) + ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)(__m128d)(W), \ + (__mmask8)(U), (int)(R))) #define _mm_maskz_add_round_sd(U, A, B, R) \ - (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U), (int)(R)) + ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)(U), (int)(R))) static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { @@ -1975,32 +1975,32 @@ _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) { } #define _mm512_add_round_pd(A, B, R) \ - (__m512d)__builtin_ia32_addpd512((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), (int)(R)) + ((__m512d)__builtin_ia32_addpd512((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), (int)(R))) #define _mm512_mask_add_round_pd(W, U, A, B, R) \ - (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ + ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_add_round_pd((A), (B), (R)), \ - (__v8df)(__m512d)(W)) + (__v8df)(__m512d)(W))) #define _mm512_maskz_add_round_pd(U, A, B, R) \ - (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ + ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_add_round_pd((A), (B), (R)), \ - (__v8df)_mm512_setzero_pd()) + (__v8df)_mm512_setzero_pd())) #define _mm512_add_round_ps(A, B, R) \ - (__m512)__builtin_ia32_addps512((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), (int)(R)) + ((__m512)__builtin_ia32_addps512((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), (int)(R))) #define _mm512_mask_add_round_ps(W, U, A, B, R) \ - (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ + ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_add_round_ps((A), (B), (R)), \ - (__v16sf)(__m512)(W)) + (__v16sf)(__m512)(W))) #define _mm512_maskz_add_round_ps(U, A, B, R) \ - (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ + ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_add_round_ps((A), (B), (R)), \ - (__v16sf)_mm512_setzero_ps()) + (__v16sf)_mm512_setzero_ps())) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { @@ -2014,22 +2014,22 @@ _mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) { return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps()); } #define _mm_sub_round_ss(A, B, R) \ - (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)-1, (int)(R)) + ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)-1, (int)(R))) #define _mm_mask_sub_round_ss(W, U, A, B, R) \ - (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)(__m128)(W), (__mmask8)(U), \ - (int)(R)) + ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)(__m128)(W), (__mmask8)(U), \ + (int)(R))) #define _mm_maskz_sub_round_ss(U, A, B, R) \ - 
(__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U), (int)(R)) + ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)(U), (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { @@ -2044,22 +2044,22 @@ _mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) { } #define _mm_sub_round_sd(A, B, R) \ - (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1, (int)(R)) + ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)-1, (int)(R))) #define _mm_mask_sub_round_sd(W, U, A, B, R) \ - (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)(__m128d)(W), \ - (__mmask8)(U), (int)(R)) + ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)(__m128d)(W), \ + (__mmask8)(U), (int)(R))) #define _mm_maskz_sub_round_sd(U, A, B, R) \ - (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U), (int)(R)) + ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)(U), (int)(R))) static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { @@ -2090,32 +2090,32 @@ _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) { } #define _mm512_sub_round_pd(A, B, R) \ - (__m512d)__builtin_ia32_subpd512((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), (int)(R)) + ((__m512d)__builtin_ia32_subpd512((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), (int)(R))) #define _mm512_mask_sub_round_pd(W, U, A, B, R) \ - (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ + ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_sub_round_pd((A), (B), (R)), \ - (__v8df)(__m512d)(W)) + (__v8df)(__m512d)(W))) #define _mm512_maskz_sub_round_pd(U, A, B, R) \ - (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ + ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_sub_round_pd((A), (B), (R)), \ - (__v8df)_mm512_setzero_pd()) + (__v8df)_mm512_setzero_pd())) #define _mm512_sub_round_ps(A, B, R) \ - (__m512)__builtin_ia32_subps512((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), (int)(R)) + ((__m512)__builtin_ia32_subps512((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), (int)(R))) #define _mm512_mask_sub_round_ps(W, U, A, B, R) \ - (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ + ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \ - (__v16sf)(__m512)(W)) + (__v16sf)(__m512)(W))) #define _mm512_maskz_sub_round_ps(U, A, B, R) \ - (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ + ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \ - (__v16sf)_mm512_setzero_ps()) + (__v16sf)_mm512_setzero_ps())) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { @@ -2129,22 +2129,22 @@ _mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) { return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps()); } #define _mm_mul_round_ss(A, B, R) \ - 
(__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)-1, (int)(R)) + ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)-1, (int)(R))) #define _mm_mask_mul_round_ss(W, U, A, B, R) \ - (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)(__m128)(W), (__mmask8)(U), \ - (int)(R)) + ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)(__m128)(W), (__mmask8)(U), \ + (int)(R))) #define _mm_maskz_mul_round_ss(U, A, B, R) \ - (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U), (int)(R)) + ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)(U), (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { @@ -2159,22 +2159,22 @@ _mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) { } #define _mm_mul_round_sd(A, B, R) \ - (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1, (int)(R)) + ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)-1, (int)(R))) #define _mm_mask_mul_round_sd(W, U, A, B, R) \ - (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)(__m128d)(W), \ - (__mmask8)(U), (int)(R)) + ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)(__m128d)(W), \ + (__mmask8)(U), (int)(R))) #define _mm_maskz_mul_round_sd(U, A, B, R) \ - (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U), (int)(R)) + ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)(U), (int)(R))) static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { @@ -2205,32 +2205,32 @@ _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) { } #define _mm512_mul_round_pd(A, B, R) \ - (__m512d)__builtin_ia32_mulpd512((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), (int)(R)) + ((__m512d)__builtin_ia32_mulpd512((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), (int)(R))) #define _mm512_mask_mul_round_pd(W, U, A, B, R) \ - (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ + ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_mul_round_pd((A), (B), (R)), \ - (__v8df)(__m512d)(W)) + (__v8df)(__m512d)(W))) #define _mm512_maskz_mul_round_pd(U, A, B, R) \ - (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ + ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_mul_round_pd((A), (B), (R)), \ - (__v8df)_mm512_setzero_pd()) + (__v8df)_mm512_setzero_pd())) #define _mm512_mul_round_ps(A, B, R) \ - (__m512)__builtin_ia32_mulps512((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), (int)(R)) + ((__m512)__builtin_ia32_mulps512((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), (int)(R))) #define _mm512_mask_mul_round_ps(W, U, A, B, R) \ - (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ + ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 
(__v16sf)_mm512_mul_round_ps((A), (B), (R)), \ - (__v16sf)(__m512)(W)) + (__v16sf)(__m512)(W))) #define _mm512_maskz_mul_round_ps(U, A, B, R) \ - (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ + ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \ - (__v16sf)_mm512_setzero_ps()) + (__v16sf)_mm512_setzero_ps())) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { @@ -2245,22 +2245,22 @@ _mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) { } #define _mm_div_round_ss(A, B, R) \ - (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)-1, (int)(R)) + ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)-1, (int)(R))) #define _mm_mask_div_round_ss(W, U, A, B, R) \ - (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)(__m128)(W), (__mmask8)(U), \ - (int)(R)) + ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)(__m128)(W), (__mmask8)(U), \ + (int)(R))) #define _mm_maskz_div_round_ss(U, A, B, R) \ - (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U), (int)(R)) + ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)(U), (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { @@ -2275,22 +2275,22 @@ _mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) { } #define _mm_div_round_sd(A, B, R) \ - (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1, (int)(R)) + ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)-1, (int)(R))) #define _mm_mask_div_round_sd(W, U, A, B, R) \ - (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)(__m128d)(W), \ - (__mmask8)(U), (int)(R)) + ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)(__m128d)(W), \ + (__mmask8)(U), (int)(R))) #define _mm_maskz_div_round_sd(U, A, B, R) \ - (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U), (int)(R)) + ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)(U), (int)(R))) static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_div_pd(__m512d __a, __m512d __b) @@ -2333,179 +2333,179 @@ _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) { } #define _mm512_div_round_pd(A, B, R) \ - (__m512d)__builtin_ia32_divpd512((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), (int)(R)) + ((__m512d)__builtin_ia32_divpd512((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), (int)(R))) #define _mm512_mask_div_round_pd(W, U, A, B, R) \ - (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ + ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_div_round_pd((A), (B), (R)), \ - (__v8df)(__m512d)(W)) + (__v8df)(__m512d)(W))) #define _mm512_maskz_div_round_pd(U, A, B, R) \ - 
(__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ + ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_div_round_pd((A), (B), (R)), \ - (__v8df)_mm512_setzero_pd()) + (__v8df)_mm512_setzero_pd())) #define _mm512_div_round_ps(A, B, R) \ - (__m512)__builtin_ia32_divps512((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), (int)(R)) + ((__m512)__builtin_ia32_divps512((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), (int)(R))) #define _mm512_mask_div_round_ps(W, U, A, B, R) \ - (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ + ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_div_round_ps((A), (B), (R)), \ - (__v16sf)(__m512)(W)) + (__v16sf)(__m512)(W))) #define _mm512_maskz_div_round_ps(U, A, B, R) \ - (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ + ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_div_round_ps((A), (B), (R)), \ - (__v16sf)_mm512_setzero_ps()) + (__v16sf)_mm512_setzero_ps())) #define _mm512_roundscale_ps(A, B) \ - (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \ - (__v16sf)_mm512_undefined_ps(), \ - (__mmask16)-1, \ - _MM_FROUND_CUR_DIRECTION) + ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \ + (__v16sf)_mm512_undefined_ps(), \ + (__mmask16)-1, \ + _MM_FROUND_CUR_DIRECTION)) #define _mm512_mask_roundscale_ps(A, B, C, imm) \ - (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \ + ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \ (__v16sf)(__m512)(A), (__mmask16)(B), \ - _MM_FROUND_CUR_DIRECTION) + _MM_FROUND_CUR_DIRECTION)) #define _mm512_maskz_roundscale_ps(A, B, imm) \ - (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \ - (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(A), \ - _MM_FROUND_CUR_DIRECTION) + ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \ + (__v16sf)_mm512_setzero_ps(), \ + (__mmask16)(A), \ + _MM_FROUND_CUR_DIRECTION)) #define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) \ - (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \ + ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \ (__v16sf)(__m512)(A), (__mmask16)(B), \ - (int)(R)) + (int)(R))) #define _mm512_maskz_roundscale_round_ps(A, B, imm, R) \ - (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \ - (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(A), (int)(R)) + ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \ + (__v16sf)_mm512_setzero_ps(), \ + (__mmask16)(A), (int)(R))) #define _mm512_roundscale_round_ps(A, imm, R) \ - (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \ - (__v16sf)_mm512_undefined_ps(), \ - (__mmask16)-1, (int)(R)) + ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \ + (__v16sf)_mm512_undefined_ps(), \ + (__mmask16)-1, (int)(R))) #define _mm512_roundscale_pd(A, B) \ - (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \ - (__v8df)_mm512_undefined_pd(), \ - (__mmask8)-1, \ - _MM_FROUND_CUR_DIRECTION) + ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \ + (__v8df)_mm512_undefined_pd(), \ + (__mmask8)-1, \ + _MM_FROUND_CUR_DIRECTION)) #define _mm512_mask_roundscale_pd(A, B, C, imm) \ - (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \ + ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \ (__v8df)(__m512d)(A), (__mmask8)(B), \ - 
_MM_FROUND_CUR_DIRECTION) + _MM_FROUND_CUR_DIRECTION)) #define _mm512_maskz_roundscale_pd(A, B, imm) \ - (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \ - (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(A), \ - _MM_FROUND_CUR_DIRECTION) + ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \ + (__v8df)_mm512_setzero_pd(), \ + (__mmask8)(A), \ + _MM_FROUND_CUR_DIRECTION)) #define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) \ - (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \ + ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \ (__v8df)(__m512d)(A), (__mmask8)(B), \ - (int)(R)) + (int)(R))) #define _mm512_maskz_roundscale_round_pd(A, B, imm, R) \ - (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \ - (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(A), (int)(R)) + ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \ + (__v8df)_mm512_setzero_pd(), \ + (__mmask8)(A), (int)(R))) #define _mm512_roundscale_round_pd(A, imm, R) \ - (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \ - (__v8df)_mm512_undefined_pd(), \ - (__mmask8)-1, (int)(R)) + ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \ + (__v8df)_mm512_undefined_pd(), \ + (__mmask8)-1, (int)(R))) #define _mm512_fmadd_round_pd(A, B, C, R) \ - (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), \ - (__v8df)(__m512d)(C), \ - (__mmask8)-1, (int)(R)) + ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), \ + (__v8df)(__m512d)(C), \ + (__mmask8)-1, (int)(R))) #define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \ - (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), \ - (__v8df)(__m512d)(C), \ - (__mmask8)(U), (int)(R)) + ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), \ + (__v8df)(__m512d)(C), \ + (__mmask8)(U), (int)(R))) #define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \ - (__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), \ - (__v8df)(__m512d)(C), \ - (__mmask8)(U), (int)(R)) + ((__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), \ + (__v8df)(__m512d)(C), \ + (__mmask8)(U), (int)(R))) #define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \ - (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), \ - (__v8df)(__m512d)(C), \ - (__mmask8)(U), (int)(R)) + ((__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), \ + (__v8df)(__m512d)(C), \ + (__mmask8)(U), (int)(R))) #define _mm512_fmsub_round_pd(A, B, C, R) \ - (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), \ - -(__v8df)(__m512d)(C), \ - (__mmask8)-1, (int)(R)) + ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), \ + -(__v8df)(__m512d)(C), \ + (__mmask8)-1, (int)(R))) #define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \ - (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), \ - -(__v8df)(__m512d)(C), \ - (__mmask8)(U), (int)(R)) + ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), \ + -(__v8df)(__m512d)(C), \ + (__mmask8)(U), (int)(R))) #define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \ - (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \ - 
(__v8df)(__m512d)(B), \ - -(__v8df)(__m512d)(C), \ - (__mmask8)(U), (int)(R)) + ((__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), \ + -(__v8df)(__m512d)(C), \ + (__mmask8)(U), (int)(R))) #define _mm512_fnmadd_round_pd(A, B, C, R) \ - (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), \ - (__v8df)(__m512d)(C), \ - (__mmask8)-1, (int)(R)) + ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), \ + (__v8df)(__m512d)(C), \ + (__mmask8)-1, (int)(R))) #define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \ - (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), \ - (__v8df)(__m512d)(C), \ - (__mmask8)(U), (int)(R)) + ((__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), \ + (__v8df)(__m512d)(C), \ + (__mmask8)(U), (int)(R))) #define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \ - (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), \ - (__v8df)(__m512d)(C), \ - (__mmask8)(U), (int)(R)) + ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), \ + (__v8df)(__m512d)(C), \ + (__mmask8)(U), (int)(R))) #define _mm512_fnmsub_round_pd(A, B, C, R) \ - (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), \ - -(__v8df)(__m512d)(C), \ - (__mmask8)-1, (int)(R)) + ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), \ + -(__v8df)(__m512d)(C), \ + (__mmask8)-1, (int)(R))) #define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \ - (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), \ - -(__v8df)(__m512d)(C), \ - (__mmask8)(U), (int)(R)) + ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), \ + -(__v8df)(__m512d)(C), \ + (__mmask8)(U), (int)(R))) static __inline__ __m512d __DEFAULT_FN_ATTRS512 @@ -2629,87 +2629,87 @@ _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) } #define _mm512_fmadd_round_ps(A, B, C, R) \ - (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), \ - (__v16sf)(__m512)(C), \ - (__mmask16)-1, (int)(R)) + ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), \ + (__v16sf)(__m512)(C), \ + (__mmask16)-1, (int)(R))) #define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \ - (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), \ - (__v16sf)(__m512)(C), \ - (__mmask16)(U), (int)(R)) + ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), \ + (__v16sf)(__m512)(C), \ + (__mmask16)(U), (int)(R))) #define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \ - (__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), \ - (__v16sf)(__m512)(C), \ - (__mmask16)(U), (int)(R)) + ((__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), \ + (__v16sf)(__m512)(C), \ + (__mmask16)(U), (int)(R))) #define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \ - (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), \ - (__v16sf)(__m512)(C), \ - (__mmask16)(U), (int)(R)) + ((__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), \ + (__v16sf)(__m512)(C), \ + (__mmask16)(U), (int)(R))) #define _mm512_fmsub_round_ps(A, B, C, R) \ - 
(__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), \ - -(__v16sf)(__m512)(C), \ - (__mmask16)-1, (int)(R)) + ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), \ + -(__v16sf)(__m512)(C), \ + (__mmask16)-1, (int)(R))) #define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \ - (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), \ - -(__v16sf)(__m512)(C), \ - (__mmask16)(U), (int)(R)) + ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), \ + -(__v16sf)(__m512)(C), \ + (__mmask16)(U), (int)(R))) #define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \ - (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), \ - -(__v16sf)(__m512)(C), \ - (__mmask16)(U), (int)(R)) + ((__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), \ + -(__v16sf)(__m512)(C), \ + (__mmask16)(U), (int)(R))) #define _mm512_fnmadd_round_ps(A, B, C, R) \ - (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ - -(__v16sf)(__m512)(B), \ - (__v16sf)(__m512)(C), \ - (__mmask16)-1, (int)(R)) + ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ + -(__v16sf)(__m512)(B), \ + (__v16sf)(__m512)(C), \ + (__mmask16)-1, (int)(R))) #define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \ - (__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), \ - (__v16sf)(__m512)(C), \ - (__mmask16)(U), (int)(R)) + ((__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), \ + (__v16sf)(__m512)(C), \ + (__mmask16)(U), (int)(R))) #define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \ - (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), \ - (__v16sf)(__m512)(C), \ - (__mmask16)(U), (int)(R)) + ((__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), \ + (__v16sf)(__m512)(C), \ + (__mmask16)(U), (int)(R))) #define _mm512_fnmsub_round_ps(A, B, C, R) \ - (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ - -(__v16sf)(__m512)(B), \ - -(__v16sf)(__m512)(C), \ - (__mmask16)-1, (int)(R)) + ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ + -(__v16sf)(__m512)(B), \ + -(__v16sf)(__m512)(C), \ + (__mmask16)-1, (int)(R))) #define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \ - (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), \ - -(__v16sf)(__m512)(C), \ - (__mmask16)(U), (int)(R)) + ((__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), \ + -(__v16sf)(__m512)(C), \ + (__mmask16)(U), (int)(R))) static __inline__ __m512 __DEFAULT_FN_ATTRS512 @@ -2833,52 +2833,52 @@ _mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) } #define _mm512_fmaddsub_round_pd(A, B, C, R) \ - (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), \ - (__v8df)(__m512d)(C), \ - (__mmask8)-1, (int)(R)) + ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), \ + (__v8df)(__m512d)(C), \ + (__mmask8)-1, (int)(R))) #define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \ - (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), \ - (__v8df)(__m512d)(C), \ - (__mmask8)(U), (int)(R)) + ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), \ + (__v8df)(__m512d)(C), \ + 
(__mmask8)(U), (int)(R))) #define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \ - (__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), \ - (__v8df)(__m512d)(C), \ - (__mmask8)(U), (int)(R)) + ((__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), \ + (__v8df)(__m512d)(C), \ + (__mmask8)(U), (int)(R))) #define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \ - (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), \ - (__v8df)(__m512d)(C), \ - (__mmask8)(U), (int)(R)) + ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), \ + (__v8df)(__m512d)(C), \ + (__mmask8)(U), (int)(R))) #define _mm512_fmsubadd_round_pd(A, B, C, R) \ - (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), \ - -(__v8df)(__m512d)(C), \ - (__mmask8)-1, (int)(R)) + ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), \ + -(__v8df)(__m512d)(C), \ + (__mmask8)-1, (int)(R))) #define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \ - (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), \ - -(__v8df)(__m512d)(C), \ - (__mmask8)(U), (int)(R)) + ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), \ + -(__v8df)(__m512d)(C), \ + (__mmask8)(U), (int)(R))) #define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \ - (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), \ - -(__v8df)(__m512d)(C), \ - (__mmask8)(U), (int)(R)) + ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), \ + -(__v8df)(__m512d)(C), \ + (__mmask8)(U), (int)(R))) static __inline__ __m512d __DEFAULT_FN_ATTRS512 @@ -2952,52 +2952,52 @@ _mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) } #define _mm512_fmaddsub_round_ps(A, B, C, R) \ - (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), \ - (__v16sf)(__m512)(C), \ - (__mmask16)-1, (int)(R)) + ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), \ + (__v16sf)(__m512)(C), \ + (__mmask16)-1, (int)(R))) #define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \ - (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), \ - (__v16sf)(__m512)(C), \ - (__mmask16)(U), (int)(R)) + ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), \ + (__v16sf)(__m512)(C), \ + (__mmask16)(U), (int)(R))) #define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \ - (__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), \ - (__v16sf)(__m512)(C), \ - (__mmask16)(U), (int)(R)) + ((__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), \ + (__v16sf)(__m512)(C), \ + (__mmask16)(U), (int)(R))) #define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \ - (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), \ - (__v16sf)(__m512)(C), \ - (__mmask16)(U), (int)(R)) + ((__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), \ + (__v16sf)(__m512)(C), \ + (__mmask16)(U), (int)(R))) #define _mm512_fmsubadd_round_ps(A, B, C, R) \ - (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), \ - -(__v16sf)(__m512)(C), \ - 
(__mmask16)-1, (int)(R)) + ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), \ + -(__v16sf)(__m512)(C), \ + (__mmask16)-1, (int)(R))) #define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \ - (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), \ - -(__v16sf)(__m512)(C), \ - (__mmask16)(U), (int)(R)) + ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), \ + -(__v16sf)(__m512)(C), \ + (__mmask16)(U), (int)(R))) #define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \ - (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), \ - -(__v16sf)(__m512)(C), \ - (__mmask16)(U), (int)(R)) + ((__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), \ + -(__v16sf)(__m512)(C), \ + (__mmask16)(U), (int)(R))) static __inline__ __m512 __DEFAULT_FN_ATTRS512 @@ -3071,10 +3071,10 @@ _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) } #define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \ - (__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), \ - (__v8df)(__m512d)(C), \ - (__mmask8)(U), (int)(R)) + ((__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), \ + (__v8df)(__m512d)(C), \ + (__mmask8)(U), (int)(R))) static __inline__ __m512d __DEFAULT_FN_ATTRS512 @@ -3088,10 +3088,10 @@ _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) } #define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \ - (__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), \ - (__v16sf)(__m512)(C), \ - (__mmask16)(U), (int)(R)) + ((__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), \ + (__v16sf)(__m512)(C), \ + (__mmask16)(U), (int)(R))) static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) @@ -3104,10 +3104,10 @@ _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) } #define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \ - (__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), \ - (__v8df)(__m512d)(C), \ - (__mmask8)(U), (int)(R)) + ((__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), \ + (__v8df)(__m512d)(C), \ + (__mmask8)(U), (int)(R))) static __inline__ __m512d __DEFAULT_FN_ATTRS512 @@ -3121,10 +3121,10 @@ _mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) } #define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \ - (__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), \ - (__v16sf)(__m512)(C), \ - (__mmask16)(U), (int)(R)) + ((__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), \ + (__v16sf)(__m512)(C), \ + (__mmask16)(U), (int)(R))) static __inline__ __m512 __DEFAULT_FN_ATTRS512 @@ -3138,10 +3138,10 @@ _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) } #define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \ - (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ - -(__v8df)(__m512d)(B), \ - (__v8df)(__m512d)(C), \ - (__mmask8)(U), (int)(R)) + ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ + -(__v8df)(__m512d)(B), \ + (__v8df)(__m512d)(C), \ + (__mmask8)(U), (int)(R))) static __inline__ __m512d __DEFAULT_FN_ATTRS512 @@ -3155,10 +3155,10 @@ 
_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) } #define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \ - (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ - -(__v16sf)(__m512)(B), \ - (__v16sf)(__m512)(C), \ - (__mmask16)(U), (int)(R)) + ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ + -(__v16sf)(__m512)(B), \ + (__v16sf)(__m512)(C), \ + (__mmask16)(U), (int)(R))) static __inline__ __m512 __DEFAULT_FN_ATTRS512 @@ -3172,17 +3172,17 @@ _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) } #define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \ - (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ - -(__v8df)(__m512d)(B), \ - -(__v8df)(__m512d)(C), \ - (__mmask8)(U), (int)(R)) + ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ + -(__v8df)(__m512d)(B), \ + -(__v8df)(__m512d)(C), \ + (__mmask8)(U), (int)(R))) #define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \ - (__m512d)__builtin_ia32_vfmsubpd512_mask3(-(__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), \ - (__v8df)(__m512d)(C), \ - (__mmask8)(U), (int)(R)) + ((__m512d)__builtin_ia32_vfmsubpd512_mask3(-(__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), \ + (__v8df)(__m512d)(C), \ + (__mmask8)(U), (int)(R))) static __inline__ __m512d __DEFAULT_FN_ATTRS512 @@ -3206,17 +3206,17 @@ _mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) } #define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \ - (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ - -(__v16sf)(__m512)(B), \ - -(__v16sf)(__m512)(C), \ - (__mmask16)(U), (int)(R)) + ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ + -(__v16sf)(__m512)(B), \ + -(__v16sf)(__m512)(C), \ + (__mmask16)(U), (int)(R))) #define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \ - (__m512)__builtin_ia32_vfmsubps512_mask3(-(__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), \ - (__v16sf)(__m512)(C), \ - (__mmask16)(U), (int)(R)) + ((__m512)__builtin_ia32_vfmsubps512_mask3(-(__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), \ + (__v16sf)(__m512)(C), \ + (__mmask16)(U), (int)(R))) static __inline__ __m512 __DEFAULT_FN_ATTRS512 @@ -3312,63 +3312,63 @@ _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I, } #define _mm512_alignr_epi64(A, B, I) \ - (__m512i)__builtin_ia32_alignq512((__v8di)(__m512i)(A), \ - (__v8di)(__m512i)(B), (int)(I)) + ((__m512i)__builtin_ia32_alignq512((__v8di)(__m512i)(A), \ + (__v8di)(__m512i)(B), (int)(I))) #define _mm512_mask_alignr_epi64(W, U, A, B, imm) \ - (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ - (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \ - (__v8di)(__m512i)(W)) + ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ + (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \ + (__v8di)(__m512i)(W))) #define _mm512_maskz_alignr_epi64(U, A, B, imm) \ - (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ - (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \ - (__v8di)_mm512_setzero_si512()) + ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ + (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \ + (__v8di)_mm512_setzero_si512())) #define _mm512_alignr_epi32(A, B, I) \ - (__m512i)__builtin_ia32_alignd512((__v16si)(__m512i)(A), \ - (__v16si)(__m512i)(B), (int)(I)) + ((__m512i)__builtin_ia32_alignd512((__v16si)(__m512i)(A), \ + (__v16si)(__m512i)(B), (int)(I))) #define _mm512_mask_alignr_epi32(W, U, A, B, imm) \ - (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ - (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \ - 
(__v16si)(__m512i)(W)) + ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ + (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \ + (__v16si)(__m512i)(W))) #define _mm512_maskz_alignr_epi32(U, A, B, imm) \ - (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ - (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \ - (__v16si)_mm512_setzero_si512()) + ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ + (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \ + (__v16si)_mm512_setzero_si512())) /* Vector Extract */ #define _mm512_extractf64x4_pd(A, I) \ - (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \ - (__v4df)_mm256_undefined_pd(), \ - (__mmask8)-1) + ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \ + (__v4df)_mm256_undefined_pd(), \ + (__mmask8)-1)) #define _mm512_mask_extractf64x4_pd(W, U, A, imm) \ - (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \ - (__v4df)(__m256d)(W), \ - (__mmask8)(U)) + ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \ + (__v4df)(__m256d)(W), \ + (__mmask8)(U))) #define _mm512_maskz_extractf64x4_pd(U, A, imm) \ - (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \ - (__v4df)_mm256_setzero_pd(), \ - (__mmask8)(U)) + ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \ + (__v4df)_mm256_setzero_pd(), \ + (__mmask8)(U))) #define _mm512_extractf32x4_ps(A, I) \ - (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \ - (__v4sf)_mm_undefined_ps(), \ - (__mmask8)-1) + ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \ + (__v4sf)_mm_undefined_ps(), \ + (__mmask8)-1)) #define _mm512_mask_extractf32x4_ps(W, U, A, imm) \ - (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \ - (__v4sf)(__m128)(W), \ - (__mmask8)(U)) + ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \ + (__v4sf)(__m128)(W), \ + (__mmask8)(U))) #define _mm512_maskz_extractf32x4_ps(U, A, imm) \ - (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U)) + ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)(U))) /* Vector Blend */ @@ -3407,14 +3407,14 @@ _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) /* Compare */ #define _mm512_cmp_round_ps_mask(A, B, P, R) \ - (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), (int)(P), \ - (__mmask16)-1, (int)(R)) + ((__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), (int)(P), \ + (__mmask16)-1, (int)(R))) #define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) \ - (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), (int)(P), \ - (__mmask16)(U), (int)(R)) + ((__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), (int)(P), \ + (__mmask16)(U), (int)(R))) #define _mm512_cmp_ps_mask(A, B, P) \ _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION) @@ -3462,14 +3462,14 @@ _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q) #define _mm512_cmp_round_pd_mask(A, B, P, R) \ - (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), (int)(P), \ - (__mmask8)-1, (int)(R)) + ((__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \ + 
(__v8df)(__m512d)(B), (int)(P), \ + (__mmask8)-1, (int)(R))) #define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) \ - (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), (int)(P), \ - (__mmask8)(U), (int)(R)) + ((__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), (int)(P), \ + (__mmask8)(U), (int)(R))) #define _mm512_cmp_pd_mask(A, B, P) \ _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION) @@ -3519,19 +3519,19 @@ _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) /* Conversion */ #define _mm512_cvtt_roundps_epu32(A, R) \ - (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \ - (__v16si)_mm512_undefined_epi32(), \ - (__mmask16)-1, (int)(R)) + ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \ + (__v16si)_mm512_undefined_epi32(), \ + (__mmask16)-1, (int)(R))) #define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) \ - (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \ - (__v16si)(__m512i)(W), \ - (__mmask16)(U), (int)(R)) + ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \ + (__v16si)(__m512i)(W), \ + (__mmask16)(U), (int)(R))) #define _mm512_maskz_cvtt_roundps_epu32(U, A, R) \ - (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \ - (__v16si)_mm512_setzero_si512(), \ - (__mmask16)(U), (int)(R)) + ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \ + (__v16si)_mm512_setzero_si512(), \ + (__mmask16)(U), (int)(R))) static __inline __m512i __DEFAULT_FN_ATTRS512 @@ -3563,34 +3563,34 @@ _mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A) } #define _mm512_cvt_roundepi32_ps(A, R) \ - (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \ - (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)-1, (int)(R)) + ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \ + (__v16sf)_mm512_setzero_ps(), \ + (__mmask16)-1, (int)(R))) #define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) \ - (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \ - (__v16sf)(__m512)(W), \ - (__mmask16)(U), (int)(R)) + ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \ + (__v16sf)(__m512)(W), \ + (__mmask16)(U), (int)(R))) #define _mm512_maskz_cvt_roundepi32_ps(U, A, R) \ - (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \ - (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(U), (int)(R)) + ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \ + (__v16sf)_mm512_setzero_ps(), \ + (__mmask16)(U), (int)(R))) #define _mm512_cvt_roundepu32_ps(A, R) \ - (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \ - (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)-1, (int)(R)) + ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \ + (__v16sf)_mm512_setzero_ps(), \ + (__mmask16)-1, (int)(R))) #define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) \ - (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \ - (__v16sf)(__m512)(W), \ - (__mmask16)(U), (int)(R)) + ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \ + (__v16sf)(__m512)(W), \ + (__mmask16)(U), (int)(R))) #define _mm512_maskz_cvt_roundepu32_ps(U, A, R) \ - (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \ - (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(U), (int)(R)) + ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \ + (__v16sf)_mm512_setzero_ps(), \ + (__mmask16)(U), (int)(R))) static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtepu32_ps (__m512i __A) @@ 
-3705,19 +3705,19 @@ _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U,__m512i __A) } #define _mm512_cvt_roundpd_ps(A, R) \ - (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \ - (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)-1, (int)(R)) + ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \ + (__v8sf)_mm256_setzero_ps(), \ + (__mmask8)-1, (int)(R))) #define _mm512_mask_cvt_roundpd_ps(W, U, A, R) \ - (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \ - (__v8sf)(__m256)(W), (__mmask8)(U), \ - (int)(R)) + ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \ + (__v8sf)(__m256)(W), (__mmask8)(U), \ + (int)(R))) #define _mm512_maskz_cvt_roundpd_ps(U, A, R) \ - (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \ - (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)(U), (int)(R)) + ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \ + (__v8sf)_mm256_setzero_ps(), \ + (__mmask8)(U), (int)(R))) static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_cvtpd_ps (__m512d __A) @@ -3765,38 +3765,38 @@ _mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A) } #define _mm512_cvt_roundps_ph(A, I) \ - (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ - (__v16hi)_mm256_undefined_si256(), \ - (__mmask16)-1) + ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ + (__v16hi)_mm256_undefined_si256(), \ + (__mmask16)-1)) #define _mm512_mask_cvt_roundps_ph(U, W, A, I) \ - (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ - (__v16hi)(__m256i)(U), \ - (__mmask16)(W)) + ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ + (__v16hi)(__m256i)(U), \ + (__mmask16)(W))) #define _mm512_maskz_cvt_roundps_ph(W, A, I) \ - (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ - (__v16hi)_mm256_setzero_si256(), \ - (__mmask16)(W)) + ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ + (__v16hi)_mm256_setzero_si256(), \ + (__mmask16)(W))) #define _mm512_cvtps_ph _mm512_cvt_roundps_ph #define _mm512_mask_cvtps_ph _mm512_mask_cvt_roundps_ph #define _mm512_maskz_cvtps_ph _mm512_maskz_cvt_roundps_ph #define _mm512_cvt_roundph_ps(A, R) \ - (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \ - (__v16sf)_mm512_undefined_ps(), \ - (__mmask16)-1, (int)(R)) + ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \ + (__v16sf)_mm512_undefined_ps(), \ + (__mmask16)-1, (int)(R))) #define _mm512_mask_cvt_roundph_ps(W, U, A, R) \ - (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \ - (__v16sf)(__m512)(W), \ - (__mmask16)(U), (int)(R)) + ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \ + (__v16sf)(__m512)(W), \ + (__mmask16)(U), (int)(R))) #define _mm512_maskz_cvt_roundph_ps(U, A, R) \ - (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \ - (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(U), (int)(R)) + ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \ + (__v16sf)_mm512_setzero_ps(), \ + (__mmask16)(U), (int)(R))) static __inline __m512 __DEFAULT_FN_ATTRS512 @@ -3828,19 +3828,19 @@ _mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A) } #define _mm512_cvtt_roundpd_epi32(A, R) \ - (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \ - (__v8si)_mm256_setzero_si256(), \ - (__mmask8)-1, (int)(R)) + ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \ + (__v8si)_mm256_setzero_si256(), \ + (__mmask8)-1, (int)(R))) #define 
_mm512_mask_cvtt_roundpd_epi32(W, U, A, R) \ - (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \ - (__v8si)(__m256i)(W), \ - (__mmask8)(U), (int)(R)) + ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \ + (__v8si)(__m256i)(W), \ + (__mmask8)(U), (int)(R))) #define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) \ - (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \ - (__v8si)_mm256_setzero_si256(), \ - (__mmask8)(U), (int)(R)) + ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \ + (__v8si)_mm256_setzero_si256(), \ + (__mmask8)(U), (int)(R))) static __inline __m256i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epi32(__m512d __a) @@ -3870,19 +3870,19 @@ _mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A) } #define _mm512_cvtt_roundps_epi32(A, R) \ - (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \ - (__v16si)_mm512_setzero_si512(), \ - (__mmask16)-1, (int)(R)) + ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \ + (__v16si)_mm512_setzero_si512(), \ + (__mmask16)-1, (int)(R))) #define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) \ - (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \ - (__v16si)(__m512i)(W), \ - (__mmask16)(U), (int)(R)) + ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \ + (__v16si)(__m512i)(W), \ + (__mmask16)(U), (int)(R))) #define _mm512_maskz_cvtt_roundps_epi32(U, A, R) \ - (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \ - (__v16si)_mm512_setzero_si512(), \ - (__mmask16)(U), (int)(R)) + ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \ + (__v16si)_mm512_setzero_si512(), \ + (__mmask16)(U), (int)(R))) static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epi32(__m512 __a) @@ -3912,19 +3912,19 @@ _mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A) } #define _mm512_cvt_roundps_epi32(A, R) \ - (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \ - (__v16si)_mm512_setzero_si512(), \ - (__mmask16)-1, (int)(R)) + ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \ + (__v16si)_mm512_setzero_si512(), \ + (__mmask16)-1, (int)(R))) #define _mm512_mask_cvt_roundps_epi32(W, U, A, R) \ - (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \ - (__v16si)(__m512i)(W), \ - (__mmask16)(U), (int)(R)) + ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \ + (__v16si)(__m512i)(W), \ + (__mmask16)(U), (int)(R))) #define _mm512_maskz_cvt_roundps_epi32(U, A, R) \ - (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \ - (__v16si)_mm512_setzero_si512(), \ - (__mmask16)(U), (int)(R)) + ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \ + (__v16si)_mm512_setzero_si512(), \ + (__mmask16)(U), (int)(R))) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epi32 (__m512 __A) @@ -3955,19 +3955,19 @@ _mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A) } #define _mm512_cvt_roundpd_epi32(A, R) \ - (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \ - (__v8si)_mm256_setzero_si256(), \ - (__mmask8)-1, (int)(R)) + ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \ + (__v8si)_mm256_setzero_si256(), \ + (__mmask8)-1, (int)(R))) #define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) \ - (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \ - (__v8si)(__m256i)(W), \ - (__mmask8)(U), (int)(R)) + ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \ + (__v8si)(__m256i)(W), \ + (__mmask8)(U), (int)(R))) #define 
_mm512_maskz_cvt_roundpd_epi32(U, A, R) \ - (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \ - (__v8si)_mm256_setzero_si256(), \ - (__mmask8)(U), (int)(R)) + ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \ + (__v8si)_mm256_setzero_si256(), \ + (__mmask8)(U), (int)(R))) static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epi32 (__m512d __A) @@ -3999,19 +3999,19 @@ _mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A) } #define _mm512_cvt_roundps_epu32(A, R) \ - (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \ - (__v16si)_mm512_setzero_si512(), \ - (__mmask16)-1, (int)(R)) + ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \ + (__v16si)_mm512_setzero_si512(), \ + (__mmask16)-1, (int)(R))) #define _mm512_mask_cvt_roundps_epu32(W, U, A, R) \ - (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \ - (__v16si)(__m512i)(W), \ - (__mmask16)(U), (int)(R)) + ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \ + (__v16si)(__m512i)(W), \ + (__mmask16)(U), (int)(R))) #define _mm512_maskz_cvt_roundps_epu32(U, A, R) \ - (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \ - (__v16si)_mm512_setzero_si512(), \ - (__mmask16)(U), (int)(R)) + ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \ + (__v16si)_mm512_setzero_si512(), \ + (__mmask16)(U), (int)(R))) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epu32 ( __m512 __A) @@ -4043,19 +4043,19 @@ _mm512_maskz_cvtps_epu32 ( __mmask16 __U, __m512 __A) } #define _mm512_cvt_roundpd_epu32(A, R) \ - (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \ - (__v8si)_mm256_setzero_si256(), \ - (__mmask8)-1, (int)(R)) + ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \ + (__v8si)_mm256_setzero_si256(), \ + (__mmask8)-1, (int)(R))) #define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) \ - (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \ - (__v8si)(__m256i)(W), \ - (__mmask8)(U), (int)(R)) + ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \ + (__v8si)(__m256i)(W), \ + (__mmask8)(U), (int)(R))) #define _mm512_maskz_cvt_roundpd_epu32(U, A, R) \ - (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \ - (__v8si)_mm256_setzero_si256(), \ - (__mmask8)(U), (int)(R)) + ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \ + (__v8si)_mm256_setzero_si256(), \ + (__mmask8)(U), (int)(R))) static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epu32 (__m512d __A) @@ -4975,70 +4975,70 @@ _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B) #define _mm512_cmp_epi32_mask(a, b, p) \ - (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \ - (__v16si)(__m512i)(b), (int)(p), \ - (__mmask16)-1) + ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \ + (__v16si)(__m512i)(b), (int)(p), \ + (__mmask16)-1)) #define _mm512_cmp_epu32_mask(a, b, p) \ - (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \ - (__v16si)(__m512i)(b), (int)(p), \ - (__mmask16)-1) + ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \ + (__v16si)(__m512i)(b), (int)(p), \ + (__mmask16)-1)) #define _mm512_cmp_epi64_mask(a, b, p) \ - (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \ - (__v8di)(__m512i)(b), (int)(p), \ - (__mmask8)-1) + ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \ + (__v8di)(__m512i)(b), (int)(p), \ + (__mmask8)-1)) #define _mm512_cmp_epu64_mask(a, b, p) \ - 
(__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \ - (__v8di)(__m512i)(b), (int)(p), \ - (__mmask8)-1) + ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \ + (__v8di)(__m512i)(b), (int)(p), \ + (__mmask8)-1)) #define _mm512_mask_cmp_epi32_mask(m, a, b, p) \ - (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \ - (__v16si)(__m512i)(b), (int)(p), \ - (__mmask16)(m)) + ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \ + (__v16si)(__m512i)(b), (int)(p), \ + (__mmask16)(m))) #define _mm512_mask_cmp_epu32_mask(m, a, b, p) \ - (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \ - (__v16si)(__m512i)(b), (int)(p), \ - (__mmask16)(m)) + ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \ + (__v16si)(__m512i)(b), (int)(p), \ + (__mmask16)(m))) #define _mm512_mask_cmp_epi64_mask(m, a, b, p) \ - (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \ - (__v8di)(__m512i)(b), (int)(p), \ - (__mmask8)(m)) + ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \ + (__v8di)(__m512i)(b), (int)(p), \ + (__mmask8)(m))) #define _mm512_mask_cmp_epu64_mask(m, a, b, p) \ - (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \ - (__v8di)(__m512i)(b), (int)(p), \ - (__mmask8)(m)) + ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \ + (__v8di)(__m512i)(b), (int)(p), \ + (__mmask8)(m))) #define _mm512_rol_epi32(a, b) \ - (__m512i)__builtin_ia32_prold512((__v16si)(__m512i)(a), (int)(b)) + ((__m512i)__builtin_ia32_prold512((__v16si)(__m512i)(a), (int)(b))) #define _mm512_mask_rol_epi32(W, U, a, b) \ - (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ - (__v16si)_mm512_rol_epi32((a), (b)), \ - (__v16si)(__m512i)(W)) + ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ + (__v16si)_mm512_rol_epi32((a), (b)), \ + (__v16si)(__m512i)(W))) #define _mm512_maskz_rol_epi32(U, a, b) \ - (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ - (__v16si)_mm512_rol_epi32((a), (b)), \ - (__v16si)_mm512_setzero_si512()) + ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ + (__v16si)_mm512_rol_epi32((a), (b)), \ + (__v16si)_mm512_setzero_si512())) #define _mm512_rol_epi64(a, b) \ - (__m512i)__builtin_ia32_prolq512((__v8di)(__m512i)(a), (int)(b)) + ((__m512i)__builtin_ia32_prolq512((__v8di)(__m512i)(a), (int)(b))) #define _mm512_mask_rol_epi64(W, U, a, b) \ - (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ - (__v8di)_mm512_rol_epi64((a), (b)), \ - (__v8di)(__m512i)(W)) + ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ + (__v8di)_mm512_rol_epi64((a), (b)), \ + (__v8di)(__m512i)(W))) #define _mm512_maskz_rol_epi64(U, a, b) \ - (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ - (__v8di)_mm512_rol_epi64((a), (b)), \ - (__v8di)_mm512_setzero_si512()) + ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ + (__v8di)_mm512_rol_epi64((a), (b)), \ + (__v8di)_mm512_setzero_si512())) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rolv_epi32 (__m512i __A, __m512i __B) @@ -5085,30 +5085,30 @@ _mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B) } #define _mm512_ror_epi32(A, B) \ - (__m512i)__builtin_ia32_prord512((__v16si)(__m512i)(A), (int)(B)) + ((__m512i)__builtin_ia32_prord512((__v16si)(__m512i)(A), (int)(B))) #define _mm512_mask_ror_epi32(W, U, A, B) \ - (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ - (__v16si)_mm512_ror_epi32((A), (B)), \ - (__v16si)(__m512i)(W)) + ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ + (__v16si)_mm512_ror_epi32((A), (B)), \ + 
(__v16si)(__m512i)(W))) #define _mm512_maskz_ror_epi32(U, A, B) \ - (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ - (__v16si)_mm512_ror_epi32((A), (B)), \ - (__v16si)_mm512_setzero_si512()) + ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ + (__v16si)_mm512_ror_epi32((A), (B)), \ + (__v16si)_mm512_setzero_si512())) #define _mm512_ror_epi64(A, B) \ - (__m512i)__builtin_ia32_prorq512((__v8di)(__m512i)(A), (int)(B)) + ((__m512i)__builtin_ia32_prorq512((__v8di)(__m512i)(A), (int)(B))) #define _mm512_mask_ror_epi64(W, U, A, B) \ - (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ - (__v8di)_mm512_ror_epi64((A), (B)), \ - (__v8di)(__m512i)(W)) + ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ + (__v8di)_mm512_ror_epi64((A), (B)), \ + (__v8di)(__m512i)(W))) #define _mm512_maskz_ror_epi64(U, A, B) \ - (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ - (__v8di)_mm512_ror_epi64((A), (B)), \ - (__v8di)_mm512_setzero_si512()) + ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ + (__v8di)_mm512_ror_epi64((A), (B)), \ + (__v8di)_mm512_setzero_si512())) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_slli_epi32(__m512i __A, unsigned int __B) @@ -5304,168 +5304,168 @@ _mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A) } #define _mm512_fixupimm_round_pd(A, B, C, imm, R) \ - (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), \ - (__v8di)(__m512i)(C), (int)(imm), \ - (__mmask8)-1, (int)(R)) + ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), \ + (__v8di)(__m512i)(C), (int)(imm), \ + (__mmask8)-1, (int)(R))) #define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) \ - (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), \ - (__v8di)(__m512i)(C), (int)(imm), \ - (__mmask8)(U), (int)(R)) + ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), \ + (__v8di)(__m512i)(C), (int)(imm), \ + (__mmask8)(U), (int)(R))) #define _mm512_fixupimm_pd(A, B, C, imm) \ - (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), \ - (__v8di)(__m512i)(C), (int)(imm), \ - (__mmask8)-1, \ - _MM_FROUND_CUR_DIRECTION) + ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), \ + (__v8di)(__m512i)(C), (int)(imm), \ + (__mmask8)-1, \ + _MM_FROUND_CUR_DIRECTION)) #define _mm512_mask_fixupimm_pd(A, U, B, C, imm) \ - (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), \ - (__v8di)(__m512i)(C), (int)(imm), \ - (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION) + ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), \ + (__v8di)(__m512i)(C), (int)(imm), \ + (__mmask8)(U), \ + _MM_FROUND_CUR_DIRECTION)) #define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) \ - (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), \ - (__v8di)(__m512i)(C), \ - (int)(imm), (__mmask8)(U), \ - (int)(R)) + ((__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), \ + (__v8di)(__m512i)(C), \ + (int)(imm), (__mmask8)(U), \ + (int)(R))) #define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) \ - (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), \ - (__v8di)(__m512i)(C), \ - (int)(imm), (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION) + ((__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), 
\ + (__v8di)(__m512i)(C), \ + (int)(imm), (__mmask8)(U), \ + _MM_FROUND_CUR_DIRECTION)) #define _mm512_fixupimm_round_ps(A, B, C, imm, R) \ - (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), \ - (__v16si)(__m512i)(C), (int)(imm), \ - (__mmask16)-1, (int)(R)) + ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), \ + (__v16si)(__m512i)(C), (int)(imm), \ + (__mmask16)-1, (int)(R))) #define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) \ - (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), \ - (__v16si)(__m512i)(C), (int)(imm), \ - (__mmask16)(U), (int)(R)) + ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), \ + (__v16si)(__m512i)(C), (int)(imm), \ + (__mmask16)(U), (int)(R))) #define _mm512_fixupimm_ps(A, B, C, imm) \ - (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), \ - (__v16si)(__m512i)(C), (int)(imm), \ - (__mmask16)-1, \ - _MM_FROUND_CUR_DIRECTION) + ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), \ + (__v16si)(__m512i)(C), (int)(imm), \ + (__mmask16)-1, \ + _MM_FROUND_CUR_DIRECTION)) #define _mm512_mask_fixupimm_ps(A, U, B, C, imm) \ - (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), \ - (__v16si)(__m512i)(C), (int)(imm), \ - (__mmask16)(U), \ - _MM_FROUND_CUR_DIRECTION) + ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), \ + (__v16si)(__m512i)(C), (int)(imm), \ + (__mmask16)(U), \ + _MM_FROUND_CUR_DIRECTION)) #define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) \ - (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), \ - (__v16si)(__m512i)(C), \ - (int)(imm), (__mmask16)(U), \ - (int)(R)) + ((__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), \ + (__v16si)(__m512i)(C), \ + (int)(imm), (__mmask16)(U), \ + (int)(R))) #define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) \ - (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), \ - (__v16si)(__m512i)(C), \ - (int)(imm), (__mmask16)(U), \ - _MM_FROUND_CUR_DIRECTION) + ((__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), \ + (__v16si)(__m512i)(C), \ + (int)(imm), (__mmask16)(U), \ + _MM_FROUND_CUR_DIRECTION)) #define _mm_fixupimm_round_sd(A, B, C, imm, R) \ - (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2di)(__m128i)(C), (int)(imm), \ - (__mmask8)-1, (int)(R)) + ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2di)(__m128i)(C), (int)(imm), \ + (__mmask8)-1, (int)(R))) #define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) \ - (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2di)(__m128i)(C), (int)(imm), \ - (__mmask8)(U), (int)(R)) + ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2di)(__m128i)(C), (int)(imm), \ + (__mmask8)(U), (int)(R))) #define _mm_fixupimm_sd(A, B, C, imm) \ - (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2di)(__m128i)(C), (int)(imm), \ - (__mmask8)-1, \ - _MM_FROUND_CUR_DIRECTION) - -#define _mm_mask_fixupimm_sd(A, U, B, C, imm) \ - (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ - 
(__v2df)(__m128d)(B), \ - (__v2di)(__m128i)(C), (int)(imm), \ - (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION) - -#define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) \ - (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \ + ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2di)(__m128i)(C), (int)(imm), \ - (__mmask8)(U), (int)(R)) + (__mmask8)-1, \ + _MM_FROUND_CUR_DIRECTION)) -#define _mm_maskz_fixupimm_sd(U, A, B, C, imm) \ - (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \ +#define _mm_mask_fixupimm_sd(A, U, B, C, imm) \ + ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), \ (__v2di)(__m128i)(C), (int)(imm), \ (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION) + _MM_FROUND_CUR_DIRECTION)) + +#define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) \ + ((__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2di)(__m128i)(C), (int)(imm), \ + (__mmask8)(U), (int)(R))) + +#define _mm_maskz_fixupimm_sd(U, A, B, C, imm) \ + ((__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2di)(__m128i)(C), (int)(imm), \ + (__mmask8)(U), \ + _MM_FROUND_CUR_DIRECTION)) #define _mm_fixupimm_round_ss(A, B, C, imm, R) \ - (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4si)(__m128i)(C), (int)(imm), \ - (__mmask8)-1, (int)(R)) + ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4si)(__m128i)(C), (int)(imm), \ + (__mmask8)-1, (int)(R))) #define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) \ - (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4si)(__m128i)(C), (int)(imm), \ - (__mmask8)(U), (int)(R)) + ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4si)(__m128i)(C), (int)(imm), \ + (__mmask8)(U), (int)(R))) #define _mm_fixupimm_ss(A, B, C, imm) \ - (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4si)(__m128i)(C), (int)(imm), \ - (__mmask8)-1, \ - _MM_FROUND_CUR_DIRECTION) - -#define _mm_mask_fixupimm_ss(A, U, B, C, imm) \ - (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4si)(__m128i)(C), (int)(imm), \ - (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION) - -#define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) \ - (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \ + ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4si)(__m128i)(C), (int)(imm), \ - (__mmask8)(U), (int)(R)) + (__mmask8)-1, \ + _MM_FROUND_CUR_DIRECTION)) -#define _mm_maskz_fixupimm_ss(U, A, B, C, imm) \ - (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \ +#define _mm_mask_fixupimm_ss(A, U, B, C, imm) \ + ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4si)(__m128i)(C), (int)(imm), \ (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION) + _MM_FROUND_CUR_DIRECTION)) + +#define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) \ + ((__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4si)(__m128i)(C), (int)(imm), \ + (__mmask8)(U), (int)(R))) + +#define _mm_maskz_fixupimm_ss(U, A, B, C, imm) \ + ((__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4si)(__m128i)(C), (int)(imm), \ + (__mmask8)(U), \ + _MM_FROUND_CUR_DIRECTION)) 
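[Editorial note, not part of the patch] Every hunk in this header follows the same mechanical pattern: each AVX-512 intrinsic macro body, which previously began with a bare cast of a __builtin_ia32_* call, is now wrapped in one extra pair of parentheses. A minimal, self-contained sketch of the kind of parsing hazard this appears to close off is below; the v2d, splat2 and SPLAT_* names are hypothetical stand-ins for the intrinsic macros, not identifiers from the patch.

#include <stddef.h>

/* Toy vector type and helper standing in for the __m128d / __builtin_ia32_*
 * pair used by the real macros. */
typedef double v2d __attribute__((vector_size(16)));

static v2d splat2(double x) {
  v2d r = {x, x};
  return r;
}

/* Pre-patch shape: the expansion starts with a bare cast. */
#define SPLAT_OLD(X) (v2d)splat2((double)(X))
/* Post-patch shape: the whole expansion is a single parenthesized expression. */
#define SPLAT_NEW(X) ((v2d)splat2((double)(X)))

size_t demo(void) {
  /* "sizeof SPLAT_OLD(1.0)" expands to "sizeof (v2d) splat2(...)", which the
   * parser reads as sizeof(v2d) followed by a stray expression - a syntax
   * error.  The parenthesized form keeps the expansion one operand. */
  return sizeof SPLAT_NEW(1.0);
}

Similar hazards can arise wherever a macro expansion is immediately followed by a postfix operator (for example clang's vector subscripting), since postfix operators bind tighter than the leading cast; wrapping the body makes the macro usable in any expression context, which is presumably the motivation for the change seen throughout these hunks.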
#define _mm_getexp_round_sd(A, B, R) \ - (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1, (int)(R)) + ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)-1, (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 @@ -5486,10 +5486,10 @@ _mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) } #define _mm_mask_getexp_round_sd(W, U, A, B, R) \ - (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)(__m128d)(W), \ - (__mmask8)(U), (int)(R)) + ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)(__m128d)(W), \ + (__mmask8)(U), (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B) @@ -5502,16 +5502,16 @@ _mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B) } #define _mm_maskz_getexp_round_sd(U, A, B, R) \ - (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U), (int)(R)) + ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)(U), (int)(R))) #define _mm_getexp_round_ss(A, B, R) \ - (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)-1, (int)(R)) + ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)-1, (int)(R))) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_getexp_ss (__m128 __A, __m128 __B) @@ -5531,10 +5531,10 @@ _mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) } #define _mm_mask_getexp_round_ss(W, U, A, B, R) \ - (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)(__m128)(W), \ - (__mmask8)(U), (int)(R)) + ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)(__m128)(W), \ + (__mmask8)(U), (int)(R))) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B) @@ -5547,100 +5547,100 @@ _mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B) } #define _mm_maskz_getexp_round_ss(U, A, B, R) \ - (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U), (int)(R)) + ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)(U), (int)(R))) #define _mm_getmant_round_sd(A, B, C, D, R) \ - (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (int)(((D)<<2) | (C)), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1, (int)(R)) + ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (int)(((D)<<2) | (C)), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)-1, (int)(R))) #define _mm_getmant_sd(A, B, C, D) \ - (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (int)(((D)<<2) | (C)), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1, \ - _MM_FROUND_CUR_DIRECTION) + ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ + 
(__v2df)(__m128d)(B), \ + (int)(((D)<<2) | (C)), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)-1, \ + _MM_FROUND_CUR_DIRECTION)) #define _mm_mask_getmant_sd(W, U, A, B, C, D) \ - (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (int)(((D)<<2) | (C)), \ - (__v2df)(__m128d)(W), \ - (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION) + ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (int)(((D)<<2) | (C)), \ + (__v2df)(__m128d)(W), \ + (__mmask8)(U), \ + _MM_FROUND_CUR_DIRECTION)) #define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R) \ - (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (int)(((D)<<2) | (C)), \ - (__v2df)(__m128d)(W), \ - (__mmask8)(U), (int)(R)) + ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (int)(((D)<<2) | (C)), \ + (__v2df)(__m128d)(W), \ + (__mmask8)(U), (int)(R))) #define _mm_maskz_getmant_sd(U, A, B, C, D) \ - (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (int)(((D)<<2) | (C)), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION) + ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (int)(((D)<<2) | (C)), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)(U), \ + _MM_FROUND_CUR_DIRECTION)) #define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) \ - (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (int)(((D)<<2) | (C)), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U), (int)(R)) + ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (int)(((D)<<2) | (C)), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)(U), (int)(R))) -#define _mm_getmant_round_ss(A, B, C, D, R) \ - (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (int)(((D)<<2) | (C)), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)-1, (int)(R)) +#define _mm_getmant_round_ss(A, B, C, D, R) \ + ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (int)(((D)<<2) | (C)), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)-1, (int)(R))) #define _mm_getmant_ss(A, B, C, D) \ - (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (int)(((D)<<2) | (C)), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)-1, \ - _MM_FROUND_CUR_DIRECTION) + ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (int)(((D)<<2) | (C)), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)-1, \ + _MM_FROUND_CUR_DIRECTION)) #define _mm_mask_getmant_ss(W, U, A, B, C, D) \ - (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (int)(((D)<<2) | (C)), \ - (__v4sf)(__m128)(W), \ - (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION) + ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (int)(((D)<<2) | (C)), \ + (__v4sf)(__m128)(W), \ + (__mmask8)(U), \ + _MM_FROUND_CUR_DIRECTION)) #define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R) \ - (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (int)(((D)<<2) | (C)), \ - (__v4sf)(__m128)(W), \ - (__mmask8)(U), (int)(R)) + ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (int)(((D)<<2) | (C)), \ + (__v4sf)(__m128)(W), \ + 
(__mmask8)(U), (int)(R))) #define _mm_maskz_getmant_ss(U, A, B, C, D) \ - (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (int)(((D)<<2) | (C)), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION) + ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (int)(((D)<<2) | (C)), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)(U), \ + _MM_FROUND_CUR_DIRECTION)) #define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) \ - (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (int)(((D)<<2) | (C)), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U), (int)(R)) + ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (int)(((D)<<2) | (C)), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)(U), (int)(R))) static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kmov (__mmask16 __A) @@ -5649,16 +5649,16 @@ _mm512_kmov (__mmask16 __A) } #define _mm_comi_round_sd(A, B, P, R) \ - (int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \ - (int)(P), (int)(R)) + ((int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \ + (int)(P), (int)(R))) #define _mm_comi_round_ss(A, B, P, R) \ - (int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \ - (int)(P), (int)(R)) + ((int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \ + (int)(P), (int)(R))) #ifdef __x86_64__ #define _mm_cvt_roundsd_si64(A, R) \ - (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)) + ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R))) #endif static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -5926,54 +5926,54 @@ _mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y) } #define _mm512_ternarylogic_epi32(A, B, C, imm) \ - (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \ - (__v16si)(__m512i)(B), \ - (__v16si)(__m512i)(C), (int)(imm), \ - (__mmask16)-1) + ((__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \ + (__v16si)(__m512i)(B), \ + (__v16si)(__m512i)(C), (int)(imm), \ + (__mmask16)-1)) #define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) \ - (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \ - (__v16si)(__m512i)(B), \ - (__v16si)(__m512i)(C), (int)(imm), \ - (__mmask16)(U)) + ((__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \ + (__v16si)(__m512i)(B), \ + (__v16si)(__m512i)(C), (int)(imm), \ + (__mmask16)(U))) #define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) \ - (__m512i)__builtin_ia32_pternlogd512_maskz((__v16si)(__m512i)(A), \ - (__v16si)(__m512i)(B), \ - (__v16si)(__m512i)(C), \ - (int)(imm), (__mmask16)(U)) + ((__m512i)__builtin_ia32_pternlogd512_maskz((__v16si)(__m512i)(A), \ + (__v16si)(__m512i)(B), \ + (__v16si)(__m512i)(C), \ + (int)(imm), (__mmask16)(U))) #define _mm512_ternarylogic_epi64(A, B, C, imm) \ - (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \ - (__v8di)(__m512i)(B), \ - (__v8di)(__m512i)(C), (int)(imm), \ - (__mmask8)-1) + ((__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \ + (__v8di)(__m512i)(B), \ + (__v8di)(__m512i)(C), (int)(imm), \ + (__mmask8)-1)) #define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) \ - (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \ - (__v8di)(__m512i)(B), \ - (__v8di)(__m512i)(C), (int)(imm), \ - (__mmask8)(U)) - -#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) \ - 
(__m512i)__builtin_ia32_pternlogq512_maskz((__v8di)(__m512i)(A), \ + ((__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \ (__v8di)(__m512i)(B), \ (__v8di)(__m512i)(C), (int)(imm), \ - (__mmask8)(U)) + (__mmask8)(U))) + +#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) \ + ((__m512i)__builtin_ia32_pternlogq512_maskz((__v8di)(__m512i)(A), \ + (__v8di)(__m512i)(B), \ + (__v8di)(__m512i)(C), (int)(imm), \ + (__mmask8)(U))) #ifdef __x86_64__ #define _mm_cvt_roundsd_i64(A, R) \ - (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)) + ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R))) #endif #define _mm_cvt_roundsd_si32(A, R) \ - (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)) + ((int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R))) #define _mm_cvt_roundsd_i32(A, R) \ - (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)) + ((int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R))) #define _mm_cvt_roundsd_u32(A, R) \ - (unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R)) + ((unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R))) static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvtsd_u32 (__m128d __A) @@ -5984,8 +5984,8 @@ _mm_cvtsd_u32 (__m128d __A) #ifdef __x86_64__ #define _mm_cvt_roundsd_u64(A, R) \ - (unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \ - (int)(R)) + ((unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \ + (int)(R))) static __inline__ unsigned long long __DEFAULT_FN_ATTRS128 _mm_cvtsd_u64 (__m128d __A) @@ -5997,21 +5997,21 @@ _mm_cvtsd_u64 (__m128d __A) #endif #define _mm_cvt_roundss_si32(A, R) \ - (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)) + ((int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R))) #define _mm_cvt_roundss_i32(A, R) \ - (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)) + ((int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R))) #ifdef __x86_64__ #define _mm_cvt_roundss_si64(A, R) \ - (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)) + ((long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R))) #define _mm_cvt_roundss_i64(A, R) \ - (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)) + ((long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R))) #endif #define _mm_cvt_roundss_u32(A, R) \ - (unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R)) + ((unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R))) static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvtss_u32 (__m128 __A) @@ -6022,8 +6022,8 @@ _mm_cvtss_u32 (__m128 __A) #ifdef __x86_64__ #define _mm_cvt_roundss_u64(A, R) \ - (unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \ - (int)(R)) + ((unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \ + (int)(R))) static __inline__ unsigned long long __DEFAULT_FN_ATTRS128 _mm_cvtss_u64 (__m128 __A) @@ -6035,10 +6035,10 @@ _mm_cvtss_u64 (__m128 __A) #endif #define _mm_cvtt_roundsd_i32(A, R) \ - (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)) + ((int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R))) #define _mm_cvtt_roundsd_si32(A, R) \ - (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)) + ((int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R))) static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttsd_i32 (__m128d __A) @@ -6049,10 +6049,10 @@ _mm_cvttsd_i32 
(__m128d __A) #ifdef __x86_64__ #define _mm_cvtt_roundsd_si64(A, R) \ - (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)) + ((long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R))) #define _mm_cvtt_roundsd_i64(A, R) \ - (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)) + ((long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R))) static __inline__ long long __DEFAULT_FN_ATTRS128 _mm_cvttsd_i64 (__m128d __A) @@ -6063,7 +6063,7 @@ _mm_cvttsd_i64 (__m128d __A) #endif #define _mm_cvtt_roundsd_u32(A, R) \ - (unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R)) + ((unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R))) static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvttsd_u32 (__m128d __A) @@ -6074,8 +6074,8 @@ _mm_cvttsd_u32 (__m128d __A) #ifdef __x86_64__ #define _mm_cvtt_roundsd_u64(A, R) \ - (unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \ - (int)(R)) + ((unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \ + (int)(R))) static __inline__ unsigned long long __DEFAULT_FN_ATTRS128 _mm_cvttsd_u64 (__m128d __A) @@ -6087,10 +6087,10 @@ _mm_cvttsd_u64 (__m128d __A) #endif #define _mm_cvtt_roundss_i32(A, R) \ - (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)) + ((int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R))) #define _mm_cvtt_roundss_si32(A, R) \ - (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)) + ((int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R))) static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttss_i32 (__m128 __A) @@ -6101,10 +6101,10 @@ _mm_cvttss_i32 (__m128 __A) #ifdef __x86_64__ #define _mm_cvtt_roundss_i64(A, R) \ - (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)) + ((long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R))) #define _mm_cvtt_roundss_si64(A, R) \ - (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)) + ((long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R))) static __inline__ long long __DEFAULT_FN_ATTRS128 _mm_cvttss_i64 (__m128 __A) @@ -6115,7 +6115,7 @@ _mm_cvttss_i64 (__m128 __A) #endif #define _mm_cvtt_roundss_u32(A, R) \ - (unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R)) + ((unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R))) static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvttss_u32 (__m128 __A) @@ -6126,8 +6126,8 @@ _mm_cvttss_u32 (__m128 __A) #ifdef __x86_64__ #define _mm_cvtt_roundss_u64(A, R) \ - (unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \ - (int)(R)) + ((unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \ + (int)(R))) static __inline__ unsigned long long __DEFAULT_FN_ATTRS128 _mm_cvttss_u64 (__m128 __A) @@ -6139,30 +6139,30 @@ _mm_cvttss_u64 (__m128 __A) #endif #define _mm512_permute_pd(X, C) \ - (__m512d)__builtin_ia32_vpermilpd512((__v8df)(__m512d)(X), (int)(C)) + ((__m512d)__builtin_ia32_vpermilpd512((__v8df)(__m512d)(X), (int)(C))) #define _mm512_mask_permute_pd(W, U, X, C) \ - (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ - (__v8df)_mm512_permute_pd((X), (C)), \ - (__v8df)(__m512d)(W)) + ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ + (__v8df)_mm512_permute_pd((X), (C)), \ + (__v8df)(__m512d)(W))) #define _mm512_maskz_permute_pd(U, X, C) \ - (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ - (__v8df)_mm512_permute_pd((X), (C)), \ - 
(__v8df)_mm512_setzero_pd()) + ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ + (__v8df)_mm512_permute_pd((X), (C)), \ + (__v8df)_mm512_setzero_pd())) #define _mm512_permute_ps(X, C) \ - (__m512)__builtin_ia32_vpermilps512((__v16sf)(__m512)(X), (int)(C)) + ((__m512)__builtin_ia32_vpermilps512((__v16sf)(__m512)(X), (int)(C))) #define _mm512_mask_permute_ps(W, U, X, C) \ - (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ - (__v16sf)_mm512_permute_ps((X), (C)), \ - (__v16sf)(__m512)(W)) + ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ + (__v16sf)_mm512_permute_ps((X), (C)), \ + (__v16sf)(__m512)(W))) #define _mm512_maskz_permute_ps(U, X, C) \ - (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ - (__v16sf)_mm512_permute_ps((X), (C)), \ - (__v16sf)_mm512_setzero_ps()) + ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ + (__v16sf)_mm512_permute_ps((X), (C)), \ + (__v16sf)_mm512_setzero_ps())) static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_permutevar_pd(__m512d __A, __m512i __C) @@ -6274,19 +6274,19 @@ _mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I, __m512 __B) #define _mm512_cvtt_roundpd_epu32(A, R) \ - (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \ - (__v8si)_mm256_undefined_si256(), \ - (__mmask8)-1, (int)(R)) + ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \ + (__v8si)_mm256_undefined_si256(), \ + (__mmask8)-1, (int)(R))) #define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) \ - (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \ - (__v8si)(__m256i)(W), \ - (__mmask8)(U), (int)(R)) + ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \ + (__v8si)(__m256i)(W), \ + (__mmask8)(U), (int)(R))) #define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) \ - (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \ - (__v8si)_mm256_setzero_si256(), \ - (__mmask8)(U), (int)(R)) + ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \ + (__v8si)_mm256_setzero_si256(), \ + (__mmask8)(U), (int)(R))) static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epu32 (__m512d __A) @@ -6318,106 +6318,106 @@ _mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A) } #define _mm_roundscale_round_sd(A, B, imm, R) \ - (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1, (int)(imm), \ - (int)(R)) + ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)-1, (int)(imm), \ + (int)(R))) #define _mm_roundscale_sd(A, B, imm) \ - (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1, (int)(imm), \ - _MM_FROUND_CUR_DIRECTION) + ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)-1, (int)(imm), \ + _MM_FROUND_CUR_DIRECTION)) #define _mm_mask_roundscale_sd(W, U, A, B, imm) \ - (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)(__m128d)(W), \ - (__mmask8)(U), (int)(imm), \ - _MM_FROUND_CUR_DIRECTION) + ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)(__m128d)(W), \ + (__mmask8)(U), (int)(imm), \ + _MM_FROUND_CUR_DIRECTION)) #define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) \ - 
(__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)(__m128d)(W), \ - (__mmask8)(U), (int)(I), \ - (int)(R)) + ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)(__m128d)(W), \ + (__mmask8)(U), (int)(I), \ + (int)(R))) #define _mm_maskz_roundscale_sd(U, A, B, I) \ - (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U), (int)(I), \ - _MM_FROUND_CUR_DIRECTION) + ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)(U), (int)(I), \ + _MM_FROUND_CUR_DIRECTION)) #define _mm_maskz_roundscale_round_sd(U, A, B, I, R) \ - (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U), (int)(I), \ - (int)(R)) + ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)(U), (int)(I), \ + (int)(R))) #define _mm_roundscale_round_ss(A, B, imm, R) \ - (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)-1, (int)(imm), \ - (int)(R)) + ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)-1, (int)(imm), \ + (int)(R))) #define _mm_roundscale_ss(A, B, imm) \ - (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)-1, (int)(imm), \ - _MM_FROUND_CUR_DIRECTION) + ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)-1, (int)(imm), \ + _MM_FROUND_CUR_DIRECTION)) #define _mm_mask_roundscale_ss(W, U, A, B, I) \ - (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)(__m128)(W), \ - (__mmask8)(U), (int)(I), \ - _MM_FROUND_CUR_DIRECTION) + ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)(__m128)(W), \ + (__mmask8)(U), (int)(I), \ + _MM_FROUND_CUR_DIRECTION)) #define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) \ - (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)(__m128)(W), \ - (__mmask8)(U), (int)(I), \ - (int)(R)) + ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)(__m128)(W), \ + (__mmask8)(U), (int)(I), \ + (int)(R))) #define _mm_maskz_roundscale_ss(U, A, B, I) \ - (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U), (int)(I), \ - _MM_FROUND_CUR_DIRECTION) + ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)(U), (int)(I), \ + _MM_FROUND_CUR_DIRECTION)) #define _mm_maskz_roundscale_round_ss(U, A, B, I, R) \ - (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U), (int)(I), \ - (int)(R)) + ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)(U), (int)(I), \ + (int)(R))) #define _mm512_scalef_round_pd(A, 
B, R) \ - (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), \ - (__v8df)_mm512_undefined_pd(), \ - (__mmask8)-1, (int)(R)) + ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), \ + (__v8df)_mm512_undefined_pd(), \ + (__mmask8)-1, (int)(R))) #define _mm512_mask_scalef_round_pd(W, U, A, B, R) \ - (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), \ - (__v8df)(__m512d)(W), \ - (__mmask8)(U), (int)(R)) + ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), \ + (__v8df)(__m512d)(W), \ + (__mmask8)(U), (int)(R))) #define _mm512_maskz_scalef_round_pd(U, A, B, R) \ - (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), \ - (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(U), (int)(R)) + ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), \ + (__v8df)_mm512_setzero_pd(), \ + (__mmask8)(U), (int)(R))) static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_scalef_pd (__m512d __A, __m512d __B) @@ -6452,22 +6452,22 @@ _mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B) } #define _mm512_scalef_round_ps(A, B, R) \ - (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), \ - (__v16sf)_mm512_undefined_ps(), \ - (__mmask16)-1, (int)(R)) + ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), \ + (__v16sf)_mm512_undefined_ps(), \ + (__mmask16)-1, (int)(R))) #define _mm512_mask_scalef_round_ps(W, U, A, B, R) \ - (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), \ - (__v16sf)(__m512)(W), \ - (__mmask16)(U), (int)(R)) + ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), \ + (__v16sf)(__m512)(W), \ + (__mmask16)(U), (int)(R))) #define _mm512_maskz_scalef_round_ps(U, A, B, R) \ - (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), \ - (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(U), (int)(R)) + ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), \ + (__v16sf)_mm512_setzero_ps(), \ + (__mmask16)(U), (int)(R))) static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_scalef_ps (__m512 __A, __m512 __B) @@ -6502,10 +6502,10 @@ _mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B) } #define _mm_scalef_round_sd(A, B, R) \ - (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1, (int)(R)) + ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)-1, (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_scalef_sd (__m128d __A, __m128d __B) @@ -6527,10 +6527,10 @@ _mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) } #define _mm_mask_scalef_round_sd(W, U, A, B, R) \ - (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)(__m128d)(W), \ - (__mmask8)(U), (int)(R)) + ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)(__m128d)(W), \ + (__mmask8)(U), (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B) @@ -6543,16 +6543,16 @@ _mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B) } #define 
_mm_maskz_scalef_round_sd(U, A, B, R) \ - (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U), (int)(R)) + ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)(U), (int)(R))) #define _mm_scalef_round_ss(A, B, R) \ - (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)-1, (int)(R)) + ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)-1, (int)(R))) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_scalef_ss (__m128 __A, __m128 __B) @@ -6574,10 +6574,10 @@ _mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) } #define _mm_mask_scalef_round_ss(W, U, A, B, R) \ - (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)(__m128)(W), \ - (__mmask8)(U), (int)(R)) + ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)(__m128)(W), \ + (__mmask8)(U), (int)(R))) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B) @@ -6590,11 +6590,11 @@ _mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B) } #define _mm_maskz_scalef_round_ss(U, A, B, R) \ - (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U), \ - (int)(R)) + ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)(U), \ + (int)(R))) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srai_epi32(__m512i __A, unsigned int __B) @@ -6642,94 +6642,94 @@ _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, unsigned int __B) } #define _mm512_shuffle_f32x4(A, B, imm) \ - (__m512)__builtin_ia32_shuf_f32x4((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), (int)(imm)) + ((__m512)__builtin_ia32_shuf_f32x4((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), (int)(imm))) #define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) \ - (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ - (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \ - (__v16sf)(__m512)(W)) + ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ + (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \ + (__v16sf)(__m512)(W))) #define _mm512_maskz_shuffle_f32x4(U, A, B, imm) \ - (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ - (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \ - (__v16sf)_mm512_setzero_ps()) + ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ + (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \ + (__v16sf)_mm512_setzero_ps())) #define _mm512_shuffle_f64x2(A, B, imm) \ - (__m512d)__builtin_ia32_shuf_f64x2((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), (int)(imm)) + ((__m512d)__builtin_ia32_shuf_f64x2((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), (int)(imm))) #define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) \ - (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ - (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \ - (__v8df)(__m512d)(W)) + ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ + (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \ + (__v8df)(__m512d)(W))) #define _mm512_maskz_shuffle_f64x2(U, A, B, imm) \ - (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ - (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \ - 
(__v8df)_mm512_setzero_pd()) + ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ + (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \ + (__v8df)_mm512_setzero_pd())) #define _mm512_shuffle_i32x4(A, B, imm) \ - (__m512i)__builtin_ia32_shuf_i32x4((__v16si)(__m512i)(A), \ - (__v16si)(__m512i)(B), (int)(imm)) + ((__m512i)__builtin_ia32_shuf_i32x4((__v16si)(__m512i)(A), \ + (__v16si)(__m512i)(B), (int)(imm))) #define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) \ - (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ - (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \ - (__v16si)(__m512i)(W)) + ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ + (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \ + (__v16si)(__m512i)(W))) #define _mm512_maskz_shuffle_i32x4(U, A, B, imm) \ - (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ - (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \ - (__v16si)_mm512_setzero_si512()) + ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ + (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \ + (__v16si)_mm512_setzero_si512())) #define _mm512_shuffle_i64x2(A, B, imm) \ - (__m512i)__builtin_ia32_shuf_i64x2((__v8di)(__m512i)(A), \ - (__v8di)(__m512i)(B), (int)(imm)) + ((__m512i)__builtin_ia32_shuf_i64x2((__v8di)(__m512i)(A), \ + (__v8di)(__m512i)(B), (int)(imm))) #define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) \ - (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ - (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \ - (__v8di)(__m512i)(W)) + ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ + (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \ + (__v8di)(__m512i)(W))) #define _mm512_maskz_shuffle_i64x2(U, A, B, imm) \ - (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ - (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \ - (__v8di)_mm512_setzero_si512()) + ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ + (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \ + (__v8di)_mm512_setzero_si512())) #define _mm512_shuffle_pd(A, B, M) \ - (__m512d)__builtin_ia32_shufpd512((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(B), (int)(M)) + ((__m512d)__builtin_ia32_shufpd512((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), (int)(M))) #define _mm512_mask_shuffle_pd(W, U, A, B, M) \ - (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ - (__v8df)_mm512_shuffle_pd((A), (B), (M)), \ - (__v8df)(__m512d)(W)) + ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ + (__v8df)_mm512_shuffle_pd((A), (B), (M)), \ + (__v8df)(__m512d)(W))) #define _mm512_maskz_shuffle_pd(U, A, B, M) \ - (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ - (__v8df)_mm512_shuffle_pd((A), (B), (M)), \ - (__v8df)_mm512_setzero_pd()) + ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ + (__v8df)_mm512_shuffle_pd((A), (B), (M)), \ + (__v8df)_mm512_setzero_pd())) #define _mm512_shuffle_ps(A, B, M) \ - (__m512)__builtin_ia32_shufps512((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), (int)(M)) + ((__m512)__builtin_ia32_shufps512((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), (int)(M))) #define _mm512_mask_shuffle_ps(W, U, A, B, M) \ - (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ - (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \ - (__v16sf)(__m512)(W)) + ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ + (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \ + (__v16sf)(__m512)(W))) #define _mm512_maskz_shuffle_ps(U, A, B, M) \ - (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ - (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \ - (__v16sf)_mm512_setzero_ps()) + 
((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ + (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \ + (__v16sf)_mm512_setzero_ps())) #define _mm_sqrt_round_sd(A, B, R) \ - (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1, (int)(R)) + ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)-1, (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) @@ -6742,10 +6742,10 @@ _mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) } #define _mm_mask_sqrt_round_sd(W, U, A, B, R) \ - (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)(__m128d)(W), \ - (__mmask8)(U), (int)(R)) + ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)(__m128d)(W), \ + (__mmask8)(U), (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B) @@ -6758,16 +6758,16 @@ _mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B) } #define _mm_maskz_sqrt_round_sd(U, A, B, R) \ - (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U), (int)(R)) + ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)(U), (int)(R))) #define _mm_sqrt_round_ss(A, B, R) \ - (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)-1, (int)(R)) + ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)-1, (int)(R))) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) @@ -6780,10 +6780,10 @@ _mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) } #define _mm_mask_sqrt_round_ss(W, U, A, B, R) \ - (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)(__m128)(W), (__mmask8)(U), \ - (int)(R)) + ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)(__m128)(W), (__mmask8)(U), \ + (int)(R))) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B) @@ -6796,10 +6796,10 @@ _mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B) } #define _mm_maskz_sqrt_round_ss(U, A, B, R) \ - (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U), (int)(R)) + ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)(U), (int)(R))) static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_broadcast_f32x4(__m128 __A) @@ -7366,183 +7366,183 @@ _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A) } #define _mm512_extracti32x4_epi32(A, imm) \ - (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \ - (__v4si)_mm_undefined_si128(), \ - (__mmask8)-1) + ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \ + (__v4si)_mm_undefined_si128(), \ + (__mmask8)-1)) #define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \ - 
(__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \ - (__v4si)(__m128i)(W), \ - (__mmask8)(U)) + ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \ + (__v4si)(__m128i)(W), \ + (__mmask8)(U))) #define _mm512_maskz_extracti32x4_epi32(U, A, imm) \ - (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \ - (__v4si)_mm_setzero_si128(), \ - (__mmask8)(U)) + ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \ + (__v4si)_mm_setzero_si128(), \ + (__mmask8)(U))) #define _mm512_extracti64x4_epi64(A, imm) \ - (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \ - (__v4di)_mm256_undefined_si256(), \ - (__mmask8)-1) + ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \ + (__v4di)_mm256_undefined_si256(), \ + (__mmask8)-1)) #define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \ - (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \ - (__v4di)(__m256i)(W), \ - (__mmask8)(U)) + ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \ + (__v4di)(__m256i)(W), \ + (__mmask8)(U))) #define _mm512_maskz_extracti64x4_epi64(U, A, imm) \ - (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \ - (__v4di)_mm256_setzero_si256(), \ - (__mmask8)(U)) + ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \ + (__v4di)_mm256_setzero_si256(), \ + (__mmask8)(U))) #define _mm512_insertf64x4(A, B, imm) \ - (__m512d)__builtin_ia32_insertf64x4((__v8df)(__m512d)(A), \ - (__v4df)(__m256d)(B), (int)(imm)) + ((__m512d)__builtin_ia32_insertf64x4((__v8df)(__m512d)(A), \ + (__v4df)(__m256d)(B), (int)(imm))) #define _mm512_mask_insertf64x4(W, U, A, B, imm) \ - (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ - (__v8df)_mm512_insertf64x4((A), (B), (imm)), \ - (__v8df)(__m512d)(W)) + ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ + (__v8df)_mm512_insertf64x4((A), (B), (imm)), \ + (__v8df)(__m512d)(W))) #define _mm512_maskz_insertf64x4(U, A, B, imm) \ - (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ - (__v8df)_mm512_insertf64x4((A), (B), (imm)), \ - (__v8df)_mm512_setzero_pd()) + ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ + (__v8df)_mm512_insertf64x4((A), (B), (imm)), \ + (__v8df)_mm512_setzero_pd())) #define _mm512_inserti64x4(A, B, imm) \ - (__m512i)__builtin_ia32_inserti64x4((__v8di)(__m512i)(A), \ - (__v4di)(__m256i)(B), (int)(imm)) + ((__m512i)__builtin_ia32_inserti64x4((__v8di)(__m512i)(A), \ + (__v4di)(__m256i)(B), (int)(imm))) #define _mm512_mask_inserti64x4(W, U, A, B, imm) \ - (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ - (__v8di)_mm512_inserti64x4((A), (B), (imm)), \ - (__v8di)(__m512i)(W)) + ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ + (__v8di)_mm512_inserti64x4((A), (B), (imm)), \ + (__v8di)(__m512i)(W))) #define _mm512_maskz_inserti64x4(U, A, B, imm) \ - (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ - (__v8di)_mm512_inserti64x4((A), (B), (imm)), \ - (__v8di)_mm512_setzero_si512()) + ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ + (__v8di)_mm512_inserti64x4((A), (B), (imm)), \ + (__v8di)_mm512_setzero_si512())) #define _mm512_insertf32x4(A, B, imm) \ - (__m512)__builtin_ia32_insertf32x4((__v16sf)(__m512)(A), \ - (__v4sf)(__m128)(B), (int)(imm)) + ((__m512)__builtin_ia32_insertf32x4((__v16sf)(__m512)(A), \ + (__v4sf)(__m128)(B), (int)(imm))) #define _mm512_mask_insertf32x4(W, U, A, B, imm) \ - 
(__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ - (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \ - (__v16sf)(__m512)(W)) + ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ + (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \ + (__v16sf)(__m512)(W))) #define _mm512_maskz_insertf32x4(U, A, B, imm) \ - (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ - (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \ - (__v16sf)_mm512_setzero_ps()) + ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ + (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \ + (__v16sf)_mm512_setzero_ps())) #define _mm512_inserti32x4(A, B, imm) \ - (__m512i)__builtin_ia32_inserti32x4((__v16si)(__m512i)(A), \ - (__v4si)(__m128i)(B), (int)(imm)) + ((__m512i)__builtin_ia32_inserti32x4((__v16si)(__m512i)(A), \ + (__v4si)(__m128i)(B), (int)(imm))) #define _mm512_mask_inserti32x4(W, U, A, B, imm) \ - (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ - (__v16si)_mm512_inserti32x4((A), (B), (imm)), \ - (__v16si)(__m512i)(W)) + ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ + (__v16si)_mm512_inserti32x4((A), (B), (imm)), \ + (__v16si)(__m512i)(W))) #define _mm512_maskz_inserti32x4(U, A, B, imm) \ - (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ - (__v16si)_mm512_inserti32x4((A), (B), (imm)), \ - (__v16si)_mm512_setzero_si512()) + ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ + (__v16si)_mm512_inserti32x4((A), (B), (imm)), \ + (__v16si)_mm512_setzero_si512())) #define _mm512_getmant_round_pd(A, B, C, R) \ - (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ - (int)(((C)<<2) | (B)), \ - (__v8df)_mm512_undefined_pd(), \ - (__mmask8)-1, (int)(R)) + ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ + (int)(((C)<<2) | (B)), \ + (__v8df)_mm512_undefined_pd(), \ + (__mmask8)-1, (int)(R))) #define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) \ - (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ - (int)(((C)<<2) | (B)), \ - (__v8df)(__m512d)(W), \ - (__mmask8)(U), (int)(R)) + ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ + (int)(((C)<<2) | (B)), \ + (__v8df)(__m512d)(W), \ + (__mmask8)(U), (int)(R))) #define _mm512_maskz_getmant_round_pd(U, A, B, C, R) \ - (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ - (int)(((C)<<2) | (B)), \ - (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(U), (int)(R)) + ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ + (int)(((C)<<2) | (B)), \ + (__v8df)_mm512_setzero_pd(), \ + (__mmask8)(U), (int)(R))) #define _mm512_getmant_pd(A, B, C) \ - (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ - (int)(((C)<<2) | (B)), \ - (__v8df)_mm512_setzero_pd(), \ - (__mmask8)-1, \ - _MM_FROUND_CUR_DIRECTION) + ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ + (int)(((C)<<2) | (B)), \ + (__v8df)_mm512_setzero_pd(), \ + (__mmask8)-1, \ + _MM_FROUND_CUR_DIRECTION)) #define _mm512_mask_getmant_pd(W, U, A, B, C) \ - (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ - (int)(((C)<<2) | (B)), \ - (__v8df)(__m512d)(W), \ - (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION) + ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ + (int)(((C)<<2) | (B)), \ + (__v8df)(__m512d)(W), \ + (__mmask8)(U), \ + _MM_FROUND_CUR_DIRECTION)) #define _mm512_maskz_getmant_pd(U, A, B, C) \ - (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ - (int)(((C)<<2) | (B)), \ - (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION) + 
((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ + (int)(((C)<<2) | (B)), \ + (__v8df)_mm512_setzero_pd(), \ + (__mmask8)(U), \ + _MM_FROUND_CUR_DIRECTION)) #define _mm512_getmant_round_ps(A, B, C, R) \ - (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ - (int)(((C)<<2) | (B)), \ - (__v16sf)_mm512_undefined_ps(), \ - (__mmask16)-1, (int)(R)) + ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ + (int)(((C)<<2) | (B)), \ + (__v16sf)_mm512_undefined_ps(), \ + (__mmask16)-1, (int)(R))) #define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) \ - (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ - (int)(((C)<<2) | (B)), \ - (__v16sf)(__m512)(W), \ - (__mmask16)(U), (int)(R)) + ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ + (int)(((C)<<2) | (B)), \ + (__v16sf)(__m512)(W), \ + (__mmask16)(U), (int)(R))) #define _mm512_maskz_getmant_round_ps(U, A, B, C, R) \ - (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ - (int)(((C)<<2) | (B)), \ - (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(U), (int)(R)) + ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ + (int)(((C)<<2) | (B)), \ + (__v16sf)_mm512_setzero_ps(), \ + (__mmask16)(U), (int)(R))) #define _mm512_getmant_ps(A, B, C) \ - (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ - (int)(((C)<<2)|(B)), \ - (__v16sf)_mm512_undefined_ps(), \ - (__mmask16)-1, \ - _MM_FROUND_CUR_DIRECTION) + ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ + (int)(((C)<<2)|(B)), \ + (__v16sf)_mm512_undefined_ps(), \ + (__mmask16)-1, \ + _MM_FROUND_CUR_DIRECTION)) #define _mm512_mask_getmant_ps(W, U, A, B, C) \ - (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ - (int)(((C)<<2)|(B)), \ - (__v16sf)(__m512)(W), \ - (__mmask16)(U), \ - _MM_FROUND_CUR_DIRECTION) + ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ + (int)(((C)<<2)|(B)), \ + (__v16sf)(__m512)(W), \ + (__mmask16)(U), \ + _MM_FROUND_CUR_DIRECTION)) #define _mm512_maskz_getmant_ps(U, A, B, C) \ - (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ - (int)(((C)<<2)|(B)), \ - (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(U), \ - _MM_FROUND_CUR_DIRECTION) + ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ + (int)(((C)<<2)|(B)), \ + (__v16sf)_mm512_setzero_ps(), \ + (__mmask16)(U), \ + _MM_FROUND_CUR_DIRECTION)) #define _mm512_getexp_round_pd(A, R) \ - (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ - (__v8df)_mm512_undefined_pd(), \ - (__mmask8)-1, (int)(R)) + ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ + (__v8df)_mm512_undefined_pd(), \ + (__mmask8)-1, (int)(R))) #define _mm512_mask_getexp_round_pd(W, U, A, R) \ - (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(W), \ - (__mmask8)(U), (int)(R)) + ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(W), \ + (__mmask8)(U), (int)(R))) #define _mm512_maskz_getexp_round_pd(U, A, R) \ - (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ - (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(U), (int)(R)) + ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ + (__v8df)_mm512_setzero_pd(), \ + (__mmask8)(U), (int)(R))) static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_getexp_pd (__m512d __A) @@ -7572,19 +7572,19 @@ _mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A) } #define _mm512_getexp_round_ps(A, R) \ - 
(__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ - (__v16sf)_mm512_undefined_ps(), \ - (__mmask16)-1, (int)(R)) + ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ + (__v16sf)_mm512_undefined_ps(), \ + (__mmask16)-1, (int)(R))) #define _mm512_mask_getexp_round_ps(W, U, A, R) \ - (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(W), \ - (__mmask16)(U), (int)(R)) + ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(W), \ + (__mmask16)(U), (int)(R))) #define _mm512_maskz_getexp_round_ps(U, A, R) \ - (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ - (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(U), (int)(R)) + ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ + (__v16sf)_mm512_setzero_ps(), \ + (__mmask16)(U), (int)(R))) static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_getexp_ps (__m512 __A) @@ -7614,100 +7614,100 @@ _mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A) } #define _mm512_i64gather_ps(index, addr, scale) \ - (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \ - (void const *)(addr), \ - (__v8di)(__m512i)(index), (__mmask8)-1, \ - (int)(scale)) + ((__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \ + (void const *)(addr), \ + (__v8di)(__m512i)(index), (__mmask8)-1, \ + (int)(scale))) #define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) \ - (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old),\ - (void const *)(addr), \ - (__v8di)(__m512i)(index), \ - (__mmask8)(mask), (int)(scale)) - -#define _mm512_i64gather_epi32(index, addr, scale) \ - (__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_si256(), \ + ((__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old),\ (void const *)(addr), \ (__v8di)(__m512i)(index), \ - (__mmask8)-1, (int)(scale)) + (__mmask8)(mask), (int)(scale))) + +#define _mm512_i64gather_epi32(index, addr, scale) \ + ((__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_si256(), \ + (void const *)(addr), \ + (__v8di)(__m512i)(index), \ + (__mmask8)-1, (int)(scale))) #define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) \ - (__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \ - (void const *)(addr), \ - (__v8di)(__m512i)(index), \ - (__mmask8)(mask), (int)(scale)) + ((__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \ + (void const *)(addr), \ + (__v8di)(__m512i)(index), \ + (__mmask8)(mask), (int)(scale))) #define _mm512_i64gather_pd(index, addr, scale) \ - (__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \ - (void const *)(addr), \ - (__v8di)(__m512i)(index), (__mmask8)-1, \ - (int)(scale)) + ((__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \ + (void const *)(addr), \ + (__v8di)(__m512i)(index), (__mmask8)-1, \ + (int)(scale))) #define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) \ - (__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \ - (void const *)(addr), \ - (__v8di)(__m512i)(index), \ - (__mmask8)(mask), (int)(scale)) + ((__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \ + (void const *)(addr), \ + (__v8di)(__m512i)(index), \ + (__mmask8)(mask), (int)(scale))) #define _mm512_i64gather_epi64(index, addr, scale) \ - (__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_epi32(), \ - (void const *)(addr), \ - (__v8di)(__m512i)(index), (__mmask8)-1, \ - (int)(scale)) + 
((__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_epi32(), \ + (void const *)(addr), \ + (__v8di)(__m512i)(index), (__mmask8)-1, \ + (int)(scale))) #define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) \ - (__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \ - (void const *)(addr), \ - (__v8di)(__m512i)(index), \ - (__mmask8)(mask), (int)(scale)) + ((__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \ + (void const *)(addr), \ + (__v8di)(__m512i)(index), \ + (__mmask8)(mask), (int)(scale))) #define _mm512_i32gather_ps(index, addr, scale) \ - (__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \ - (void const *)(addr), \ - (__v16si)(__m512)(index), \ - (__mmask16)-1, (int)(scale)) + ((__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \ + (void const *)(addr), \ + (__v16si)(__m512)(index), \ + (__mmask16)-1, (int)(scale))) #define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) \ - (__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \ - (void const *)(addr), \ - (__v16si)(__m512)(index), \ - (__mmask16)(mask), (int)(scale)) + ((__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \ + (void const *)(addr), \ + (__v16si)(__m512)(index), \ + (__mmask16)(mask), (int)(scale))) #define _mm512_i32gather_epi32(index, addr, scale) \ - (__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \ - (void const *)(addr), \ - (__v16si)(__m512i)(index), \ - (__mmask16)-1, (int)(scale)) + ((__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \ + (void const *)(addr), \ + (__v16si)(__m512i)(index), \ + (__mmask16)-1, (int)(scale))) #define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) \ - (__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \ - (void const *)(addr), \ - (__v16si)(__m512i)(index), \ - (__mmask16)(mask), (int)(scale)) + ((__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \ + (void const *)(addr), \ + (__v16si)(__m512i)(index), \ + (__mmask16)(mask), (int)(scale))) #define _mm512_i32gather_pd(index, addr, scale) \ - (__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \ - (void const *)(addr), \ - (__v8si)(__m256i)(index), (__mmask8)-1, \ - (int)(scale)) + ((__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \ + (void const *)(addr), \ + (__v8si)(__m256i)(index), (__mmask8)-1, \ + (int)(scale))) #define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) \ - (__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \ - (void const *)(addr), \ - (__v8si)(__m256i)(index), \ - (__mmask8)(mask), (int)(scale)) + ((__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \ + (void const *)(addr), \ + (__v8si)(__m256i)(index), \ + (__mmask8)(mask), (int)(scale))) #define _mm512_i32gather_epi64(index, addr, scale) \ - (__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \ - (void const *)(addr), \ - (__v8si)(__m256i)(index), (__mmask8)-1, \ - (int)(scale)) + ((__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \ + (void const *)(addr), \ + (__v8si)(__m256i)(index), (__mmask8)-1, \ + (int)(scale))) #define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) \ - (__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \ - (void const *)(addr), \ - (__v8si)(__m256i)(index), \ - (__mmask8)(mask), (int)(scale)) + ((__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \ + (void const *)(addr), \ + 
(__v8si)(__m256i)(index), \ + (__mmask8)(mask), (int)(scale))) #define _mm512_i64scatter_ps(addr, index, v1, scale) \ __builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)-1, \ @@ -7800,16 +7800,16 @@ _mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) } #define _mm_fmadd_round_ss(A, B, C, R) \ - (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)(__m128)(C), (__mmask8)-1, \ - (int)(R)) + ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)(__m128)(C), (__mmask8)-1, \ + (int)(R))) #define _mm_mask_fmadd_round_ss(W, U, A, B, R) \ - (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ - (__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), (__mmask8)(U), \ - (int)(R)) + ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ + (__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), (__mmask8)(U), \ + (int)(R))) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) @@ -7822,10 +7822,10 @@ _mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) } #define _mm_maskz_fmadd_round_ss(U, A, B, C, R) \ - (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)(__m128)(C), (__mmask8)(U), \ - (int)(R)) + ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)(__m128)(C), (__mmask8)(U), \ + (int)(R))) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) @@ -7838,10 +7838,10 @@ _mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) } #define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) \ - (__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \ - (__v4sf)(__m128)(X), \ - (__v4sf)(__m128)(Y), (__mmask8)(U), \ - (int)(R)) + ((__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \ + (__v4sf)(__m128)(X), \ + (__v4sf)(__m128)(Y), (__mmask8)(U), \ + (int)(R))) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) @@ -7854,16 +7854,16 @@ _mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) } #define _mm_fmsub_round_ss(A, B, C, R) \ - (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - -(__v4sf)(__m128)(C), (__mmask8)-1, \ - (int)(R)) + ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + -(__v4sf)(__m128)(C), (__mmask8)-1, \ + (int)(R))) #define _mm_mask_fmsub_round_ss(W, U, A, B, R) \ - (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ - (__v4sf)(__m128)(A), \ - -(__v4sf)(__m128)(B), (__mmask8)(U), \ - (int)(R)) + ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ + (__v4sf)(__m128)(A), \ + -(__v4sf)(__m128)(B), (__mmask8)(U), \ + (int)(R))) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) @@ -7876,10 +7876,10 @@ _mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) } #define _mm_maskz_fmsub_round_ss(U, A, B, C, R) \ - (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - -(__v4sf)(__m128)(C), (__mmask8)(U), \ - (int)(R)) + ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + -(__v4sf)(__m128)(C), (__mmask8)(U), \ + (int)(R))) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) 
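Every hunk in this header applies the same mechanical change: the full expansion of each intrinsic macro gains one outer pair of parentheses, so the expansion behaves as a single primary expression at every use site. As a minimal sketch of the kind of precedence surprise that full parenthesization guards against (using a hypothetical macro and helper, not anything from this patch, and not claiming this is the patch's stated rationale): an expansion that ends in a cast expression binds more loosely than postfix operators and cannot be the operand of sizeof, so subscripting or sizeof-ing the unwrapped result parses differently than intended.

/* Illustration only: hypothetical macro and helper names, not part of the patch. */
#include <stdio.h>

static int storage[4];                               /* real int objects to index into   */
static char *get_buf(void) { return (char *)storage; }

#define FIRST_INT_UNWRAPPED()  (int *)get_buf()      /* expansion ends in a bare cast          */
#define FIRST_INT_WRAPPED()    ((int *)get_buf())    /* whole expansion is one parenthesized expression */

int main(void) {
  /* FIRST_INT_UNWRAPPED()[0] expands to (int *)get_buf()[0], which parses as
   * (int *)(get_buf()[0]) -- the first char converted to a pointer -- because
   * postfix [] binds tighter than a cast.  The wrapped form indexes the pointer. */
  int first = FIRST_INT_WRAPPED()[0];

  /* sizeof FIRST_INT_UNWRAPPED() would parse as (sizeof(int *)) get_buf() and fail
   * to compile, since sizeof cannot take a cast expression without parentheses;
   * the wrapped form is a valid (unevaluated) sizeof operand. */
  printf("%d %zu\n", first, sizeof FIRST_INT_WRAPPED());
  return 0;
}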
@@ -7892,10 +7892,10 @@ _mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) } #define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) \ - (__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \ - (__v4sf)(__m128)(X), \ - (__v4sf)(__m128)(Y), (__mmask8)(U), \ - (int)(R)) + ((__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \ + (__v4sf)(__m128)(X), \ + (__v4sf)(__m128)(Y), (__mmask8)(U), \ + (int)(R))) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) @@ -7908,16 +7908,16 @@ _mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) } #define _mm_fnmadd_round_ss(A, B, C, R) \ - (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \ - -(__v4sf)(__m128)(B), \ - (__v4sf)(__m128)(C), (__mmask8)-1, \ - (int)(R)) + ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \ + -(__v4sf)(__m128)(B), \ + (__v4sf)(__m128)(C), (__mmask8)-1, \ + (int)(R))) #define _mm_mask_fnmadd_round_ss(W, U, A, B, R) \ - (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ - -(__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), (__mmask8)(U), \ - (int)(R)) + ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ + -(__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), (__mmask8)(U), \ + (int)(R))) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) @@ -7930,10 +7930,10 @@ _mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) } #define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) \ - (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \ - -(__v4sf)(__m128)(B), \ - (__v4sf)(__m128)(C), (__mmask8)(U), \ - (int)(R)) + ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \ + -(__v4sf)(__m128)(B), \ + (__v4sf)(__m128)(C), (__mmask8)(U), \ + (int)(R))) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) @@ -7946,10 +7946,10 @@ _mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) } #define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) \ - (__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \ - -(__v4sf)(__m128)(X), \ - (__v4sf)(__m128)(Y), (__mmask8)(U), \ - (int)(R)) + ((__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \ + -(__v4sf)(__m128)(X), \ + (__v4sf)(__m128)(Y), (__mmask8)(U), \ + (int)(R))) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) @@ -7962,16 +7962,16 @@ _mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) } #define _mm_fnmsub_round_ss(A, B, C, R) \ - (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \ - -(__v4sf)(__m128)(B), \ - -(__v4sf)(__m128)(C), (__mmask8)-1, \ - (int)(R)) + ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \ + -(__v4sf)(__m128)(B), \ + -(__v4sf)(__m128)(C), (__mmask8)-1, \ + (int)(R))) #define _mm_mask_fnmsub_round_ss(W, U, A, B, R) \ - (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ - -(__v4sf)(__m128)(A), \ - -(__v4sf)(__m128)(B), (__mmask8)(U), \ - (int)(R)) + ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ + -(__v4sf)(__m128)(A), \ + -(__v4sf)(__m128)(B), (__mmask8)(U), \ + (int)(R))) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) @@ -7984,10 +7984,10 @@ _mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) } #define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) \ - 
(__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \ - -(__v4sf)(__m128)(B), \ - -(__v4sf)(__m128)(C), (__mmask8)(U), \ - (int)(R)) + ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \ + -(__v4sf)(__m128)(B), \ + -(__v4sf)(__m128)(C), (__mmask8)(U), \ + (int)(R))) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) @@ -8000,10 +8000,10 @@ _mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) } #define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) \ - (__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \ - -(__v4sf)(__m128)(X), \ - (__v4sf)(__m128)(Y), (__mmask8)(U), \ - (int)(R)) + ((__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \ + -(__v4sf)(__m128)(X), \ + (__v4sf)(__m128)(Y), (__mmask8)(U), \ + (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) @@ -8016,16 +8016,16 @@ _mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) } #define _mm_fmadd_round_sd(A, B, C, R) \ - (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)(__m128d)(C), (__mmask8)-1, \ - (int)(R)) + ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)(__m128d)(C), (__mmask8)-1, \ + (int)(R))) #define _mm_mask_fmadd_round_sd(W, U, A, B, R) \ - (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ - (__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), (__mmask8)(U), \ - (int)(R)) + ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ + (__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), (__mmask8)(U), \ + (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) @@ -8038,10 +8038,10 @@ _mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) } #define _mm_maskz_fmadd_round_sd(U, A, B, C, R) \ - (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)(__m128d)(C), (__mmask8)(U), \ - (int)(R)) + ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)(__m128d)(C), (__mmask8)(U), \ + (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) @@ -8054,10 +8054,10 @@ _mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) } #define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) \ - (__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \ - (__v2df)(__m128d)(X), \ - (__v2df)(__m128d)(Y), (__mmask8)(U), \ - (int)(R)) + ((__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \ + (__v2df)(__m128d)(X), \ + (__v2df)(__m128d)(Y), (__mmask8)(U), \ + (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) @@ -8070,16 +8070,16 @@ _mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) } #define _mm_fmsub_round_sd(A, B, C, R) \ - (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - -(__v2df)(__m128d)(C), (__mmask8)-1, \ - (int)(R)) + ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + -(__v2df)(__m128d)(C), (__mmask8)-1, \ + (int)(R))) #define _mm_mask_fmsub_round_sd(W, U, A, B, R) \ - (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ - (__v2df)(__m128d)(A), \ - 
-(__v2df)(__m128d)(B), (__mmask8)(U), \ - (int)(R)) + ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ + (__v2df)(__m128d)(A), \ + -(__v2df)(__m128d)(B), (__mmask8)(U), \ + (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) @@ -8092,10 +8092,10 @@ _mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) } #define _mm_maskz_fmsub_round_sd(U, A, B, C, R) \ - (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - -(__v2df)(__m128d)(C), \ - (__mmask8)(U), (int)(R)) + ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + -(__v2df)(__m128d)(C), \ + (__mmask8)(U), (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) @@ -8108,10 +8108,10 @@ _mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) } #define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) \ - (__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \ - (__v2df)(__m128d)(X), \ - (__v2df)(__m128d)(Y), \ - (__mmask8)(U), (int)(R)) + ((__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \ + (__v2df)(__m128d)(X), \ + (__v2df)(__m128d)(Y), \ + (__mmask8)(U), (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) @@ -8124,16 +8124,16 @@ _mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) } #define _mm_fnmadd_round_sd(A, B, C, R) \ - (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \ - -(__v2df)(__m128d)(B), \ - (__v2df)(__m128d)(C), (__mmask8)-1, \ - (int)(R)) + ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \ + -(__v2df)(__m128d)(B), \ + (__v2df)(__m128d)(C), (__mmask8)-1, \ + (int)(R))) #define _mm_mask_fnmadd_round_sd(W, U, A, B, R) \ - (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ - -(__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), (__mmask8)(U), \ - (int)(R)) + ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ + -(__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), (__mmask8)(U), \ + (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) @@ -8146,10 +8146,10 @@ _mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) } #define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) \ - (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \ - -(__v2df)(__m128d)(B), \ - (__v2df)(__m128d)(C), (__mmask8)(U), \ - (int)(R)) + ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \ + -(__v2df)(__m128d)(B), \ + (__v2df)(__m128d)(C), (__mmask8)(U), \ + (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) @@ -8162,10 +8162,10 @@ _mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) } #define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) \ - (__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \ - -(__v2df)(__m128d)(X), \ - (__v2df)(__m128d)(Y), (__mmask8)(U), \ - (int)(R)) + ((__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \ + -(__v2df)(__m128d)(X), \ + (__v2df)(__m128d)(Y), (__mmask8)(U), \ + (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) @@ -8178,16 +8178,16 @@ _mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d 
__A, __m128d __B) } #define _mm_fnmsub_round_sd(A, B, C, R) \ - (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \ - -(__v2df)(__m128d)(B), \ - -(__v2df)(__m128d)(C), (__mmask8)-1, \ - (int)(R)) + ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \ + -(__v2df)(__m128d)(B), \ + -(__v2df)(__m128d)(C), (__mmask8)-1, \ + (int)(R))) #define _mm_mask_fnmsub_round_sd(W, U, A, B, R) \ - (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ - -(__v2df)(__m128d)(A), \ - -(__v2df)(__m128d)(B), (__mmask8)(U), \ - (int)(R)) + ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ + -(__v2df)(__m128d)(A), \ + -(__v2df)(__m128d)(B), (__mmask8)(U), \ + (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) @@ -8200,11 +8200,11 @@ _mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) } #define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) \ - (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \ - -(__v2df)(__m128d)(B), \ - -(__v2df)(__m128d)(C), \ - (__mmask8)(U), \ - (int)(R)) + ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \ + -(__v2df)(__m128d)(B), \ + -(__v2df)(__m128d)(C), \ + (__mmask8)(U), \ + (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) @@ -8217,36 +8217,36 @@ _mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) } #define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) \ - (__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \ - -(__v2df)(__m128d)(X), \ - (__v2df)(__m128d)(Y), \ - (__mmask8)(U), (int)(R)) + ((__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \ + -(__v2df)(__m128d)(X), \ + (__v2df)(__m128d)(Y), \ + (__mmask8)(U), (int)(R))) #define _mm512_permutex_pd(X, C) \ - (__m512d)__builtin_ia32_permdf512((__v8df)(__m512d)(X), (int)(C)) + ((__m512d)__builtin_ia32_permdf512((__v8df)(__m512d)(X), (int)(C))) #define _mm512_mask_permutex_pd(W, U, X, C) \ - (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ - (__v8df)_mm512_permutex_pd((X), (C)), \ - (__v8df)(__m512d)(W)) + ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ + (__v8df)_mm512_permutex_pd((X), (C)), \ + (__v8df)(__m512d)(W))) #define _mm512_maskz_permutex_pd(U, X, C) \ - (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ - (__v8df)_mm512_permutex_pd((X), (C)), \ - (__v8df)_mm512_setzero_pd()) + ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ + (__v8df)_mm512_permutex_pd((X), (C)), \ + (__v8df)_mm512_setzero_pd())) #define _mm512_permutex_epi64(X, C) \ - (__m512i)__builtin_ia32_permdi512((__v8di)(__m512i)(X), (int)(C)) + ((__m512i)__builtin_ia32_permdi512((__v8di)(__m512i)(X), (int)(C))) #define _mm512_mask_permutex_epi64(W, U, X, C) \ - (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ - (__v8di)_mm512_permutex_epi64((X), (C)), \ - (__v8di)(__m512i)(W)) + ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ + (__v8di)_mm512_permutex_epi64((X), (C)), \ + (__v8di)(__m512i)(W))) #define _mm512_maskz_permutex_epi64(U, X, C) \ - (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ - (__v8di)_mm512_permutex_epi64((X), (C)), \ - (__v8di)_mm512_setzero_si512()) + ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ + (__v8di)_mm512_permutex_epi64((X), (C)), \ + (__v8di)_mm512_setzero_si512())) static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_permutexvar_pd (__m512i __X, __m512d __Y) @@ -8416,10 +8416,10 @@ _mm512_kxor (__mmask16 __A, 
__mmask16 __B) #define _kxor_mask16 _mm512_kxor #define _kshiftli_mask16(A, I) \ - (__mmask16)__builtin_ia32_kshiftlihi((__mmask16)(A), (unsigned int)(I)) + ((__mmask16)__builtin_ia32_kshiftlihi((__mmask16)(A), (unsigned int)(I))) #define _kshiftri_mask16(A, I) \ - (__mmask16)__builtin_ia32_kshiftrihi((__mmask16)(A), (unsigned int)(I)) + ((__mmask16)__builtin_ia32_kshiftrihi((__mmask16)(A), (unsigned int)(I))) static __inline__ unsigned int __DEFAULT_FN_ATTRS _cvtmask16_u32(__mmask16 __A) { @@ -8538,48 +8538,48 @@ _mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A) } #define _mm_cmp_round_ss_mask(X, Y, P, R) \ - (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ - (__v4sf)(__m128)(Y), (int)(P), \ - (__mmask8)-1, (int)(R)) + ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ + (__v4sf)(__m128)(Y), (int)(P), \ + (__mmask8)-1, (int)(R))) #define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \ - (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ - (__v4sf)(__m128)(Y), (int)(P), \ - (__mmask8)(M), (int)(R)) + ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ + (__v4sf)(__m128)(Y), (int)(P), \ + (__mmask8)(M), (int)(R))) #define _mm_cmp_ss_mask(X, Y, P) \ - (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ - (__v4sf)(__m128)(Y), (int)(P), \ - (__mmask8)-1, \ - _MM_FROUND_CUR_DIRECTION) + ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ + (__v4sf)(__m128)(Y), (int)(P), \ + (__mmask8)-1, \ + _MM_FROUND_CUR_DIRECTION)) #define _mm_mask_cmp_ss_mask(M, X, Y, P) \ - (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ - (__v4sf)(__m128)(Y), (int)(P), \ - (__mmask8)(M), \ - _MM_FROUND_CUR_DIRECTION) + ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ + (__v4sf)(__m128)(Y), (int)(P), \ + (__mmask8)(M), \ + _MM_FROUND_CUR_DIRECTION)) #define _mm_cmp_round_sd_mask(X, Y, P, R) \ - (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ - (__v2df)(__m128d)(Y), (int)(P), \ - (__mmask8)-1, (int)(R)) + ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ + (__v2df)(__m128d)(Y), (int)(P), \ + (__mmask8)-1, (int)(R))) #define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \ - (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ - (__v2df)(__m128d)(Y), (int)(P), \ - (__mmask8)(M), (int)(R)) + ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ + (__v2df)(__m128d)(Y), (int)(P), \ + (__mmask8)(M), (int)(R))) #define _mm_cmp_sd_mask(X, Y, P) \ - (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ - (__v2df)(__m128d)(Y), (int)(P), \ - (__mmask8)-1, \ - _MM_FROUND_CUR_DIRECTION) + ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ + (__v2df)(__m128d)(Y), (int)(P), \ + (__mmask8)-1, \ + _MM_FROUND_CUR_DIRECTION)) #define _mm_mask_cmp_sd_mask(M, X, Y, P) \ - (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ - (__v2df)(__m128d)(Y), (int)(P), \ - (__mmask8)(M), \ - _MM_FROUND_CUR_DIRECTION) + ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ + (__v2df)(__m128d)(Y), (int)(P), \ + (__mmask8)(M), \ + _MM_FROUND_CUR_DIRECTION)) /* Bit Test */ @@ -8760,17 +8760,17 @@ _mm_maskz_load_sd (__mmask8 __U, const double* __A) } #define _mm512_shuffle_epi32(A, I) \ - (__m512i)__builtin_ia32_pshufd512((__v16si)(__m512i)(A), (int)(I)) + ((__m512i)__builtin_ia32_pshufd512((__v16si)(__m512i)(A), (int)(I))) #define _mm512_mask_shuffle_epi32(W, U, A, I) \ - (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ - (__v16si)_mm512_shuffle_epi32((A), (I)), \ - (__v16si)(__m512i)(W)) + 
((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ + (__v16si)_mm512_shuffle_epi32((A), (I)), \ + (__v16si)(__m512i)(W))) #define _mm512_maskz_shuffle_epi32(U, A, I) \ - (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ - (__v16si)_mm512_shuffle_epi32((A), (I)), \ - (__v16si)_mm512_setzero_si512()) + ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ + (__v16si)_mm512_shuffle_epi32((A), (I)), \ + (__v16si)_mm512_setzero_si512())) static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A) @@ -8901,19 +8901,19 @@ _mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A) } #define _mm512_cvt_roundps_pd(A, R) \ - (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \ - (__v8df)_mm512_undefined_pd(), \ - (__mmask8)-1, (int)(R)) + ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \ + (__v8df)_mm512_undefined_pd(), \ + (__mmask8)-1, (int)(R))) #define _mm512_mask_cvt_roundps_pd(W, U, A, R) \ - (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \ - (__v8df)(__m512d)(W), \ - (__mmask8)(U), (int)(R)) + ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \ + (__v8df)(__m512d)(W), \ + (__mmask8)(U), (int)(R))) #define _mm512_maskz_cvt_roundps_pd(U, A, R) \ - (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \ - (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(U), (int)(R)) + ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \ + (__v8df)_mm512_setzero_pd(), \ + (__mmask8)(U), (int)(R))) static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtps_pd (__m256 __A) @@ -9010,22 +9010,22 @@ _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A) } #define _mm_cvt_roundsd_ss(A, B, R) \ - (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \ - (__v2df)(__m128d)(B), \ - (__v4sf)_mm_undefined_ps(), \ - (__mmask8)-1, (int)(R)) + ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \ + (__v2df)(__m128d)(B), \ + (__v4sf)_mm_undefined_ps(), \ + (__mmask8)-1, (int)(R))) #define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) \ - (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \ - (__v2df)(__m128d)(B), \ - (__v4sf)(__m128)(W), \ - (__mmask8)(U), (int)(R)) + ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \ + (__v2df)(__m128d)(B), \ + (__v4sf)(__m128)(W), \ + (__mmask8)(U), (int)(R))) #define _mm_maskz_cvt_roundsd_ss(U, A, B, R) \ - (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \ - (__v2df)(__m128d)(B), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U), (int)(R)) + ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \ + (__v2df)(__m128d)(B), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)(U), (int)(R))) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B) @@ -9058,47 +9058,47 @@ _mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B) #ifdef __x86_64__ #define _mm_cvt_roundi64_sd(A, B, R) \ - (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \ - (int)(R)) + ((__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \ + (int)(R))) #define _mm_cvt_roundsi64_sd(A, B, R) \ - (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \ - (int)(R)) + ((__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \ + (int)(R))) #endif #define _mm_cvt_roundsi32_ss(A, B, R) \ - (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)) + 
((__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R))) #define _mm_cvt_roundi32_ss(A, B, R) \ - (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)) + ((__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R))) #ifdef __x86_64__ #define _mm_cvt_roundsi64_ss(A, B, R) \ - (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \ - (int)(R)) + ((__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \ + (int)(R))) #define _mm_cvt_roundi64_ss(A, B, R) \ - (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \ - (int)(R)) + ((__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \ + (int)(R))) #endif #define _mm_cvt_roundss_sd(A, B, R) \ - (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \ - (__v4sf)(__m128)(B), \ - (__v2df)_mm_undefined_pd(), \ - (__mmask8)-1, (int)(R)) + ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \ + (__v4sf)(__m128)(B), \ + (__v2df)_mm_undefined_pd(), \ + (__mmask8)-1, (int)(R))) #define _mm_mask_cvt_roundss_sd(W, U, A, B, R) \ - (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \ - (__v4sf)(__m128)(B), \ - (__v2df)(__m128d)(W), \ - (__mmask8)(U), (int)(R)) + ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \ + (__v4sf)(__m128)(B), \ + (__v2df)(__m128d)(W), \ + (__mmask8)(U), (int)(R))) #define _mm_maskz_cvt_roundss_sd(U, A, B, R) \ - (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \ - (__v4sf)(__m128)(B), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U), (int)(R)) + ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \ + (__v4sf)(__m128)(B), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)(U), (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B) @@ -9127,8 +9127,8 @@ _mm_cvtu32_sd (__m128d __A, unsigned __B) #ifdef __x86_64__ #define _mm_cvt_roundu64_sd(A, B, R) \ - (__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \ - (unsigned long long)(B), (int)(R)) + ((__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \ + (unsigned long long)(B), (int)(R))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtu64_sd (__m128d __A, unsigned long long __B) @@ -9139,8 +9139,8 @@ _mm_cvtu64_sd (__m128d __A, unsigned long long __B) #endif #define _mm_cvt_roundu32_ss(A, B, R) \ - (__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \ - (int)(R)) + ((__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \ + (int)(R))) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtu32_ss (__m128 __A, unsigned __B) @@ -9151,8 +9151,8 @@ _mm_cvtu32_ss (__m128 __A, unsigned __B) #ifdef __x86_64__ #define _mm_cvt_roundu64_ss(A, B, R) \ - (__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \ - (unsigned long long)(B), (int)(R)) + ((__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \ + (unsigned long long)(B), (int)(R))) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtu64_ss (__m128 __A, unsigned long long __B) diff --git a/clang/lib/Headers/avx512fp16intrin.h b/clang/lib/Headers/avx512fp16intrin.h index 926033e20152c..48370d0bf0ee0 100644 --- a/clang/lib/Headers/avx512fp16intrin.h +++ b/clang/lib/Headers/avx512fp16intrin.h @@ -269,10 +269,539 @@ _mm512_zextph256_ph512(__m256h __a) { 29, 30, 31); } +#define _mm_comi_round_sh(A, B, P, R) \ + __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, (int)(P), (int)(R)) + +#define _mm_comi_sh(A, B, 
pred) \ + _mm_comi_round_sh((A), (B), (pred), _MM_FROUND_CUR_DIRECTION) + +static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comieq_sh(__m128h A, + __m128h B) { + return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_EQ_OS, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comilt_sh(__m128h A, + __m128h B) { + return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_LT_OS, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comile_sh(__m128h A, + __m128h B) { + return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_LE_OS, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comigt_sh(__m128h A, + __m128h B) { + return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_GT_OS, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comige_sh(__m128h A, + __m128h B) { + return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_GE_OS, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comineq_sh(__m128h A, + __m128h B) { + return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_NEQ_US, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomieq_sh(__m128h A, + __m128h B) { + return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_EQ_OQ, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomilt_sh(__m128h A, + __m128h B) { + return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_LT_OQ, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomile_sh(__m128h A, + __m128h B) { + return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_LE_OQ, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomigt_sh(__m128h A, + __m128h B) { + return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_GT_OQ, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomige_sh(__m128h A, + __m128h B) { + return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_GE_OQ, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomineq_sh(__m128h A, + __m128h B) { + return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_NEQ_UQ, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_add_ph(__m512h __A, + __m512h __B) { + return (__m512h)((__v32hf)__A + (__v32hf)__B); +} + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 +_mm512_mask_add_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) { + return (__m512h)__builtin_ia32_selectph_512( + (__mmask32)__U, (__v32hf)_mm512_add_ph(__A, __B), (__v32hf)__W); +} + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 +_mm512_maskz_add_ph(__mmask32 __U, __m512h __A, __m512h __B) { + return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U, + (__v32hf)_mm512_add_ph(__A, __B), + (__v32hf)_mm512_setzero_ph()); +} + +#define _mm512_add_round_ph(A, B, R) \ + ((__m512h)__builtin_ia32_addph512((__v32hf)(__m512h)(A), \ + (__v32hf)(__m512h)(B), (int)(R))) + +#define _mm512_mask_add_round_ph(W, U, A, B, R) \ + ((__m512h)__builtin_ia32_selectph_512( \ + (__mmask32)(U), (__v32hf)_mm512_add_round_ph((A), (B), (R)), \ + (__v32hf)(__m512h)(W))) + +#define _mm512_maskz_add_round_ph(U, A, B, R) \ + ((__m512h)__builtin_ia32_selectph_512( \ + (__mmask32)(U), (__v32hf)_mm512_add_round_ph((A), (B), (R)), \ + (__v32hf)_mm512_setzero_ph())) + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_sub_ph(__m512h __A, + __m512h __B) { 
+ return (__m512h)((__v32hf)__A - (__v32hf)__B); +} + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 +_mm512_mask_sub_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) { + return (__m512h)__builtin_ia32_selectph_512( + (__mmask32)__U, (__v32hf)_mm512_sub_ph(__A, __B), (__v32hf)__W); +} + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 +_mm512_maskz_sub_ph(__mmask32 __U, __m512h __A, __m512h __B) { + return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U, + (__v32hf)_mm512_sub_ph(__A, __B), + (__v32hf)_mm512_setzero_ph()); +} + +#define _mm512_sub_round_ph(A, B, R) \ + ((__m512h)__builtin_ia32_subph512((__v32hf)(__m512h)(A), \ + (__v32hf)(__m512h)(B), (int)(R))) + +#define _mm512_mask_sub_round_ph(W, U, A, B, R) \ + ((__m512h)__builtin_ia32_selectph_512( \ + (__mmask32)(U), (__v32hf)_mm512_sub_round_ph((A), (B), (R)), \ + (__v32hf)(__m512h)(W))) + +#define _mm512_maskz_sub_round_ph(U, A, B, R) \ + ((__m512h)__builtin_ia32_selectph_512( \ + (__mmask32)(U), (__v32hf)_mm512_sub_round_ph((A), (B), (R)), \ + (__v32hf)_mm512_setzero_ph())) + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mul_ph(__m512h __A, + __m512h __B) { + return (__m512h)((__v32hf)__A * (__v32hf)__B); +} + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 +_mm512_mask_mul_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) { + return (__m512h)__builtin_ia32_selectph_512( + (__mmask32)__U, (__v32hf)_mm512_mul_ph(__A, __B), (__v32hf)__W); +} + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 +_mm512_maskz_mul_ph(__mmask32 __U, __m512h __A, __m512h __B) { + return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U, + (__v32hf)_mm512_mul_ph(__A, __B), + (__v32hf)_mm512_setzero_ph()); +} + +#define _mm512_mul_round_ph(A, B, R) \ + ((__m512h)__builtin_ia32_mulph512((__v32hf)(__m512h)(A), \ + (__v32hf)(__m512h)(B), (int)(R))) + +#define _mm512_mask_mul_round_ph(W, U, A, B, R) \ + ((__m512h)__builtin_ia32_selectph_512( \ + (__mmask32)(U), (__v32hf)_mm512_mul_round_ph((A), (B), (R)), \ + (__v32hf)(__m512h)(W))) + +#define _mm512_maskz_mul_round_ph(U, A, B, R) \ + ((__m512h)__builtin_ia32_selectph_512( \ + (__mmask32)(U), (__v32hf)_mm512_mul_round_ph((A), (B), (R)), \ + (__v32hf)_mm512_setzero_ph())) + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_div_ph(__m512h __A, + __m512h __B) { + return (__m512h)((__v32hf)__A / (__v32hf)__B); +} + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 +_mm512_mask_div_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) { + return (__m512h)__builtin_ia32_selectph_512( + (__mmask32)__U, (__v32hf)_mm512_div_ph(__A, __B), (__v32hf)__W); +} + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 +_mm512_maskz_div_ph(__mmask32 __U, __m512h __A, __m512h __B) { + return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U, + (__v32hf)_mm512_div_ph(__A, __B), + (__v32hf)_mm512_setzero_ph()); +} + +#define _mm512_div_round_ph(A, B, R) \ + ((__m512h)__builtin_ia32_divph512((__v32hf)(__m512h)(A), \ + (__v32hf)(__m512h)(B), (int)(R))) + +#define _mm512_mask_div_round_ph(W, U, A, B, R) \ + ((__m512h)__builtin_ia32_selectph_512( \ + (__mmask32)(U), (__v32hf)_mm512_div_round_ph((A), (B), (R)), \ + (__v32hf)(__m512h)(W))) + +#define _mm512_maskz_div_round_ph(U, A, B, R) \ + ((__m512h)__builtin_ia32_selectph_512( \ + (__mmask32)(U), (__v32hf)_mm512_div_round_ph((A), (B), (R)), \ + (__v32hf)_mm512_setzero_ph())) + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_min_ph(__m512h __A, + __m512h __B) { + return (__m512h)__builtin_ia32_minph512((__v32hf)__A, (__v32hf)__B, + 
_MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 +_mm512_mask_min_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) { + return (__m512h)__builtin_ia32_selectph_512( + (__mmask32)__U, (__v32hf)_mm512_min_ph(__A, __B), (__v32hf)__W); +} + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 +_mm512_maskz_min_ph(__mmask32 __U, __m512h __A, __m512h __B) { + return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U, + (__v32hf)_mm512_min_ph(__A, __B), + (__v32hf)_mm512_setzero_ph()); +} + +#define _mm512_min_round_ph(A, B, R) \ + ((__m512h)__builtin_ia32_minph512((__v32hf)(__m512h)(A), \ + (__v32hf)(__m512h)(B), (int)(R))) + +#define _mm512_mask_min_round_ph(W, U, A, B, R) \ + ((__m512h)__builtin_ia32_selectph_512( \ + (__mmask32)(U), (__v32hf)_mm512_min_round_ph((A), (B), (R)), \ + (__v32hf)(__m512h)(W))) + +#define _mm512_maskz_min_round_ph(U, A, B, R) \ + ((__m512h)__builtin_ia32_selectph_512( \ + (__mmask32)(U), (__v32hf)_mm512_min_round_ph((A), (B), (R)), \ + (__v32hf)_mm512_setzero_ph())) + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_max_ph(__m512h __A, + __m512h __B) { + return (__m512h)__builtin_ia32_maxph512((__v32hf)__A, (__v32hf)__B, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 +_mm512_mask_max_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) { + return (__m512h)__builtin_ia32_selectph_512( + (__mmask32)__U, (__v32hf)_mm512_max_ph(__A, __B), (__v32hf)__W); +} + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 +_mm512_maskz_max_ph(__mmask32 __U, __m512h __A, __m512h __B) { + return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U, + (__v32hf)_mm512_max_ph(__A, __B), + (__v32hf)_mm512_setzero_ph()); +} + +#define _mm512_max_round_ph(A, B, R) \ + ((__m512h)__builtin_ia32_maxph512((__v32hf)(__m512h)(A), \ + (__v32hf)(__m512h)(B), (int)(R))) + +#define _mm512_mask_max_round_ph(W, U, A, B, R) \ + ((__m512h)__builtin_ia32_selectph_512( \ + (__mmask32)(U), (__v32hf)_mm512_max_round_ph((A), (B), (R)), \ + (__v32hf)(__m512h)(W))) + +#define _mm512_maskz_max_round_ph(U, A, B, R) \ + ((__m512h)__builtin_ia32_selectph_512( \ + (__mmask32)(U), (__v32hf)_mm512_max_round_ph((A), (B), (R)), \ + (__v32hf)_mm512_setzero_ph())) + static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_abs_ph(__m512h __A) { return (__m512h)_mm512_and_epi32(_mm512_set1_epi32(0x7FFF7FFF), (__m512i)__A); } +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_add_sh(__m128h __A, + __m128h __B) { + __A[0] += __B[0]; + return __A; +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_add_sh(__m128h __W, + __mmask8 __U, + __m128h __A, + __m128h __B) { + __A = _mm_add_sh(__A, __B); + return __builtin_ia32_selectsh_128(__U, __A, __W); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_add_sh(__mmask8 __U, + __m128h __A, + __m128h __B) { + __A = _mm_add_sh(__A, __B); + return __builtin_ia32_selectsh_128(__U, __A, _mm_setzero_ph()); +} + +#define _mm_add_round_sh(A, B, R) \ + ((__m128h)__builtin_ia32_addsh_round_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ + (__mmask8)-1, (int)(R))) + +#define _mm_mask_add_round_sh(W, U, A, B, R) \ + ((__m128h)__builtin_ia32_addsh_round_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \ + (__mmask8)(U), (int)(R))) + +#define _mm_maskz_add_round_sh(U, A, B, R) \ + ((__m128h)__builtin_ia32_addsh_round_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ + (__mmask8)(U), (int)(R))) + +static 
__inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_sub_sh(__m128h __A, + __m128h __B) { + __A[0] -= __B[0]; + return __A; +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_sub_sh(__m128h __W, + __mmask8 __U, + __m128h __A, + __m128h __B) { + __A = _mm_sub_sh(__A, __B); + return __builtin_ia32_selectsh_128(__U, __A, __W); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_sub_sh(__mmask8 __U, + __m128h __A, + __m128h __B) { + __A = _mm_sub_sh(__A, __B); + return __builtin_ia32_selectsh_128(__U, __A, _mm_setzero_ph()); +} + +#define _mm_sub_round_sh(A, B, R) \ + ((__m128h)__builtin_ia32_subsh_round_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ + (__mmask8)-1, (int)(R))) + +#define _mm_mask_sub_round_sh(W, U, A, B, R) \ + ((__m128h)__builtin_ia32_subsh_round_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \ + (__mmask8)(U), (int)(R))) + +#define _mm_maskz_sub_round_sh(U, A, B, R) \ + ((__m128h)__builtin_ia32_subsh_round_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ + (__mmask8)(U), (int)(R))) + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mul_sh(__m128h __A, + __m128h __B) { + __A[0] *= __B[0]; + return __A; +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_mul_sh(__m128h __W, + __mmask8 __U, + __m128h __A, + __m128h __B) { + __A = _mm_mul_sh(__A, __B); + return __builtin_ia32_selectsh_128(__U, __A, __W); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_mul_sh(__mmask8 __U, + __m128h __A, + __m128h __B) { + __A = _mm_mul_sh(__A, __B); + return __builtin_ia32_selectsh_128(__U, __A, _mm_setzero_ph()); +} + +#define _mm_mul_round_sh(A, B, R) \ + ((__m128h)__builtin_ia32_mulsh_round_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ + (__mmask8)-1, (int)(R))) + +#define _mm_mask_mul_round_sh(W, U, A, B, R) \ + ((__m128h)__builtin_ia32_mulsh_round_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \ + (__mmask8)(U), (int)(R))) + +#define _mm_maskz_mul_round_sh(U, A, B, R) \ + ((__m128h)__builtin_ia32_mulsh_round_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ + (__mmask8)(U), (int)(R))) + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_div_sh(__m128h __A, + __m128h __B) { + __A[0] /= __B[0]; + return __A; +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_div_sh(__m128h __W, + __mmask8 __U, + __m128h __A, + __m128h __B) { + __A = _mm_div_sh(__A, __B); + return __builtin_ia32_selectsh_128(__U, __A, __W); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_div_sh(__mmask8 __U, + __m128h __A, + __m128h __B) { + __A = _mm_div_sh(__A, __B); + return __builtin_ia32_selectsh_128(__U, __A, _mm_setzero_ph()); +} + +#define _mm_div_round_sh(A, B, R) \ + ((__m128h)__builtin_ia32_divsh_round_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ + (__mmask8)-1, (int)(R))) + +#define _mm_mask_div_round_sh(W, U, A, B, R) \ + ((__m128h)__builtin_ia32_divsh_round_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \ + (__mmask8)(U), (int)(R))) + +#define _mm_maskz_div_round_sh(U, A, B, R) \ + ((__m128h)__builtin_ia32_divsh_round_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ + (__mmask8)(U), (int)(R))) + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_min_sh(__m128h __A, + __m128h __B) { + return (__m128h)__builtin_ia32_minsh_round_mask( + (__v8hf)__A, 
(__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_min_sh(__m128h __W, + __mmask8 __U, + __m128h __A, + __m128h __B) { + return (__m128h)__builtin_ia32_minsh_round_mask((__v8hf)__A, (__v8hf)__B, + (__v8hf)__W, (__mmask8)__U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_min_sh(__mmask8 __U, + __m128h __A, + __m128h __B) { + return (__m128h)__builtin_ia32_minsh_round_mask( + (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm_min_round_sh(A, B, R) \ + ((__m128h)__builtin_ia32_minsh_round_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ + (__mmask8)-1, (int)(R))) + +#define _mm_mask_min_round_sh(W, U, A, B, R) \ + ((__m128h)__builtin_ia32_minsh_round_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \ + (__mmask8)(U), (int)(R))) + +#define _mm_maskz_min_round_sh(U, A, B, R) \ + ((__m128h)__builtin_ia32_minsh_round_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ + (__mmask8)(U), (int)(R))) + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_max_sh(__m128h __A, + __m128h __B) { + return (__m128h)__builtin_ia32_maxsh_round_mask( + (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_max_sh(__m128h __W, + __mmask8 __U, + __m128h __A, + __m128h __B) { + return (__m128h)__builtin_ia32_maxsh_round_mask((__v8hf)__A, (__v8hf)__B, + (__v8hf)__W, (__mmask8)__U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_max_sh(__mmask8 __U, + __m128h __A, + __m128h __B) { + return (__m128h)__builtin_ia32_maxsh_round_mask( + (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm_max_round_sh(A, B, R) \ + ((__m128h)__builtin_ia32_maxsh_round_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ + (__mmask8)-1, (int)(R))) + +#define _mm_mask_max_round_sh(W, U, A, B, R) \ + ((__m128h)__builtin_ia32_maxsh_round_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \ + (__mmask8)(U), (int)(R))) + +#define _mm_maskz_max_round_sh(U, A, B, R) \ + ((__m128h)__builtin_ia32_maxsh_round_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ + (__mmask8)(U), (int)(R))) + +#define _mm512_cmp_round_ph_mask(A, B, P, R) \ + ((__mmask32)__builtin_ia32_cmpph512_mask((__v32hf)(__m512h)(A), \ + (__v32hf)(__m512h)(B), (int)(P), \ + (__mmask32)-1, (int)(R))) + +#define _mm512_mask_cmp_round_ph_mask(U, A, B, P, R) \ + ((__mmask32)__builtin_ia32_cmpph512_mask((__v32hf)(__m512h)(A), \ + (__v32hf)(__m512h)(B), (int)(P), \ + (__mmask32)(U), (int)(R))) + +#define _mm512_cmp_ph_mask(A, B, P) \ + _mm512_cmp_round_ph_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION) + +#define _mm512_mask_cmp_ph_mask(U, A, B, P) \ + _mm512_mask_cmp_round_ph_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION) + +#define _mm_cmp_round_sh_mask(X, Y, P, R) \ + ((__mmask8)__builtin_ia32_cmpsh_mask((__v8hf)(__m128h)(X), \ + (__v8hf)(__m128h)(Y), (int)(P), \ + (__mmask8)-1, (int)(R))) + +#define _mm_mask_cmp_round_sh_mask(M, X, Y, P, R) \ + ((__mmask8)__builtin_ia32_cmpsh_mask((__v8hf)(__m128h)(X), \ + (__v8hf)(__m128h)(Y), (int)(P), \ + (__mmask8)(M), (int)(R))) + +#define _mm_cmp_sh_mask(X, Y, P) \ 
+ ((__mmask8)__builtin_ia32_cmpsh_mask( \ + (__v8hf)(__m128h)(X), (__v8hf)(__m128h)(Y), (int)(P), (__mmask8)-1, \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm_mask_cmp_sh_mask(M, X, Y, P) \ + ((__mmask8)__builtin_ia32_cmpsh_mask( \ + (__v8hf)(__m128h)(X), (__v8hf)(__m128h)(Y), (int)(P), (__mmask8)(M), \ + _MM_FROUND_CUR_DIRECTION)) // loads with vmovsh: static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_load_sh(void const *__dp) { struct __mm_load_sh_struct { @@ -418,6 +947,1502 @@ static __inline__ short __DEFAULT_FN_ATTRS128 _mm_cvtsi128_si16(__m128i __a) { return __b[0]; } +static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_rcp_ph(__m512h __A) { + return (__m512h)__builtin_ia32_rcpph512_mask( + (__v32hf)__A, (__v32hf)_mm512_undefined_ph(), (__mmask32)-1); +} + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 +_mm512_mask_rcp_ph(__m512h __W, __mmask32 __U, __m512h __A) { + return (__m512h)__builtin_ia32_rcpph512_mask((__v32hf)__A, (__v32hf)__W, + (__mmask32)__U); +} + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 +_mm512_maskz_rcp_ph(__mmask32 __U, __m512h __A) { + return (__m512h)__builtin_ia32_rcpph512_mask( + (__v32hf)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U); +} + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_rsqrt_ph(__m512h __A) { + return (__m512h)__builtin_ia32_rsqrtph512_mask( + (__v32hf)__A, (__v32hf)_mm512_undefined_ph(), (__mmask32)-1); +} + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 +_mm512_mask_rsqrt_ph(__m512h __W, __mmask32 __U, __m512h __A) { + return (__m512h)__builtin_ia32_rsqrtph512_mask((__v32hf)__A, (__v32hf)__W, + (__mmask32)__U); +} + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 +_mm512_maskz_rsqrt_ph(__mmask32 __U, __m512h __A) { + return (__m512h)__builtin_ia32_rsqrtph512_mask( + (__v32hf)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U); +} + +#define _mm512_getmant_ph(A, B, C) \ + ((__m512h)__builtin_ia32_getmantph512_mask( \ + (__v32hf)(__m512h)(A), (int)(((C) << 2) | (B)), \ + (__v32hf)_mm512_undefined_ph(), (__mmask32)-1, \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_mask_getmant_ph(W, U, A, B, C) \ + ((__m512h)__builtin_ia32_getmantph512_mask( \ + (__v32hf)(__m512h)(A), (int)(((C) << 2) | (B)), (__v32hf)(__m512h)(W), \ + (__mmask32)(U), _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_maskz_getmant_ph(U, A, B, C) \ + ((__m512h)__builtin_ia32_getmantph512_mask( \ + (__v32hf)(__m512h)(A), (int)(((C) << 2) | (B)), \ + (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_getmant_round_ph(A, B, C, R) \ + ((__m512h)__builtin_ia32_getmantph512_mask( \ + (__v32hf)(__m512h)(A), (int)(((C) << 2) | (B)), \ + (__v32hf)_mm512_undefined_ph(), (__mmask32)-1, (int)(R))) + +#define _mm512_mask_getmant_round_ph(W, U, A, B, C, R) \ + ((__m512h)__builtin_ia32_getmantph512_mask( \ + (__v32hf)(__m512h)(A), (int)(((C) << 2) | (B)), (__v32hf)(__m512h)(W), \ + (__mmask32)(U), (int)(R))) + +#define _mm512_maskz_getmant_round_ph(U, A, B, C, R) \ + ((__m512h)__builtin_ia32_getmantph512_mask( \ + (__v32hf)(__m512h)(A), (int)(((C) << 2) | (B)), \ + (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), (int)(R))) + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_getexp_ph(__m512h __A) { + return (__m512h)__builtin_ia32_getexpph512_mask( + (__v32hf)__A, (__v32hf)_mm512_undefined_ph(), (__mmask32)-1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 +_mm512_mask_getexp_ph(__m512h __W, __mmask32 __U, __m512h __A) { + return (__m512h)__builtin_ia32_getexpph512_mask( + (__v32hf)__A, 
(__v32hf)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 +_mm512_maskz_getexp_ph(__mmask32 __U, __m512h __A) { + return (__m512h)__builtin_ia32_getexpph512_mask( + (__v32hf)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_getexp_round_ph(A, R) \ + ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A), \ + (__v32hf)_mm512_undefined_ph(), \ + (__mmask32)-1, (int)(R))) + +#define _mm512_mask_getexp_round_ph(W, U, A, R) \ + ((__m512h)__builtin_ia32_getexpph512_mask( \ + (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(W), (__mmask32)(U), (int)(R))) + +#define _mm512_maskz_getexp_round_ph(U, A, R) \ + ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A), \ + (__v32hf)_mm512_setzero_ph(), \ + (__mmask32)(U), (int)(R))) + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_scalef_ph(__m512h __A, + __m512h __B) { + return (__m512h)__builtin_ia32_scalefph512_mask( + (__v32hf)__A, (__v32hf)__B, (__v32hf)_mm512_undefined_ph(), (__mmask32)-1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 +_mm512_mask_scalef_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) { + return (__m512h)__builtin_ia32_scalefph512_mask((__v32hf)__A, (__v32hf)__B, + (__v32hf)__W, (__mmask32)__U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 +_mm512_maskz_scalef_ph(__mmask32 __U, __m512h __A, __m512h __B) { + return (__m512h)__builtin_ia32_scalefph512_mask( + (__v32hf)__A, (__v32hf)__B, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_scalef_round_ph(A, B, R) \ + ((__m512h)__builtin_ia32_scalefph512_mask( \ + (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), \ + (__v32hf)_mm512_undefined_ph(), (__mmask32)-1, (int)(R))) + +#define _mm512_mask_scalef_round_ph(W, U, A, B, R) \ + ((__m512h)__builtin_ia32_scalefph512_mask( \ + (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(W), \ + (__mmask32)(U), (int)(R))) + +#define _mm512_maskz_scalef_round_ph(U, A, B, R) \ + ((__m512h)__builtin_ia32_scalefph512_mask( \ + (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), \ + (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), (int)(R))) + +#define _mm512_roundscale_ph(A, B) \ + ((__m512h)__builtin_ia32_rndscaleph_mask( \ + (__v32hf)(__m512h)(A), (int)(B), (__v32hf)(__m512h)(A), (__mmask32)-1, \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_mask_roundscale_ph(A, B, C, imm) \ + ((__m512h)__builtin_ia32_rndscaleph_mask( \ + (__v32hf)(__m512h)(C), (int)(imm), (__v32hf)(__m512h)(A), \ + (__mmask32)(B), _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_maskz_roundscale_ph(A, B, imm) \ + ((__m512h)__builtin_ia32_rndscaleph_mask( \ + (__v32hf)(__m512h)(B), (int)(imm), (__v32hf)_mm512_setzero_ph(), \ + (__mmask32)(A), _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_mask_roundscale_round_ph(A, B, C, imm, R) \ + ((__m512h)__builtin_ia32_rndscaleph_mask((__v32hf)(__m512h)(C), (int)(imm), \ + (__v32hf)(__m512h)(A), \ + (__mmask32)(B), (int)(R))) + +#define _mm512_maskz_roundscale_round_ph(A, B, imm, R) \ + ((__m512h)__builtin_ia32_rndscaleph_mask((__v32hf)(__m512h)(B), (int)(imm), \ + (__v32hf)_mm512_setzero_ph(), \ + (__mmask32)(A), (int)(R))) + +#define _mm512_roundscale_round_ph(A, imm, R) \ + ((__m512h)__builtin_ia32_rndscaleph_mask((__v32hf)(__m512h)(A), (int)(imm), \ + (__v32hf)_mm512_undefined_ph(), \ + (__mmask32)-1, (int)(R))) + +#define _mm512_reduce_ph(A, imm) \ + ((__m512h)__builtin_ia32_reduceph512_mask( \ 
+ (__v32hf)(__m512h)(A), (int)(imm), (__v32hf)_mm512_undefined_ph(), \ + (__mmask32)-1, _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_mask_reduce_ph(W, U, A, imm) \ + ((__m512h)__builtin_ia32_reduceph512_mask( \ + (__v32hf)(__m512h)(A), (int)(imm), (__v32hf)(__m512h)(W), \ + (__mmask32)(U), _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_maskz_reduce_ph(U, A, imm) \ + ((__m512h)__builtin_ia32_reduceph512_mask( \ + (__v32hf)(__m512h)(A), (int)(imm), (__v32hf)_mm512_setzero_ph(), \ + (__mmask32)(U), _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_mask_reduce_round_ph(W, U, A, imm, R) \ + ((__m512h)__builtin_ia32_reduceph512_mask((__v32hf)(__m512h)(A), (int)(imm), \ + (__v32hf)(__m512h)(W), \ + (__mmask32)(U), (int)(R))) + +#define _mm512_maskz_reduce_round_ph(U, A, imm, R) \ + ((__m512h)__builtin_ia32_reduceph512_mask((__v32hf)(__m512h)(A), (int)(imm), \ + (__v32hf)_mm512_setzero_ph(), \ + (__mmask32)(U), (int)(R))) + +#define _mm512_reduce_round_ph(A, imm, R) \ + ((__m512h)__builtin_ia32_reduceph512_mask((__v32hf)(__m512h)(A), (int)(imm), \ + (__v32hf)_mm512_undefined_ph(), \ + (__mmask32)-1, (int)(R))) + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_rcp_sh(__m128h __A, + __m128h __B) { + return (__m128h)__builtin_ia32_rcpsh_mask( + (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_rcp_sh(__m128h __W, + __mmask8 __U, + __m128h __A, + __m128h __B) { + return (__m128h)__builtin_ia32_rcpsh_mask((__v8hf)__A, (__v8hf)__B, + (__v8hf)__W, (__mmask8)__U); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_rcp_sh(__mmask8 __U, + __m128h __A, + __m128h __B) { + return (__m128h)__builtin_ia32_rcpsh_mask( + (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_rsqrt_sh(__m128h __A, + __m128h __B) { + return (__m128h)__builtin_ia32_rsqrtsh_mask( + (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt_sh(__m128h __W, + __mmask8 __U, + __m128h __A, + __m128h __B) { + return (__m128h)__builtin_ia32_rsqrtsh_mask((__v8hf)__A, (__v8hf)__B, + (__v8hf)__W, (__mmask8)__U); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 +_mm_maskz_rsqrt_sh(__mmask8 __U, __m128h __A, __m128h __B) { + return (__m128h)__builtin_ia32_rsqrtsh_mask( + (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); +} + +#define _mm_getmant_round_sh(A, B, C, D, R) \ + ((__m128h)__builtin_ia32_getmantsh_round_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(((D) << 2) | (C)), \ + (__v8hf)_mm_setzero_ph(), (__mmask8)-1, (int)(R))) + +#define _mm_getmant_sh(A, B, C, D) \ + ((__m128h)__builtin_ia32_getmantsh_round_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(((D) << 2) | (C)), \ + (__v8hf)_mm_setzero_ph(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION)) + +#define _mm_mask_getmant_sh(W, U, A, B, C, D) \ + ((__m128h)__builtin_ia32_getmantsh_round_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(((D) << 2) | (C)), \ + (__v8hf)(__m128h)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) + +#define _mm_mask_getmant_round_sh(W, U, A, B, C, D, R) \ + ((__m128h)__builtin_ia32_getmantsh_round_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(((D) << 2) | (C)), \ + (__v8hf)(__m128h)(W), (__mmask8)(U), (int)(R))) + +#define _mm_maskz_getmant_sh(U, A, B, C, D) \ + ((__m128h)__builtin_ia32_getmantsh_round_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), 
(int)(((D) << 2) | (C)), \ + (__v8hf)_mm_setzero_ph(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) + +#define _mm_maskz_getmant_round_sh(U, A, B, C, D, R) \ + ((__m128h)__builtin_ia32_getmantsh_round_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(((D) << 2) | (C)), \ + (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R))) + +#define _mm_getexp_round_sh(A, B, R) \ + ((__m128h)__builtin_ia32_getexpsh128_round_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ + (__mmask8)-1, (int)(R))) + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_getexp_sh(__m128h __A, + __m128h __B) { + return (__m128h)__builtin_ia32_getexpsh128_round_mask( + (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 +_mm_mask_getexp_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { + return (__m128h)__builtin_ia32_getexpsh128_round_mask( + (__v8hf)__A, (__v8hf)__B, (__v8hf)__W, (__mmask8)__U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm_mask_getexp_round_sh(W, U, A, B, R) \ + ((__m128h)__builtin_ia32_getexpsh128_round_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \ + (__mmask8)(U), (int)(R))) + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 +_mm_maskz_getexp_sh(__mmask8 __U, __m128h __A, __m128h __B) { + return (__m128h)__builtin_ia32_getexpsh128_round_mask( + (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm_maskz_getexp_round_sh(U, A, B, R) \ + ((__m128h)__builtin_ia32_getexpsh128_round_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ + (__mmask8)(U), (int)(R))) + +#define _mm_scalef_round_sh(A, B, R) \ + ((__m128h)__builtin_ia32_scalefsh_round_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ + (__mmask8)-1, (int)(R))) + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_scalef_sh(__m128h __A, + __m128h __B) { + return (__m128h)__builtin_ia32_scalefsh_round_mask( + (__v8hf)__A, (__v8hf)(__B), (__v8hf)_mm_setzero_ph(), (__mmask8)-1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 +_mm_mask_scalef_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { + return (__m128h)__builtin_ia32_scalefsh_round_mask((__v8hf)__A, (__v8hf)__B, + (__v8hf)__W, (__mmask8)__U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm_mask_scalef_round_sh(W, U, A, B, R) \ + ((__m128h)__builtin_ia32_scalefsh_round_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \ + (__mmask8)(U), (int)(R))) + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 +_mm_maskz_scalef_sh(__mmask8 __U, __m128h __A, __m128h __B) { + return (__m128h)__builtin_ia32_scalefsh_round_mask( + (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm_maskz_scalef_round_sh(U, A, B, R) \ + ((__m128h)__builtin_ia32_scalefsh_round_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ + (__mmask8)(U), (int)(R))) + +#define _mm_roundscale_round_sh(A, B, imm, R) \ + ((__m128h)__builtin_ia32_rndscalesh_round_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ + (__mmask8)-1, (int)(imm), (int)(R))) + +#define _mm_roundscale_sh(A, B, imm) \ + ((__m128h)__builtin_ia32_rndscalesh_round_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ + (__mmask8)-1, (int)(imm), 
_MM_FROUND_CUR_DIRECTION)) + +#define _mm_mask_roundscale_sh(W, U, A, B, I) \ + ((__m128h)__builtin_ia32_rndscalesh_round_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \ + (__mmask8)(U), (int)(I), _MM_FROUND_CUR_DIRECTION)) + +#define _mm_mask_roundscale_round_sh(W, U, A, B, I, R) \ + ((__m128h)__builtin_ia32_rndscalesh_round_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \ + (__mmask8)(U), (int)(I), (int)(R))) + +#define _mm_maskz_roundscale_sh(U, A, B, I) \ + ((__m128h)__builtin_ia32_rndscalesh_round_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ + (__mmask8)(U), (int)(I), _MM_FROUND_CUR_DIRECTION)) + +#define _mm_maskz_roundscale_round_sh(U, A, B, I, R) \ + ((__m128h)__builtin_ia32_rndscalesh_round_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ + (__mmask8)(U), (int)(I), (int)(R))) + +#define _mm_reduce_sh(A, B, C) \ + ((__m128h)__builtin_ia32_reducesh_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ + (__mmask8)-1, (int)(C), _MM_FROUND_CUR_DIRECTION)) + +#define _mm_mask_reduce_sh(W, U, A, B, C) \ + ((__m128h)__builtin_ia32_reducesh_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \ + (__mmask8)(U), (int)(C), _MM_FROUND_CUR_DIRECTION)) + +#define _mm_maskz_reduce_sh(U, A, B, C) \ + ((__m128h)__builtin_ia32_reducesh_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ + (__mmask8)(U), (int)(C), _MM_FROUND_CUR_DIRECTION)) + +#define _mm_reduce_round_sh(A, B, C, R) \ + ((__m128h)__builtin_ia32_reducesh_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ + (__mmask8)-1, (int)(C), (int)(R))) + +#define _mm_mask_reduce_round_sh(W, U, A, B, C, R) \ + ((__m128h)__builtin_ia32_reducesh_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \ + (__mmask8)(U), (int)(C), (int)(R))) + +#define _mm_maskz_reduce_round_sh(U, A, B, C, R) \ + ((__m128h)__builtin_ia32_reducesh_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ + (__mmask8)(U), (int)(C), (int)(R))) + +#define _mm512_sqrt_round_ph(A, R) \ + ((__m512h)__builtin_ia32_sqrtph512((__v32hf)(__m512h)(A), (int)(R))) + +#define _mm512_mask_sqrt_round_ph(W, U, A, R) \ + ((__m512h)__builtin_ia32_selectph_512( \ + (__mmask32)(U), (__v32hf)_mm512_sqrt_round_ph((A), (R)), \ + (__v32hf)(__m512h)(W))) + +#define _mm512_maskz_sqrt_round_ph(U, A, R) \ + ((__m512h)__builtin_ia32_selectph_512( \ + (__mmask32)(U), (__v32hf)_mm512_sqrt_round_ph((A), (R)), \ + (__v32hf)_mm512_setzero_ph())) + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_sqrt_ph(__m512h __A) { + return (__m512h)__builtin_ia32_sqrtph512((__v32hf)__A, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 +_mm512_mask_sqrt_ph(__m512h __W, __mmask32 __U, __m512h __A) { + return (__m512h)__builtin_ia32_selectph_512( + (__mmask32)(__U), + (__v32hf)__builtin_ia32_sqrtph512((__A), (_MM_FROUND_CUR_DIRECTION)), + (__v32hf)(__m512h)(__W)); +} + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 +_mm512_maskz_sqrt_ph(__mmask32 __U, __m512h __A) { + return (__m512h)__builtin_ia32_selectph_512( + (__mmask32)(__U), + (__v32hf)__builtin_ia32_sqrtph512((__A), (_MM_FROUND_CUR_DIRECTION)), + (__v32hf)_mm512_setzero_ph()); +} + +#define _mm_sqrt_round_sh(A, B, R) \ + ((__m128h)__builtin_ia32_sqrtsh_round_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ 
+ (__mmask8)-1, (int)(R))) + +#define _mm_mask_sqrt_round_sh(W, U, A, B, R) \ + ((__m128h)__builtin_ia32_sqrtsh_round_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \ + (__mmask8)(U), (int)(R))) + +#define _mm_maskz_sqrt_round_sh(U, A, B, R) \ + ((__m128h)__builtin_ia32_sqrtsh_round_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ + (__mmask8)(U), (int)(R))) + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_sqrt_sh(__m128h __A, + __m128h __B) { + return (__m128h)__builtin_ia32_sqrtsh_round_mask( + (__v8hf)(__m128h)(__A), (__v8hf)(__m128h)(__B), (__v8hf)_mm_setzero_ph(), + (__mmask8)-1, _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_sh(__m128h __W, + __mmask32 __U, + __m128h __A, + __m128h __B) { + return (__m128h)__builtin_ia32_sqrtsh_round_mask( + (__v8hf)(__m128h)(__A), (__v8hf)(__m128h)(__B), (__v8hf)(__m128h)(__W), + (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_sh(__mmask32 __U, + __m128h __A, + __m128h __B) { + return (__m128h)__builtin_ia32_sqrtsh_round_mask( + (__v8hf)(__m128h)(__A), (__v8hf)(__m128h)(__B), (__v8hf)_mm_setzero_ph(), + (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_mask_fpclass_ph_mask(U, A, imm) \ + ((__mmask32)__builtin_ia32_fpclassph512_mask((__v32hf)(__m512h)(A), \ + (int)(imm), (__mmask32)(U))) + +#define _mm512_fpclass_ph_mask(A, imm) \ + ((__mmask32)__builtin_ia32_fpclassph512_mask((__v32hf)(__m512h)(A), \ + (int)(imm), (__mmask32)-1)) + +#define _mm_fpclass_sh_mask(A, imm) \ + ((__mmask8)__builtin_ia32_fpclasssh_mask((__v8hf)(__m128h)(A), (int)(imm), \ + (__mmask8)-1)) + +#define _mm_mask_fpclass_sh_mask(U, A, imm) \ + ((__mmask8)__builtin_ia32_fpclasssh_mask((__v8hf)(__m128h)(A), (int)(imm), \ + (__mmask8)(U))) + +#define _mm512_cvt_roundpd_ph(A, R) \ + ((__m128h)__builtin_ia32_vcvtpd2ph512_mask( \ + (__v8df)(A), (__v8hf)_mm_undefined_ph(), (__mmask8)(-1), (int)(R))) + +#define _mm512_mask_cvt_roundpd_ph(W, U, A, R) \ + ((__m128h)__builtin_ia32_vcvtpd2ph512_mask((__v8df)(A), (__v8hf)(W), \ + (__mmask8)(U), (int)(R))) + +#define _mm512_maskz_cvt_roundpd_ph(U, A, R) \ + ((__m128h)__builtin_ia32_vcvtpd2ph512_mask( \ + (__v8df)(A), (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R))) + +static __inline__ __m128h __DEFAULT_FN_ATTRS512 _mm512_cvtpd_ph(__m512d __A) { + return (__m128h)__builtin_ia32_vcvtpd2ph512_mask( + (__v8df)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)-1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS512 +_mm512_mask_cvtpd_ph(__m128h __W, __mmask8 __U, __m512d __A) { + return (__m128h)__builtin_ia32_vcvtpd2ph512_mask( + (__v8df)__A, (__v8hf)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS512 +_mm512_maskz_cvtpd_ph(__mmask8 __U, __m512d __A) { + return (__m128h)__builtin_ia32_vcvtpd2ph512_mask( + (__v8df)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_cvt_roundph_pd(A, R) \ + ((__m512d)__builtin_ia32_vcvtph2pd512_mask( \ + (__v8hf)(A), (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), (int)(R))) + +#define _mm512_mask_cvt_roundph_pd(W, U, A, R) \ + ((__m512d)__builtin_ia32_vcvtph2pd512_mask((__v8hf)(A), (__v8df)(W), \ + (__mmask8)(U), (int)(R))) + +#define _mm512_maskz_cvt_roundph_pd(U, A, R) \ + ((__m512d)__builtin_ia32_vcvtph2pd512_mask( \ + (__v8hf)(A), (__v8df)_mm512_setzero_pd(), (__mmask8)(U), (int)(R))) + +static 
__inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtph_pd(__m128h __A) { + return (__m512d)__builtin_ia32_vcvtph2pd512_mask( + (__v8hf)__A, (__v8df)_mm512_setzero_pd(), (__mmask8)-1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS512 +_mm512_mask_cvtph_pd(__m512d __W, __mmask8 __U, __m128h __A) { + return (__m512d)__builtin_ia32_vcvtph2pd512_mask( + (__v8hf)__A, (__v8df)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS512 +_mm512_maskz_cvtph_pd(__mmask8 __U, __m128h __A) { + return (__m512d)__builtin_ia32_vcvtph2pd512_mask( + (__v8hf)__A, (__v8df)_mm512_setzero_pd(), (__mmask8)__U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm_cvt_roundsh_ss(A, B, R) \ + ((__m128)__builtin_ia32_vcvtsh2ss_round_mask((__v4sf)(A), (__v8hf)(B), \ + (__v4sf)_mm_undefined_ps(), \ + (__mmask8)(-1), (int)(R))) + +#define _mm_mask_cvt_roundsh_ss(W, U, A, B, R) \ + ((__m128)__builtin_ia32_vcvtsh2ss_round_mask( \ + (__v4sf)(A), (__v8hf)(B), (__v4sf)(W), (__mmask8)(U), (int)(R))) + +#define _mm_maskz_cvt_roundsh_ss(U, A, B, R) \ + ((__m128)__builtin_ia32_vcvtsh2ss_round_mask((__v4sf)(A), (__v8hf)(B), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)(U), (int)(R))) + +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtsh_ss(__m128 __A, + __m128h __B) { + return (__m128)__builtin_ia32_vcvtsh2ss_round_mask( + (__v4sf)__A, (__v8hf)__B, (__v4sf)_mm_undefined_ps(), (__mmask8)-1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtsh_ss(__m128 __W, + __mmask8 __U, + __m128 __A, + __m128h __B) { + return (__m128)__builtin_ia32_vcvtsh2ss_round_mask((__v4sf)__A, (__v8hf)__B, + (__v4sf)__W, (__mmask8)__U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsh_ss(__mmask8 __U, + __m128 __A, + __m128h __B) { + return (__m128)__builtin_ia32_vcvtsh2ss_round_mask( + (__v4sf)__A, (__v8hf)__B, (__v4sf)_mm_setzero_ps(), (__mmask8)__U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm_cvt_roundss_sh(A, B, R) \ + ((__m128h)__builtin_ia32_vcvtss2sh_round_mask((__v8hf)(A), (__v4sf)(B), \ + (__v8hf)_mm_undefined_ph(), \ + (__mmask8)(-1), (int)(R))) + +#define _mm_mask_cvt_roundss_sh(W, U, A, B, R) \ + ((__m128h)__builtin_ia32_vcvtss2sh_round_mask( \ + (__v8hf)(A), (__v4sf)(B), (__v8hf)(W), (__mmask8)(U), (int)(R))) + +#define _mm_maskz_cvt_roundss_sh(U, A, B, R) \ + ((__m128h)__builtin_ia32_vcvtss2sh_round_mask((__v8hf)(A), (__v4sf)(B), \ + (__v8hf)_mm_setzero_ph(), \ + (__mmask8)(U), (int)(R))) + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtss_sh(__m128h __A, + __m128 __B) { + return (__m128h)__builtin_ia32_vcvtss2sh_round_mask( + (__v8hf)__A, (__v4sf)__B, (__v8hf)_mm_undefined_ph(), (__mmask8)-1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtss_sh(__m128h __W, + __mmask8 __U, + __m128h __A, + __m128 __B) { + return (__m128h)__builtin_ia32_vcvtss2sh_round_mask( + (__v8hf)__A, (__v4sf)__B, (__v8hf)__W, (__mmask8)__U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_cvtss_sh(__mmask8 __U, + __m128h __A, + __m128 __B) { + return (__m128h)__builtin_ia32_vcvtss2sh_round_mask( + (__v8hf)__A, (__v4sf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm_cvt_roundsd_sh(A, B, R) \ + ((__m128h)__builtin_ia32_vcvtsd2sh_round_mask((__v8hf)(A), (__v2df)(B), \ + (__v8hf)_mm_undefined_ph(), \ + (__mmask8)(-1), (int)(R))) + +#define _mm_mask_cvt_roundsd_sh(W, 
U, A, B, R) \ + ((__m128h)__builtin_ia32_vcvtsd2sh_round_mask( \ + (__v8hf)(A), (__v2df)(B), (__v8hf)(W), (__mmask8)(U), (int)(R))) + +#define _mm_maskz_cvt_roundsd_sh(U, A, B, R) \ + ((__m128h)__builtin_ia32_vcvtsd2sh_round_mask((__v8hf)(A), (__v2df)(B), \ + (__v8hf)_mm_setzero_ph(), \ + (__mmask8)(U), (int)(R))) + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtsd_sh(__m128h __A, + __m128d __B) { + return (__m128h)__builtin_ia32_vcvtsd2sh_round_mask( + (__v8hf)__A, (__v2df)__B, (__v8hf)_mm_undefined_ph(), (__mmask8)-1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtsd_sh(__m128h __W, + __mmask8 __U, + __m128h __A, + __m128d __B) { + return (__m128h)__builtin_ia32_vcvtsd2sh_round_mask( + (__v8hf)__A, (__v2df)__B, (__v8hf)__W, (__mmask8)__U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 +_mm_maskz_cvtsd_sh(__mmask8 __U, __m128h __A, __m128d __B) { + return (__m128h)__builtin_ia32_vcvtsd2sh_round_mask( + (__v8hf)__A, (__v2df)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm_cvt_roundsh_sd(A, B, R) \ + ((__m128d)__builtin_ia32_vcvtsh2sd_round_mask((__v2df)(A), (__v8hf)(B), \ + (__v2df)_mm_undefined_pd(), \ + (__mmask8)(-1), (int)(R))) + +#define _mm_mask_cvt_roundsh_sd(W, U, A, B, R) \ + ((__m128d)__builtin_ia32_vcvtsh2sd_round_mask( \ + (__v2df)(A), (__v8hf)(B), (__v2df)(W), (__mmask8)(U), (int)(R))) + +#define _mm_maskz_cvt_roundsh_sd(U, A, B, R) \ + ((__m128d)__builtin_ia32_vcvtsh2sd_round_mask((__v2df)(A), (__v8hf)(B), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)(U), (int)(R))) + +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtsh_sd(__m128d __A, + __m128h __B) { + return (__m128d)__builtin_ia32_vcvtsh2sd_round_mask( + (__v2df)__A, (__v8hf)__B, (__v2df)_mm_undefined_pd(), (__mmask8)-1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtsh_sd(__m128d __W, + __mmask8 __U, + __m128d __A, + __m128h __B) { + return (__m128d)__builtin_ia32_vcvtsh2sd_round_mask( + (__v2df)__A, (__v8hf)__B, (__v2df)__W, (__mmask8)__U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS128 +_mm_maskz_cvtsh_sd(__mmask8 __U, __m128d __A, __m128h __B) { + return (__m128d)__builtin_ia32_vcvtsh2sd_round_mask( + (__v2df)__A, (__v8hf)__B, (__v2df)_mm_setzero_pd(), (__mmask8)__U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_cvt_roundph_epi16(A, R) \ + ((__m512i)__builtin_ia32_vcvtph2w512_mask((__v32hf)(A), \ + (__v32hi)_mm512_undefined_epi32(), \ + (__mmask32)(-1), (int)(R))) + +#define _mm512_mask_cvt_roundph_epi16(W, U, A, R) \ + ((__m512i)__builtin_ia32_vcvtph2w512_mask((__v32hf)(A), (__v32hi)(W), \ + (__mmask32)(U), (int)(R))) + +#define _mm512_maskz_cvt_roundph_epi16(U, A, R) \ + ((__m512i)__builtin_ia32_vcvtph2w512_mask((__v32hf)(A), \ + (__v32hi)_mm512_setzero_epi32(), \ + (__mmask32)(U), (int)(R))) + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_cvtph_epi16(__m512h __A) { + return (__m512i)__builtin_ia32_vcvtph2w512_mask( + (__v32hf)__A, (__v32hi)_mm512_setzero_epi32(), (__mmask32)-1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_mask_cvtph_epi16(__m512i __W, __mmask32 __U, __m512h __A) { + return (__m512i)__builtin_ia32_vcvtph2w512_mask( + (__v32hf)__A, (__v32hi)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_maskz_cvtph_epi16(__mmask32 __U, __m512h __A) { + return 
(__m512i)__builtin_ia32_vcvtph2w512_mask( + (__v32hf)__A, (__v32hi)_mm512_setzero_epi32(), (__mmask32)__U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_cvtt_roundph_epi16(A, R) \ + ((__m512i)__builtin_ia32_vcvttph2w512_mask( \ + (__v32hf)(A), (__v32hi)_mm512_undefined_epi32(), (__mmask32)(-1), \ + (int)(R))) + +#define _mm512_mask_cvtt_roundph_epi16(W, U, A, R) \ + ((__m512i)__builtin_ia32_vcvttph2w512_mask((__v32hf)(A), (__v32hi)(W), \ + (__mmask32)(U), (int)(R))) + +#define _mm512_maskz_cvtt_roundph_epi16(U, A, R) \ + ((__m512i)__builtin_ia32_vcvttph2w512_mask((__v32hf)(A), \ + (__v32hi)_mm512_setzero_epi32(), \ + (__mmask32)(U), (int)(R))) + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_cvttph_epi16(__m512h __A) { + return (__m512i)__builtin_ia32_vcvttph2w512_mask( + (__v32hf)__A, (__v32hi)_mm512_setzero_epi32(), (__mmask32)-1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_mask_cvttph_epi16(__m512i __W, __mmask32 __U, __m512h __A) { + return (__m512i)__builtin_ia32_vcvttph2w512_mask( + (__v32hf)__A, (__v32hi)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_maskz_cvttph_epi16(__mmask32 __U, __m512h __A) { + return (__m512i)__builtin_ia32_vcvttph2w512_mask( + (__v32hf)__A, (__v32hi)_mm512_setzero_epi32(), (__mmask32)__U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_cvt_roundepi16_ph(A, R) \ + ((__m512h)__builtin_ia32_vcvtw2ph512_mask((__v32hi)(A), \ + (__v32hf)_mm512_undefined_ph(), \ + (__mmask32)(-1), (int)(R))) + +#define _mm512_mask_cvt_roundepi16_ph(W, U, A, R) \ + ((__m512h)__builtin_ia32_vcvtw2ph512_mask((__v32hi)(A), (__v32hf)(W), \ + (__mmask32)(U), (int)(R))) + +#define _mm512_maskz_cvt_roundepi16_ph(U, A, R) \ + ((__m512h)__builtin_ia32_vcvtw2ph512_mask( \ + (__v32hi)(A), (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), (int)(R))) + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 +_mm512_cvtepi16_ph(__m512i __A) { + return (__m512h)__builtin_ia32_vcvtw2ph512_mask( + (__v32hi)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)-1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 +_mm512_mask_cvtepi16_ph(__m512h __W, __mmask32 __U, __m512i __A) { + return (__m512h)__builtin_ia32_vcvtw2ph512_mask( + (__v32hi)__A, (__v32hf)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 +_mm512_maskz_cvtepi16_ph(__mmask32 __U, __m512i __A) { + return (__m512h)__builtin_ia32_vcvtw2ph512_mask( + (__v32hi)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_cvt_roundph_epu16(A, R) \ + ((__m512i)__builtin_ia32_vcvtph2uw512_mask( \ + (__v32hf)(A), (__v32hu)_mm512_undefined_epi32(), (__mmask32)(-1), \ + (int)(R))) + +#define _mm512_mask_cvt_roundph_epu16(W, U, A, R) \ + ((__m512i)__builtin_ia32_vcvtph2uw512_mask((__v32hf)(A), (__v32hu)(W), \ + (__mmask32)(U), (int)(R))) + +#define _mm512_maskz_cvt_roundph_epu16(U, A, R) \ + ((__m512i)__builtin_ia32_vcvtph2uw512_mask((__v32hf)(A), \ + (__v32hu)_mm512_setzero_epi32(), \ + (__mmask32)(U), (int)(R))) + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_cvtph_epu16(__m512h __A) { + return (__m512i)__builtin_ia32_vcvtph2uw512_mask( + (__v32hf)__A, (__v32hu)_mm512_setzero_epi32(), (__mmask32)-1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_mask_cvtph_epu16(__m512i __W, __mmask32 __U, __m512h __A) { + return (__m512i)__builtin_ia32_vcvtph2uw512_mask( + 
(__v32hf)__A, (__v32hu)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_maskz_cvtph_epu16(__mmask32 __U, __m512h __A) { + return (__m512i)__builtin_ia32_vcvtph2uw512_mask( + (__v32hf)__A, (__v32hu)_mm512_setzero_epi32(), (__mmask32)__U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_cvtt_roundph_epu16(A, R) \ + ((__m512i)__builtin_ia32_vcvttph2uw512_mask( \ + (__v32hf)(A), (__v32hu)_mm512_undefined_epi32(), (__mmask32)(-1), \ + (int)(R))) + +#define _mm512_mask_cvtt_roundph_epu16(W, U, A, R) \ + ((__m512i)__builtin_ia32_vcvttph2uw512_mask((__v32hf)(A), (__v32hu)(W), \ + (__mmask32)(U), (int)(R))) + +#define _mm512_maskz_cvtt_roundph_epu16(U, A, R) \ + ((__m512i)__builtin_ia32_vcvttph2uw512_mask((__v32hf)(A), \ + (__v32hu)_mm512_setzero_epi32(), \ + (__mmask32)(U), (int)(R))) + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_cvttph_epu16(__m512h __A) { + return (__m512i)__builtin_ia32_vcvttph2uw512_mask( + (__v32hf)__A, (__v32hu)_mm512_setzero_epi32(), (__mmask32)-1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_mask_cvttph_epu16(__m512i __W, __mmask32 __U, __m512h __A) { + return (__m512i)__builtin_ia32_vcvttph2uw512_mask( + (__v32hf)__A, (__v32hu)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_maskz_cvttph_epu16(__mmask32 __U, __m512h __A) { + return (__m512i)__builtin_ia32_vcvttph2uw512_mask( + (__v32hf)__A, (__v32hu)_mm512_setzero_epi32(), (__mmask32)__U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_cvt_roundepu16_ph(A, R) \ + ((__m512h)__builtin_ia32_vcvtuw2ph512_mask((__v32hu)(A), \ + (__v32hf)_mm512_undefined_ph(), \ + (__mmask32)(-1), (int)(R))) + +#define _mm512_mask_cvt_roundepu16_ph(W, U, A, R) \ + ((__m512h)__builtin_ia32_vcvtuw2ph512_mask((__v32hu)(A), (__v32hf)(W), \ + (__mmask32)(U), (int)(R))) + +#define _mm512_maskz_cvt_roundepu16_ph(U, A, R) \ + ((__m512h)__builtin_ia32_vcvtuw2ph512_mask( \ + (__v32hu)(A), (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), (int)(R))) + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 +_mm512_cvtepu16_ph(__m512i __A) { + return (__m512h)__builtin_ia32_vcvtuw2ph512_mask( + (__v32hu)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)-1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 +_mm512_mask_cvtepu16_ph(__m512h __W, __mmask32 __U, __m512i __A) { + return (__m512h)__builtin_ia32_vcvtuw2ph512_mask( + (__v32hu)__A, (__v32hf)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 +_mm512_maskz_cvtepu16_ph(__mmask32 __U, __m512i __A) { + return (__m512h)__builtin_ia32_vcvtuw2ph512_mask( + (__v32hu)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_cvt_roundph_epi32(A, R) \ + ((__m512i)__builtin_ia32_vcvtph2dq512_mask( \ + (__v16hf)(A), (__v16si)_mm512_undefined_epi32(), (__mmask16)(-1), \ + (int)(R))) + +#define _mm512_mask_cvt_roundph_epi32(W, U, A, R) \ + ((__m512i)__builtin_ia32_vcvtph2dq512_mask((__v16hf)(A), (__v16si)(W), \ + (__mmask16)(U), (int)(R))) + +#define _mm512_maskz_cvt_roundph_epi32(U, A, R) \ + ((__m512i)__builtin_ia32_vcvtph2dq512_mask((__v16hf)(A), \ + (__v16si)_mm512_setzero_epi32(), \ + (__mmask16)(U), (int)(R))) + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_cvtph_epi32(__m256h __A) { + return (__m512i)__builtin_ia32_vcvtph2dq512_mask( + (__v16hf)__A, (__v16si)_mm512_setzero_epi32(), (__mmask16)-1, + 
_MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_mask_cvtph_epi32(__m512i __W, __mmask16 __U, __m256h __A) { + return (__m512i)__builtin_ia32_vcvtph2dq512_mask( + (__v16hf)__A, (__v16si)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_maskz_cvtph_epi32(__mmask16 __U, __m256h __A) { + return (__m512i)__builtin_ia32_vcvtph2dq512_mask( + (__v16hf)__A, (__v16si)_mm512_setzero_epi32(), (__mmask16)__U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_cvt_roundph_epu32(A, R) \ + ((__m512i)__builtin_ia32_vcvtph2udq512_mask( \ + (__v16hf)(A), (__v16su)_mm512_undefined_epi32(), (__mmask16)(-1), \ + (int)(R))) + +#define _mm512_mask_cvt_roundph_epu32(W, U, A, R) \ + ((__m512i)__builtin_ia32_vcvtph2udq512_mask((__v16hf)(A), (__v16su)(W), \ + (__mmask16)(U), (int)(R))) + +#define _mm512_maskz_cvt_roundph_epu32(U, A, R) \ + ((__m512i)__builtin_ia32_vcvtph2udq512_mask((__v16hf)(A), \ + (__v16su)_mm512_setzero_epi32(), \ + (__mmask16)(U), (int)(R))) + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_cvtph_epu32(__m256h __A) { + return (__m512i)__builtin_ia32_vcvtph2udq512_mask( + (__v16hf)__A, (__v16su)_mm512_setzero_epi32(), (__mmask16)-1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_mask_cvtph_epu32(__m512i __W, __mmask16 __U, __m256h __A) { + return (__m512i)__builtin_ia32_vcvtph2udq512_mask( + (__v16hf)__A, (__v16su)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_maskz_cvtph_epu32(__mmask16 __U, __m256h __A) { + return (__m512i)__builtin_ia32_vcvtph2udq512_mask( + (__v16hf)__A, (__v16su)_mm512_setzero_epi32(), (__mmask16)__U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_cvt_roundepi32_ph(A, R) \ + ((__m256h)__builtin_ia32_vcvtdq2ph512_mask((__v16si)(A), \ + (__v16hf)_mm256_undefined_ph(), \ + (__mmask16)(-1), (int)(R))) + +#define _mm512_mask_cvt_roundepi32_ph(W, U, A, R) \ + ((__m256h)__builtin_ia32_vcvtdq2ph512_mask((__v16si)(A), (__v16hf)(W), \ + (__mmask16)(U), (int)(R))) + +#define _mm512_maskz_cvt_roundepi32_ph(U, A, R) \ + ((__m256h)__builtin_ia32_vcvtdq2ph512_mask( \ + (__v16si)(A), (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), (int)(R))) + +static __inline__ __m256h __DEFAULT_FN_ATTRS512 +_mm512_cvtepi32_ph(__m512i __A) { + return (__m256h)__builtin_ia32_vcvtdq2ph512_mask( + (__v16si)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS512 +_mm512_mask_cvtepi32_ph(__m256h __W, __mmask16 __U, __m512i __A) { + return (__m256h)__builtin_ia32_vcvtdq2ph512_mask( + (__v16si)__A, (__v16hf)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS512 +_mm512_maskz_cvtepi32_ph(__mmask16 __U, __m512i __A) { + return (__m256h)__builtin_ia32_vcvtdq2ph512_mask( + (__v16si)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_cvt_roundepu32_ph(A, R) \ + ((__m256h)__builtin_ia32_vcvtudq2ph512_mask((__v16su)(A), \ + (__v16hf)_mm256_undefined_ph(), \ + (__mmask16)(-1), (int)(R))) + +#define _mm512_mask_cvt_roundepu32_ph(W, U, A, R) \ + ((__m256h)__builtin_ia32_vcvtudq2ph512_mask((__v16su)(A), (__v16hf)(W), \ + (__mmask16)(U), (int)(R))) + +#define _mm512_maskz_cvt_roundepu32_ph(U, A, R) \ + ((__m256h)__builtin_ia32_vcvtudq2ph512_mask( \ + (__v16su)(A), (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), (int)(R))) + +static __inline__ 
__m256h __DEFAULT_FN_ATTRS512 +_mm512_cvtepu32_ph(__m512i __A) { + return (__m256h)__builtin_ia32_vcvtudq2ph512_mask( + (__v16su)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS512 +_mm512_mask_cvtepu32_ph(__m256h __W, __mmask16 __U, __m512i __A) { + return (__m256h)__builtin_ia32_vcvtudq2ph512_mask( + (__v16su)__A, (__v16hf)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS512 +_mm512_maskz_cvtepu32_ph(__mmask16 __U, __m512i __A) { + return (__m256h)__builtin_ia32_vcvtudq2ph512_mask( + (__v16su)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_cvtt_roundph_epi32(A, R) \ + ((__m512i)__builtin_ia32_vcvttph2dq512_mask( \ + (__v16hf)(A), (__v16si)_mm512_undefined_epi32(), (__mmask16)(-1), \ + (int)(R))) + +#define _mm512_mask_cvtt_roundph_epi32(W, U, A, R) \ + ((__m512i)__builtin_ia32_vcvttph2dq512_mask((__v16hf)(A), (__v16si)(W), \ + (__mmask16)(U), (int)(R))) + +#define _mm512_maskz_cvtt_roundph_epi32(U, A, R) \ + ((__m512i)__builtin_ia32_vcvttph2dq512_mask((__v16hf)(A), \ + (__v16si)_mm512_setzero_epi32(), \ + (__mmask16)(U), (int)(R))) + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_cvttph_epi32(__m256h __A) { + return (__m512i)__builtin_ia32_vcvttph2dq512_mask( + (__v16hf)__A, (__v16si)_mm512_setzero_epi32(), (__mmask16)-1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_mask_cvttph_epi32(__m512i __W, __mmask16 __U, __m256h __A) { + return (__m512i)__builtin_ia32_vcvttph2dq512_mask( + (__v16hf)__A, (__v16si)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_maskz_cvttph_epi32(__mmask16 __U, __m256h __A) { + return (__m512i)__builtin_ia32_vcvttph2dq512_mask( + (__v16hf)__A, (__v16si)_mm512_setzero_epi32(), (__mmask16)__U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_cvtt_roundph_epu32(A, R) \ + ((__m512i)__builtin_ia32_vcvttph2udq512_mask( \ + (__v16hf)(A), (__v16su)_mm512_undefined_epi32(), (__mmask16)(-1), \ + (int)(R))) + +#define _mm512_mask_cvtt_roundph_epu32(W, U, A, R) \ + ((__m512i)__builtin_ia32_vcvttph2udq512_mask((__v16hf)(A), (__v16su)(W), \ + (__mmask16)(U), (int)(R))) + +#define _mm512_maskz_cvtt_roundph_epu32(U, A, R) \ + ((__m512i)__builtin_ia32_vcvttph2udq512_mask( \ + (__v16hf)(A), (__v16su)_mm512_setzero_epi32(), (__mmask16)(U), \ + (int)(R))) + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_cvttph_epu32(__m256h __A) { + return (__m512i)__builtin_ia32_vcvttph2udq512_mask( + (__v16hf)__A, (__v16su)_mm512_setzero_epi32(), (__mmask16)-1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_mask_cvttph_epu32(__m512i __W, __mmask16 __U, __m256h __A) { + return (__m512i)__builtin_ia32_vcvttph2udq512_mask( + (__v16hf)__A, (__v16su)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_maskz_cvttph_epu32(__mmask16 __U, __m256h __A) { + return (__m512i)__builtin_ia32_vcvttph2udq512_mask( + (__v16hf)__A, (__v16su)_mm512_setzero_epi32(), (__mmask16)__U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_cvt_roundepi64_ph(A, R) \ + ((__m128h)__builtin_ia32_vcvtqq2ph512_mask( \ + (__v8di)(A), (__v8hf)_mm_undefined_ph(), (__mmask8)(-1), (int)(R))) + +#define _mm512_mask_cvt_roundepi64_ph(W, U, A, R) \ + ((__m128h)__builtin_ia32_vcvtqq2ph512_mask((__v8di)(A), (__v8hf)(W), \ + (__mmask8)(U), 
(int)(R))) + +#define _mm512_maskz_cvt_roundepi64_ph(U, A, R) \ + ((__m128h)__builtin_ia32_vcvtqq2ph512_mask( \ + (__v8di)(A), (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R))) + +static __inline__ __m128h __DEFAULT_FN_ATTRS512 +_mm512_cvtepi64_ph(__m512i __A) { + return (__m128h)__builtin_ia32_vcvtqq2ph512_mask( + (__v8di)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)-1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS512 +_mm512_mask_cvtepi64_ph(__m128h __W, __mmask8 __U, __m512i __A) { + return (__m128h)__builtin_ia32_vcvtqq2ph512_mask( + (__v8di)__A, (__v8hf)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS512 +_mm512_maskz_cvtepi64_ph(__mmask8 __U, __m512i __A) { + return (__m128h)__builtin_ia32_vcvtqq2ph512_mask( + (__v8di)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_cvt_roundph_epi64(A, R) \ + ((__m512i)__builtin_ia32_vcvtph2qq512_mask((__v8hf)(A), \ + (__v8di)_mm512_undefined_epi32(), \ + (__mmask8)(-1), (int)(R))) + +#define _mm512_mask_cvt_roundph_epi64(W, U, A, R) \ + ((__m512i)__builtin_ia32_vcvtph2qq512_mask((__v8hf)(A), (__v8di)(W), \ + (__mmask8)(U), (int)(R))) + +#define _mm512_maskz_cvt_roundph_epi64(U, A, R) \ + ((__m512i)__builtin_ia32_vcvtph2qq512_mask( \ + (__v8hf)(A), (__v8di)_mm512_setzero_epi32(), (__mmask8)(U), (int)(R))) + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_cvtph_epi64(__m128h __A) { + return (__m512i)__builtin_ia32_vcvtph2qq512_mask( + (__v8hf)__A, (__v8di)_mm512_setzero_epi32(), (__mmask8)-1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_mask_cvtph_epi64(__m512i __W, __mmask8 __U, __m128h __A) { + return (__m512i)__builtin_ia32_vcvtph2qq512_mask( + (__v8hf)__A, (__v8di)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_maskz_cvtph_epi64(__mmask8 __U, __m128h __A) { + return (__m512i)__builtin_ia32_vcvtph2qq512_mask( + (__v8hf)__A, (__v8di)_mm512_setzero_epi32(), (__mmask8)__U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_cvt_roundepu64_ph(A, R) \ + ((__m128h)__builtin_ia32_vcvtuqq2ph512_mask( \ + (__v8du)(A), (__v8hf)_mm_undefined_ph(), (__mmask8)(-1), (int)(R))) + +#define _mm512_mask_cvt_roundepu64_ph(W, U, A, R) \ + ((__m128h)__builtin_ia32_vcvtuqq2ph512_mask((__v8du)(A), (__v8hf)(W), \ + (__mmask8)(U), (int)(R))) + +#define _mm512_maskz_cvt_roundepu64_ph(U, A, R) \ + ((__m128h)__builtin_ia32_vcvtuqq2ph512_mask( \ + (__v8du)(A), (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R))) + +static __inline__ __m128h __DEFAULT_FN_ATTRS512 +_mm512_cvtepu64_ph(__m512i __A) { + return (__m128h)__builtin_ia32_vcvtuqq2ph512_mask( + (__v8du)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)-1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS512 +_mm512_mask_cvtepu64_ph(__m128h __W, __mmask8 __U, __m512i __A) { + return (__m128h)__builtin_ia32_vcvtuqq2ph512_mask( + (__v8du)__A, (__v8hf)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS512 +_mm512_maskz_cvtepu64_ph(__mmask8 __U, __m512i __A) { + return (__m128h)__builtin_ia32_vcvtuqq2ph512_mask( + (__v8du)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_cvt_roundph_epu64(A, R) \ + ((__m512i)__builtin_ia32_vcvtph2uqq512_mask( \ + (__v8hf)(A), (__v8du)_mm512_undefined_epi32(), (__mmask8)(-1), \ + (int)(R))) + +#define _mm512_mask_cvt_roundph_epu64(W, U, A, R) \ 
+ ((__m512i)__builtin_ia32_vcvtph2uqq512_mask((__v8hf)(A), (__v8du)(W), \ + (__mmask8)(U), (int)(R))) + +#define _mm512_maskz_cvt_roundph_epu64(U, A, R) \ + ((__m512i)__builtin_ia32_vcvtph2uqq512_mask( \ + (__v8hf)(A), (__v8du)_mm512_setzero_epi32(), (__mmask8)(U), (int)(R))) + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_cvtph_epu64(__m128h __A) { + return (__m512i)__builtin_ia32_vcvtph2uqq512_mask( + (__v8hf)__A, (__v8du)_mm512_setzero_epi32(), (__mmask8)-1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_mask_cvtph_epu64(__m512i __W, __mmask8 __U, __m128h __A) { + return (__m512i)__builtin_ia32_vcvtph2uqq512_mask( + (__v8hf)__A, (__v8du)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_maskz_cvtph_epu64(__mmask8 __U, __m128h __A) { + return (__m512i)__builtin_ia32_vcvtph2uqq512_mask( + (__v8hf)__A, (__v8du)_mm512_setzero_epi32(), (__mmask8)__U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_cvtt_roundph_epi64(A, R) \ + ((__m512i)__builtin_ia32_vcvttph2qq512_mask( \ + (__v8hf)(A), (__v8di)_mm512_undefined_epi32(), (__mmask8)(-1), \ + (int)(R))) + +#define _mm512_mask_cvtt_roundph_epi64(W, U, A, R) \ + ((__m512i)__builtin_ia32_vcvttph2qq512_mask((__v8hf)(A), (__v8di)(W), \ + (__mmask8)(U), (int)(R))) + +#define _mm512_maskz_cvtt_roundph_epi64(U, A, R) \ + ((__m512i)__builtin_ia32_vcvttph2qq512_mask( \ + (__v8hf)(A), (__v8di)_mm512_setzero_epi32(), (__mmask8)(U), (int)(R))) + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_cvttph_epi64(__m128h __A) { + return (__m512i)__builtin_ia32_vcvttph2qq512_mask( + (__v8hf)__A, (__v8di)_mm512_setzero_epi32(), (__mmask8)-1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_mask_cvttph_epi64(__m512i __W, __mmask8 __U, __m128h __A) { + return (__m512i)__builtin_ia32_vcvttph2qq512_mask( + (__v8hf)__A, (__v8di)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_maskz_cvttph_epi64(__mmask8 __U, __m128h __A) { + return (__m512i)__builtin_ia32_vcvttph2qq512_mask( + (__v8hf)__A, (__v8di)_mm512_setzero_epi32(), (__mmask8)__U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_cvtt_roundph_epu64(A, R) \ + ((__m512i)__builtin_ia32_vcvttph2uqq512_mask( \ + (__v8hf)(A), (__v8du)_mm512_undefined_epi32(), (__mmask8)(-1), \ + (int)(R))) + +#define _mm512_mask_cvtt_roundph_epu64(W, U, A, R) \ + ((__m512i)__builtin_ia32_vcvttph2uqq512_mask((__v8hf)(A), (__v8du)(W), \ + (__mmask8)(U), (int)(R))) + +#define _mm512_maskz_cvtt_roundph_epu64(U, A, R) \ + ((__m512i)__builtin_ia32_vcvttph2uqq512_mask( \ + (__v8hf)(A), (__v8du)_mm512_setzero_epi32(), (__mmask8)(U), (int)(R))) + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_cvttph_epu64(__m128h __A) { + return (__m512i)__builtin_ia32_vcvttph2uqq512_mask( + (__v8hf)__A, (__v8du)_mm512_setzero_epi32(), (__mmask8)-1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_mask_cvttph_epu64(__m512i __W, __mmask8 __U, __m128h __A) { + return (__m512i)__builtin_ia32_vcvttph2uqq512_mask( + (__v8hf)__A, (__v8du)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_maskz_cvttph_epu64(__mmask8 __U, __m128h __A) { + return (__m512i)__builtin_ia32_vcvttph2uqq512_mask( + (__v8hf)__A, (__v8du)_mm512_setzero_epi32(), (__mmask8)__U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm_cvt_roundsh_i32(A, R) \ + 
((int)__builtin_ia32_vcvtsh2si32((__v8hf)(A), (int)(R))) + +static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvtsh_i32(__m128h __A) { + return (int)__builtin_ia32_vcvtsh2si32((__v8hf)__A, _MM_FROUND_CUR_DIRECTION); +} + +#define _mm_cvt_roundsh_u32(A, R) \ + ((unsigned int)__builtin_ia32_vcvtsh2usi32((__v8hf)(A), (int)(R))) + +static __inline__ unsigned int __DEFAULT_FN_ATTRS128 +_mm_cvtsh_u32(__m128h __A) { + return (unsigned int)__builtin_ia32_vcvtsh2usi32((__v8hf)__A, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __x86_64__ +#define _mm_cvt_roundsh_i64(A, R) \ + ((long long)__builtin_ia32_vcvtsh2si64((__v8hf)(A), (int)(R))) + +static __inline__ long long __DEFAULT_FN_ATTRS128 _mm_cvtsh_i64(__m128h __A) { + return (long long)__builtin_ia32_vcvtsh2si64((__v8hf)__A, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm_cvt_roundsh_u64(A, R) \ + ((unsigned long long)__builtin_ia32_vcvtsh2usi64((__v8hf)(A), (int)(R))) + +static __inline__ unsigned long long __DEFAULT_FN_ATTRS128 +_mm_cvtsh_u64(__m128h __A) { + return (unsigned long long)__builtin_ia32_vcvtsh2usi64( + (__v8hf)__A, _MM_FROUND_CUR_DIRECTION); +} +#endif // __x86_64__ + +#define _mm_cvt_roundu32_sh(A, B, R) \ + ((__m128h)__builtin_ia32_vcvtusi2sh((__v8hf)(A), (unsigned int)(B), (int)(R))) + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 +_mm_cvtu32_sh(__m128h __A, unsigned int __B) { + __A[0] = __B; + return __A; +} + +#ifdef __x86_64__ +#define _mm_cvt_roundu64_sh(A, B, R) \ + ((__m128h)__builtin_ia32_vcvtusi642sh((__v8hf)(A), (unsigned long long)(B), \ + (int)(R))) + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 +_mm_cvtu64_sh(__m128h __A, unsigned long long __B) { + __A[0] = __B; + return __A; +} +#endif + +#define _mm_cvt_roundi32_sh(A, B, R) \ + ((__m128h)__builtin_ia32_vcvtsi2sh((__v8hf)(A), (int)(B), (int)(R))) + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvti32_sh(__m128h __A, + int __B) { + __A[0] = __B; + return __A; +} + +#ifdef __x86_64__ +#define _mm_cvt_roundi64_sh(A, B, R) \ + ((__m128h)__builtin_ia32_vcvtsi642sh((__v8hf)(A), (long long)(B), (int)(R))) + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvti64_sh(__m128h __A, + long long __B) { + __A[0] = __B; + return __A; +} +#endif + +#define _mm_cvtt_roundsh_i32(A, R) \ + ((int)__builtin_ia32_vcvttsh2si32((__v8hf)(A), (int)(R))) + +static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttsh_i32(__m128h __A) { + return (int)__builtin_ia32_vcvttsh2si32((__v8hf)__A, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __x86_64__ +#define _mm_cvtt_roundsh_i64(A, R) \ + ((long long)__builtin_ia32_vcvttsh2si64((__v8hf)(A), (int)(R))) + +static __inline__ long long __DEFAULT_FN_ATTRS128 _mm_cvttsh_i64(__m128h __A) { + return (long long)__builtin_ia32_vcvttsh2si64((__v8hf)__A, + _MM_FROUND_CUR_DIRECTION); +} +#endif + +#define _mm_cvtt_roundsh_u32(A, R) \ + ((unsigned int)__builtin_ia32_vcvttsh2usi32((__v8hf)(A), (int)(R))) + +static __inline__ unsigned int __DEFAULT_FN_ATTRS128 +_mm_cvttsh_u32(__m128h __A) { + return (unsigned int)__builtin_ia32_vcvttsh2usi32((__v8hf)__A, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __x86_64__ +#define _mm_cvtt_roundsh_u64(A, R) \ + ((unsigned long long)__builtin_ia32_vcvttsh2usi64((__v8hf)(A), (int)(R))) + +static __inline__ unsigned long long __DEFAULT_FN_ATTRS128 +_mm_cvttsh_u64(__m128h __A) { + return (unsigned long long)__builtin_ia32_vcvttsh2usi64( + (__v8hf)__A, _MM_FROUND_CUR_DIRECTION); +} +#endif + +#define _mm512_cvtx_roundph_ps(A, R) \ + ((__m512)__builtin_ia32_vcvtph2psx512_mask((__v16hf)(A), \ + (__v16sf)_mm512_undefined_ps(), \ + 
(__mmask16)(-1), (int)(R))) + +#define _mm512_mask_cvtx_roundph_ps(W, U, A, R) \ + ((__m512)__builtin_ia32_vcvtph2psx512_mask((__v16hf)(A), (__v16sf)(W), \ + (__mmask16)(U), (int)(R))) + +#define _mm512_maskz_cvtx_roundph_ps(U, A, R) \ + ((__m512)__builtin_ia32_vcvtph2psx512_mask( \ + (__v16hf)(A), (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), (int)(R))) + +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtxph_ps(__m256h __A) { + return (__m512)__builtin_ia32_vcvtph2psx512_mask( + (__v16hf)__A, (__v16sf)_mm512_setzero_ps(), (__mmask16)-1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS512 +_mm512_mask_cvtxph_ps(__m512 __W, __mmask16 __U, __m256h __A) { + return (__m512)__builtin_ia32_vcvtph2psx512_mask( + (__v16hf)__A, (__v16sf)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS512 +_mm512_maskz_cvtxph_ps(__mmask16 __U, __m256h __A) { + return (__m512)__builtin_ia32_vcvtph2psx512_mask( + (__v16hf)__A, (__v16sf)_mm512_setzero_ps(), (__mmask16)__U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_cvtx_roundps_ph(A, R) \ + ((__m256h)__builtin_ia32_vcvtps2phx512_mask((__v16sf)(A), \ + (__v16hf)_mm256_undefined_ph(), \ + (__mmask16)(-1), (int)(R))) + +#define _mm512_mask_cvtx_roundps_ph(W, U, A, R) \ + ((__m256h)__builtin_ia32_vcvtps2phx512_mask((__v16sf)(A), (__v16hf)(W), \ + (__mmask16)(U), (int)(R))) + +#define _mm512_maskz_cvtx_roundps_ph(U, A, R) \ + ((__m256h)__builtin_ia32_vcvtps2phx512_mask( \ + (__v16sf)(A), (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), (int)(R))) + +static __inline__ __m256h __DEFAULT_FN_ATTRS512 _mm512_cvtxps_ph(__m512 __A) { + return (__m256h)__builtin_ia32_vcvtps2phx512_mask( + (__v16sf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS512 +_mm512_mask_cvtxps_ph(__m256h __W, __mmask16 __U, __m512 __A) { + return (__m256h)__builtin_ia32_vcvtps2phx512_mask( + (__v16sf)__A, (__v16hf)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS512 +_mm512_maskz_cvtxps_ph(__mmask16 __U, __m512 __A) { + return (__m256h)__builtin_ia32_vcvtps2phx512_mask( + (__v16sf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ _Float16 __DEFAULT_FN_ATTRS512 +_mm512_reduce_add_ph(__m512h __W) { + return __builtin_ia32_reduce_fadd_ph512(-0.0f16, __W); +} + +static __inline__ _Float16 __DEFAULT_FN_ATTRS512 +_mm512_reduce_mul_ph(__m512h __W) { + return __builtin_ia32_reduce_fmul_ph512(1.0f16, __W); +} + +static __inline__ _Float16 __DEFAULT_FN_ATTRS512 +_mm512_reduce_max_ph(__m512h __V) { + return __builtin_ia32_reduce_fmax_ph512(__V); +} + +static __inline__ _Float16 __DEFAULT_FN_ATTRS512 +_mm512_reduce_min_ph(__m512h __V) { + return __builtin_ia32_reduce_fmin_ph512(__V); +} + static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mask_blend_ph(__mmask32 __U, __m512h __A, __m512h __W) { return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U, (__v32hf)__W, diff --git a/clang/lib/Headers/avx512vbmi2intrin.h b/clang/lib/Headers/avx512vbmi2intrin.h index a23144616ce36..17fa77722c64f 100644 --- a/clang/lib/Headers/avx512vbmi2intrin.h +++ b/clang/lib/Headers/avx512vbmi2intrin.h @@ -129,88 +129,88 @@ _mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P) } #define _mm512_shldi_epi64(A, B, I) \ - (__m512i)__builtin_ia32_vpshldq512((__v8di)(__m512i)(A), \ - (__v8di)(__m512i)(B), (int)(I)) + 
((__m512i)__builtin_ia32_vpshldq512((__v8di)(__m512i)(A), \ + (__v8di)(__m512i)(B), (int)(I))) #define _mm512_mask_shldi_epi64(S, U, A, B, I) \ - (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ - (__v8di)_mm512_shldi_epi64((A), (B), (I)), \ - (__v8di)(__m512i)(S)) + ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ + (__v8di)_mm512_shldi_epi64((A), (B), (I)), \ + (__v8di)(__m512i)(S))) #define _mm512_maskz_shldi_epi64(U, A, B, I) \ - (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ - (__v8di)_mm512_shldi_epi64((A), (B), (I)), \ - (__v8di)_mm512_setzero_si512()) + ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ + (__v8di)_mm512_shldi_epi64((A), (B), (I)), \ + (__v8di)_mm512_setzero_si512())) #define _mm512_shldi_epi32(A, B, I) \ - (__m512i)__builtin_ia32_vpshldd512((__v16si)(__m512i)(A), \ - (__v16si)(__m512i)(B), (int)(I)) + ((__m512i)__builtin_ia32_vpshldd512((__v16si)(__m512i)(A), \ + (__v16si)(__m512i)(B), (int)(I))) #define _mm512_mask_shldi_epi32(S, U, A, B, I) \ - (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ - (__v16si)_mm512_shldi_epi32((A), (B), (I)), \ - (__v16si)(__m512i)(S)) + ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ + (__v16si)_mm512_shldi_epi32((A), (B), (I)), \ + (__v16si)(__m512i)(S))) #define _mm512_maskz_shldi_epi32(U, A, B, I) \ - (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ - (__v16si)_mm512_shldi_epi32((A), (B), (I)), \ - (__v16si)_mm512_setzero_si512()) + ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ + (__v16si)_mm512_shldi_epi32((A), (B), (I)), \ + (__v16si)_mm512_setzero_si512())) #define _mm512_shldi_epi16(A, B, I) \ - (__m512i)__builtin_ia32_vpshldw512((__v32hi)(__m512i)(A), \ - (__v32hi)(__m512i)(B), (int)(I)) + ((__m512i)__builtin_ia32_vpshldw512((__v32hi)(__m512i)(A), \ + (__v32hi)(__m512i)(B), (int)(I))) #define _mm512_mask_shldi_epi16(S, U, A, B, I) \ - (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ - (__v32hi)_mm512_shldi_epi16((A), (B), (I)), \ - (__v32hi)(__m512i)(S)) + ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ + (__v32hi)_mm512_shldi_epi16((A), (B), (I)), \ + (__v32hi)(__m512i)(S))) #define _mm512_maskz_shldi_epi16(U, A, B, I) \ - (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ - (__v32hi)_mm512_shldi_epi16((A), (B), (I)), \ - (__v32hi)_mm512_setzero_si512()) + ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ + (__v32hi)_mm512_shldi_epi16((A), (B), (I)), \ + (__v32hi)_mm512_setzero_si512())) #define _mm512_shrdi_epi64(A, B, I) \ - (__m512i)__builtin_ia32_vpshrdq512((__v8di)(__m512i)(A), \ - (__v8di)(__m512i)(B), (int)(I)) + ((__m512i)__builtin_ia32_vpshrdq512((__v8di)(__m512i)(A), \ + (__v8di)(__m512i)(B), (int)(I))) #define _mm512_mask_shrdi_epi64(S, U, A, B, I) \ - (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ - (__v8di)_mm512_shrdi_epi64((A), (B), (I)), \ - (__v8di)(__m512i)(S)) + ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ + (__v8di)_mm512_shrdi_epi64((A), (B), (I)), \ + (__v8di)(__m512i)(S))) #define _mm512_maskz_shrdi_epi64(U, A, B, I) \ - (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ - (__v8di)_mm512_shrdi_epi64((A), (B), (I)), \ - (__v8di)_mm512_setzero_si512()) + ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ + (__v8di)_mm512_shrdi_epi64((A), (B), (I)), \ + (__v8di)_mm512_setzero_si512())) #define _mm512_shrdi_epi32(A, B, I) \ - (__m512i)__builtin_ia32_vpshrdd512((__v16si)(__m512i)(A), \ - (__v16si)(__m512i)(B), (int)(I)) + ((__m512i)__builtin_ia32_vpshrdd512((__v16si)(__m512i)(A), \ + (__v16si)(__m512i)(B), (int)(I))) #define 
_mm512_mask_shrdi_epi32(S, U, A, B, I) \ - (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ - (__v16si)_mm512_shrdi_epi32((A), (B), (I)), \ - (__v16si)(__m512i)(S)) + ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ + (__v16si)_mm512_shrdi_epi32((A), (B), (I)), \ + (__v16si)(__m512i)(S))) #define _mm512_maskz_shrdi_epi32(U, A, B, I) \ - (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ - (__v16si)_mm512_shrdi_epi32((A), (B), (I)), \ - (__v16si)_mm512_setzero_si512()) + ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ + (__v16si)_mm512_shrdi_epi32((A), (B), (I)), \ + (__v16si)_mm512_setzero_si512())) #define _mm512_shrdi_epi16(A, B, I) \ - (__m512i)__builtin_ia32_vpshrdw512((__v32hi)(__m512i)(A), \ - (__v32hi)(__m512i)(B), (int)(I)) + ((__m512i)__builtin_ia32_vpshrdw512((__v32hi)(__m512i)(A), \ + (__v32hi)(__m512i)(B), (int)(I))) #define _mm512_mask_shrdi_epi16(S, U, A, B, I) \ - (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ - (__v32hi)_mm512_shrdi_epi16((A), (B), (I)), \ - (__v32hi)(__m512i)(S)) + ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ + (__v32hi)_mm512_shrdi_epi16((A), (B), (I)), \ + (__v32hi)(__m512i)(S))) #define _mm512_maskz_shrdi_epi16(U, A, B, I) \ - (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ - (__v32hi)_mm512_shrdi_epi16((A), (B), (I)), \ - (__v32hi)_mm512_setzero_si512()) + ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ + (__v32hi)_mm512_shrdi_epi16((A), (B), (I)), \ + (__v32hi)_mm512_setzero_si512())) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_shldv_epi64(__m512i __A, __m512i __B, __m512i __C) diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h index 6ed10ed9803ba..7873516053ece 100644 --- a/clang/lib/Headers/avx512vlbwintrin.h +++ b/clang/lib/Headers/avx512vlbwintrin.h @@ -21,84 +21,84 @@ /* Integer compare */ #define _mm_cmp_epi8_mask(a, b, p) \ - (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \ - (__v16qi)(__m128i)(b), (int)(p), \ - (__mmask16)-1) + ((__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \ + (__v16qi)(__m128i)(b), (int)(p), \ + (__mmask16)-1)) #define _mm_mask_cmp_epi8_mask(m, a, b, p) \ - (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \ - (__v16qi)(__m128i)(b), (int)(p), \ - (__mmask16)(m)) + ((__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \ + (__v16qi)(__m128i)(b), (int)(p), \ + (__mmask16)(m))) #define _mm_cmp_epu8_mask(a, b, p) \ - (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \ - (__v16qi)(__m128i)(b), (int)(p), \ - (__mmask16)-1) + ((__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \ + (__v16qi)(__m128i)(b), (int)(p), \ + (__mmask16)-1)) #define _mm_mask_cmp_epu8_mask(m, a, b, p) \ - (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \ - (__v16qi)(__m128i)(b), (int)(p), \ - (__mmask16)(m)) + ((__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \ + (__v16qi)(__m128i)(b), (int)(p), \ + (__mmask16)(m))) #define _mm256_cmp_epi8_mask(a, b, p) \ - (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \ - (__v32qi)(__m256i)(b), (int)(p), \ - (__mmask32)-1) + ((__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \ + (__v32qi)(__m256i)(b), (int)(p), \ + (__mmask32)-1)) #define _mm256_mask_cmp_epi8_mask(m, a, b, p) \ - (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \ - (__v32qi)(__m256i)(b), (int)(p), \ - (__mmask32)(m)) + ((__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \ + (__v32qi)(__m256i)(b), (int)(p), \ 
+ (__mmask32)(m))) #define _mm256_cmp_epu8_mask(a, b, p) \ - (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \ - (__v32qi)(__m256i)(b), (int)(p), \ - (__mmask32)-1) + ((__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \ + (__v32qi)(__m256i)(b), (int)(p), \ + (__mmask32)-1)) #define _mm256_mask_cmp_epu8_mask(m, a, b, p) \ - (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \ - (__v32qi)(__m256i)(b), (int)(p), \ - (__mmask32)(m)) + ((__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \ + (__v32qi)(__m256i)(b), (int)(p), \ + (__mmask32)(m))) #define _mm_cmp_epi16_mask(a, b, p) \ - (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \ - (__v8hi)(__m128i)(b), (int)(p), \ - (__mmask8)-1) + ((__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \ + (__v8hi)(__m128i)(b), (int)(p), \ + (__mmask8)-1)) #define _mm_mask_cmp_epi16_mask(m, a, b, p) \ - (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \ - (__v8hi)(__m128i)(b), (int)(p), \ - (__mmask8)(m)) + ((__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \ + (__v8hi)(__m128i)(b), (int)(p), \ + (__mmask8)(m))) #define _mm_cmp_epu16_mask(a, b, p) \ - (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \ - (__v8hi)(__m128i)(b), (int)(p), \ - (__mmask8)-1) + ((__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \ + (__v8hi)(__m128i)(b), (int)(p), \ + (__mmask8)-1)) #define _mm_mask_cmp_epu16_mask(m, a, b, p) \ - (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \ - (__v8hi)(__m128i)(b), (int)(p), \ - (__mmask8)(m)) + ((__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \ + (__v8hi)(__m128i)(b), (int)(p), \ + (__mmask8)(m))) #define _mm256_cmp_epi16_mask(a, b, p) \ - (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \ - (__v16hi)(__m256i)(b), (int)(p), \ - (__mmask16)-1) + ((__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \ + (__v16hi)(__m256i)(b), (int)(p), \ + (__mmask16)-1)) #define _mm256_mask_cmp_epi16_mask(m, a, b, p) \ - (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \ - (__v16hi)(__m256i)(b), (int)(p), \ - (__mmask16)(m)) + ((__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \ + (__v16hi)(__m256i)(b), (int)(p), \ + (__mmask16)(m))) #define _mm256_cmp_epu16_mask(a, b, p) \ - (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \ - (__v16hi)(__m256i)(b), (int)(p), \ - (__mmask16)-1) + ((__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \ + (__v16hi)(__m256i)(b), (int)(p), \ + (__mmask16)-1)) #define _mm256_mask_cmp_epu16_mask(m, a, b, p) \ - (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \ - (__v16hi)(__m256i)(b), (int)(p), \ - (__mmask16)(m)) + ((__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \ + (__v16hi)(__m256i)(b), (int)(p), \ + (__mmask16)(m))) #define _mm_cmpeq_epi8_mask(A, B) \ _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ) @@ -1821,46 +1821,46 @@ _mm256_maskz_cvtepu8_epi16 (__mmask16 __U, __m128i __A) #define _mm_mask_shufflehi_epi16(W, U, A, imm) \ - (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ - (__v8hi)_mm_shufflehi_epi16((A), (imm)), \ - (__v8hi)(__m128i)(W)) + ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ + (__v8hi)_mm_shufflehi_epi16((A), (imm)), \ + (__v8hi)(__m128i)(W))) #define _mm_maskz_shufflehi_epi16(U, A, imm) \ - (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ - (__v8hi)_mm_shufflehi_epi16((A), (imm)), \ - (__v8hi)_mm_setzero_si128()) + ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), 
\ + (__v8hi)_mm_shufflehi_epi16((A), (imm)), \ + (__v8hi)_mm_setzero_si128())) #define _mm256_mask_shufflehi_epi16(W, U, A, imm) \ - (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ - (__v16hi)_mm256_shufflehi_epi16((A), (imm)), \ - (__v16hi)(__m256i)(W)) + ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ + (__v16hi)_mm256_shufflehi_epi16((A), (imm)), \ + (__v16hi)(__m256i)(W))) #define _mm256_maskz_shufflehi_epi16(U, A, imm) \ - (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ - (__v16hi)_mm256_shufflehi_epi16((A), (imm)), \ - (__v16hi)_mm256_setzero_si256()) + ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ + (__v16hi)_mm256_shufflehi_epi16((A), (imm)), \ + (__v16hi)_mm256_setzero_si256())) #define _mm_mask_shufflelo_epi16(W, U, A, imm) \ - (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ - (__v8hi)_mm_shufflelo_epi16((A), (imm)), \ - (__v8hi)(__m128i)(W)) + ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ + (__v8hi)_mm_shufflelo_epi16((A), (imm)), \ + (__v8hi)(__m128i)(W))) #define _mm_maskz_shufflelo_epi16(U, A, imm) \ - (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ - (__v8hi)_mm_shufflelo_epi16((A), (imm)), \ - (__v8hi)_mm_setzero_si128()) + ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ + (__v8hi)_mm_shufflelo_epi16((A), (imm)), \ + (__v8hi)_mm_setzero_si128())) #define _mm256_mask_shufflelo_epi16(W, U, A, imm) \ - (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ - (__v16hi)_mm256_shufflelo_epi16((A), \ - (imm)), \ - (__v16hi)(__m256i)(W)) + ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ + (__v16hi)_mm256_shufflelo_epi16((A), \ + (imm)), \ + (__v16hi)(__m256i)(W))) #define _mm256_maskz_shufflelo_epi16(U, A, imm) \ - (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ - (__v16hi)_mm256_shufflelo_epi16((A), \ - (imm)), \ - (__v16hi)_mm256_setzero_si256()) + ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ + (__v16hi)_mm256_shufflelo_epi16((A), \ + (imm)), \ + (__v16hi)_mm256_setzero_si256())) static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sllv_epi16(__m256i __A, __m256i __B) @@ -2756,52 +2756,52 @@ _mm256_mask_permutexvar_epi16 (__m256i __W, __mmask16 __M, __m256i __A, } #define _mm_mask_alignr_epi8(W, U, A, B, N) \ - (__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \ + ((__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \ (__v16qi)_mm_alignr_epi8((A), (B), (int)(N)), \ - (__v16qi)(__m128i)(W)) + (__v16qi)(__m128i)(W))) #define _mm_maskz_alignr_epi8(U, A, B, N) \ - (__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \ + ((__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \ (__v16qi)_mm_alignr_epi8((A), (B), (int)(N)), \ - (__v16qi)_mm_setzero_si128()) + (__v16qi)_mm_setzero_si128())) #define _mm256_mask_alignr_epi8(W, U, A, B, N) \ - (__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \ + ((__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \ (__v32qi)_mm256_alignr_epi8((A), (B), (int)(N)), \ - (__v32qi)(__m256i)(W)) + (__v32qi)(__m256i)(W))) #define _mm256_maskz_alignr_epi8(U, A, B, N) \ - (__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \ + ((__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \ (__v32qi)_mm256_alignr_epi8((A), (B), (int)(N)), \ - (__v32qi)_mm256_setzero_si256()) + (__v32qi)_mm256_setzero_si256())) #define _mm_dbsad_epu8(A, B, imm) \ - (__m128i)__builtin_ia32_dbpsadbw128((__v16qi)(__m128i)(A), \ - (__v16qi)(__m128i)(B), (int)(imm)) + ((__m128i)__builtin_ia32_dbpsadbw128((__v16qi)(__m128i)(A), \ + (__v16qi)(__m128i)(B), (int)(imm))) #define _mm_mask_dbsad_epu8(W, U, A, B, imm) \ - 
(__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ + ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ (__v8hi)_mm_dbsad_epu8((A), (B), (imm)), \ - (__v8hi)(__m128i)(W)) + (__v8hi)(__m128i)(W))) #define _mm_maskz_dbsad_epu8(U, A, B, imm) \ - (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ + ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ (__v8hi)_mm_dbsad_epu8((A), (B), (imm)), \ - (__v8hi)_mm_setzero_si128()) + (__v8hi)_mm_setzero_si128())) #define _mm256_dbsad_epu8(A, B, imm) \ - (__m256i)__builtin_ia32_dbpsadbw256((__v32qi)(__m256i)(A), \ - (__v32qi)(__m256i)(B), (int)(imm)) + ((__m256i)__builtin_ia32_dbpsadbw256((__v32qi)(__m256i)(A), \ + (__v32qi)(__m256i)(B), (int)(imm))) #define _mm256_mask_dbsad_epu8(W, U, A, B, imm) \ - (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ + ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ (__v16hi)_mm256_dbsad_epu8((A), (B), (imm)), \ - (__v16hi)(__m256i)(W)) + (__v16hi)(__m256i)(W))) #define _mm256_maskz_dbsad_epu8(U, A, B, imm) \ - (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ + ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ (__v16hi)_mm256_dbsad_epu8((A), (B), (imm)), \ - (__v16hi)_mm256_setzero_si256()) + (__v16hi)_mm256_setzero_si256())) #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 diff --git a/clang/lib/Headers/avx512vldqintrin.h b/clang/lib/Headers/avx512vldqintrin.h index 95ba574ea8210..713e1a18a1b3f 100644 --- a/clang/lib/Headers/avx512vldqintrin.h +++ b/clang/lib/Headers/avx512vldqintrin.h @@ -773,134 +773,134 @@ _mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A) { } #define _mm_range_pd(A, B, C) \ - (__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), (int)(C), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1) + ((__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), (int)(C), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)-1)) #define _mm_mask_range_pd(W, U, A, B, C) \ - (__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), (int)(C), \ - (__v2df)(__m128d)(W), \ - (__mmask8)(U)) + ((__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), (int)(C), \ + (__v2df)(__m128d)(W), \ + (__mmask8)(U))) #define _mm_maskz_range_pd(U, A, B, C) \ - (__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), (int)(C), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U)) + ((__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), (int)(C), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)(U))) #define _mm256_range_pd(A, B, C) \ - (__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \ - (__v4df)(__m256d)(B), (int)(C), \ - (__v4df)_mm256_setzero_pd(), \ - (__mmask8)-1) + ((__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \ + (__v4df)(__m256d)(B), (int)(C), \ + (__v4df)_mm256_setzero_pd(), \ + (__mmask8)-1)) #define _mm256_mask_range_pd(W, U, A, B, C) \ - (__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \ - (__v4df)(__m256d)(B), (int)(C), \ - (__v4df)(__m256d)(W), \ - (__mmask8)(U)) + ((__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \ + (__v4df)(__m256d)(B), (int)(C), \ + (__v4df)(__m256d)(W), \ + (__mmask8)(U))) #define _mm256_maskz_range_pd(U, A, B, C) \ - (__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \ - (__v4df)(__m256d)(B), (int)(C), \ - (__v4df)_mm256_setzero_pd(), \ - (__mmask8)(U)) + ((__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \ + 
(__v4df)(__m256d)(B), (int)(C), \ + (__v4df)_mm256_setzero_pd(), \ + (__mmask8)(U))) #define _mm_range_ps(A, B, C) \ - (__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), (int)(C), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)-1) + ((__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), (int)(C), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)-1)) #define _mm_mask_range_ps(W, U, A, B, C) \ - (__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), (int)(C), \ - (__v4sf)(__m128)(W), (__mmask8)(U)) + ((__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), (int)(C), \ + (__v4sf)(__m128)(W), (__mmask8)(U))) #define _mm_maskz_range_ps(U, A, B, C) \ - (__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), (int)(C), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U)) + ((__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), (int)(C), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)(U))) #define _mm256_range_ps(A, B, C) \ - (__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \ - (__v8sf)(__m256)(B), (int)(C), \ - (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)-1) + ((__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \ + (__v8sf)(__m256)(B), (int)(C), \ + (__v8sf)_mm256_setzero_ps(), \ + (__mmask8)-1)) #define _mm256_mask_range_ps(W, U, A, B, C) \ - (__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \ - (__v8sf)(__m256)(B), (int)(C), \ - (__v8sf)(__m256)(W), (__mmask8)(U)) + ((__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \ + (__v8sf)(__m256)(B), (int)(C), \ + (__v8sf)(__m256)(W), (__mmask8)(U))) #define _mm256_maskz_range_ps(U, A, B, C) \ - (__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \ - (__v8sf)(__m256)(B), (int)(C), \ - (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)(U)) + ((__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \ + (__v8sf)(__m256)(B), (int)(C), \ + (__v8sf)_mm256_setzero_ps(), \ + (__mmask8)(U))) #define _mm_reduce_pd(A, B) \ - (__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1) + ((__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)-1)) #define _mm_mask_reduce_pd(W, U, A, B) \ - (__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \ - (__v2df)(__m128d)(W), \ - (__mmask8)(U)) + ((__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \ + (__v2df)(__m128d)(W), \ + (__mmask8)(U))) #define _mm_maskz_reduce_pd(U, A, B) \ - (__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U)) + ((__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)(U))) #define _mm256_reduce_pd(A, B) \ - (__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \ - (__v4df)_mm256_setzero_pd(), \ - (__mmask8)-1) + ((__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \ + (__v4df)_mm256_setzero_pd(), \ + (__mmask8)-1)) #define _mm256_mask_reduce_pd(W, U, A, B) \ - (__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \ - (__v4df)(__m256d)(W), \ - (__mmask8)(U)) + ((__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \ + (__v4df)(__m256d)(W), \ + (__mmask8)(U))) #define _mm256_maskz_reduce_pd(U, A, B) \ - 
(__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \ - (__v4df)_mm256_setzero_pd(), \ - (__mmask8)(U)) + ((__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \ + (__v4df)_mm256_setzero_pd(), \ + (__mmask8)(U))) #define _mm_reduce_ps(A, B) \ - (__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)-1) + ((__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)-1)) #define _mm_mask_reduce_ps(W, U, A, B) \ - (__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \ - (__v4sf)(__m128)(W), \ - (__mmask8)(U)) + ((__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \ + (__v4sf)(__m128)(W), \ + (__mmask8)(U))) #define _mm_maskz_reduce_ps(U, A, B) \ - (__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U)) + ((__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)(U))) #define _mm256_reduce_ps(A, B) \ - (__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \ - (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)-1) + ((__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \ + (__v8sf)_mm256_setzero_ps(), \ + (__mmask8)-1)) #define _mm256_mask_reduce_ps(W, U, A, B) \ - (__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \ - (__v8sf)(__m256)(W), \ - (__mmask8)(U)) + ((__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \ + (__v8sf)(__m256)(W), \ + (__mmask8)(U))) #define _mm256_maskz_reduce_ps(U, A, B) \ - (__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \ - (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)(U)) + ((__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \ + (__v8sf)_mm256_setzero_ps(), \ + (__mmask8)(U))) static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_movepi32_mask (__m128i __A) @@ -1066,100 +1066,100 @@ _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A) } #define _mm256_extractf64x2_pd(A, imm) \ - (__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \ - (int)(imm), \ - (__v2df)_mm_undefined_pd(), \ - (__mmask8)-1) + ((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \ + (int)(imm), \ + (__v2df)_mm_undefined_pd(), \ + (__mmask8)-1)) #define _mm256_mask_extractf64x2_pd(W, U, A, imm) \ - (__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \ - (int)(imm), \ - (__v2df)(__m128d)(W), \ - (__mmask8)(U)) + ((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \ + (int)(imm), \ + (__v2df)(__m128d)(W), \ + (__mmask8)(U))) #define _mm256_maskz_extractf64x2_pd(U, A, imm) \ - (__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \ - (int)(imm), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U)) + ((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \ + (int)(imm), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)(U))) #define _mm256_extracti64x2_epi64(A, imm) \ - (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \ + ((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \ (int)(imm), \ (__v2di)_mm_undefined_si128(), \ - (__mmask8)-1) + (__mmask8)-1)) #define _mm256_mask_extracti64x2_epi64(W, U, A, imm) \ - (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \ - (int)(imm), \ - (__v2di)(__m128i)(W), \ - (__mmask8)(U)) + 
((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \ + (int)(imm), \ + (__v2di)(__m128i)(W), \ + (__mmask8)(U))) #define _mm256_maskz_extracti64x2_epi64(U, A, imm) \ - (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \ - (int)(imm), \ - (__v2di)_mm_setzero_si128(), \ - (__mmask8)(U)) + ((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \ + (int)(imm), \ + (__v2di)_mm_setzero_si128(), \ + (__mmask8)(U))) #define _mm256_insertf64x2(A, B, imm) \ - (__m256d)__builtin_ia32_insertf64x2_256((__v4df)(__m256d)(A), \ - (__v2df)(__m128d)(B), (int)(imm)) + ((__m256d)__builtin_ia32_insertf64x2_256((__v4df)(__m256d)(A), \ + (__v2df)(__m128d)(B), (int)(imm))) #define _mm256_mask_insertf64x2(W, U, A, B, imm) \ - (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ + ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ (__v4df)_mm256_insertf64x2((A), (B), (imm)), \ - (__v4df)(__m256d)(W)) + (__v4df)(__m256d)(W))) #define _mm256_maskz_insertf64x2(U, A, B, imm) \ - (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ + ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ (__v4df)_mm256_insertf64x2((A), (B), (imm)), \ - (__v4df)_mm256_setzero_pd()) + (__v4df)_mm256_setzero_pd())) #define _mm256_inserti64x2(A, B, imm) \ - (__m256i)__builtin_ia32_inserti64x2_256((__v4di)(__m256i)(A), \ - (__v2di)(__m128i)(B), (int)(imm)) + ((__m256i)__builtin_ia32_inserti64x2_256((__v4di)(__m256i)(A), \ + (__v2di)(__m128i)(B), (int)(imm))) #define _mm256_mask_inserti64x2(W, U, A, B, imm) \ - (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ - (__v4di)_mm256_inserti64x2((A), (B), (imm)), \ - (__v4di)(__m256i)(W)) + ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ + (__v4di)_mm256_inserti64x2((A), (B), (imm)), \ + (__v4di)(__m256i)(W))) #define _mm256_maskz_inserti64x2(U, A, B, imm) \ - (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ - (__v4di)_mm256_inserti64x2((A), (B), (imm)), \ - (__v4di)_mm256_setzero_si256()) + ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ + (__v4di)_mm256_inserti64x2((A), (B), (imm)), \ + (__v4di)_mm256_setzero_si256())) #define _mm_mask_fpclass_pd_mask(U, A, imm) \ - (__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \ - (__mmask8)(U)) + ((__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \ + (__mmask8)(U))) #define _mm_fpclass_pd_mask(A, imm) \ - (__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \ - (__mmask8)-1) + ((__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \ + (__mmask8)-1)) #define _mm256_mask_fpclass_pd_mask(U, A, imm) \ - (__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \ - (__mmask8)(U)) + ((__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \ + (__mmask8)(U))) #define _mm256_fpclass_pd_mask(A, imm) \ - (__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \ - (__mmask8)-1) + ((__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \ + (__mmask8)-1)) #define _mm_mask_fpclass_ps_mask(U, A, imm) \ - (__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \ - (__mmask8)(U)) + ((__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \ + (__mmask8)(U))) #define _mm_fpclass_ps_mask(A, imm) \ - (__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \ - (__mmask8)-1) + ((__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \ + 
(__mmask8)-1)) #define _mm256_mask_fpclass_ps_mask(U, A, imm) \ - (__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \ - (__mmask8)(U)) + ((__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \ + (__mmask8)(U))) #define _mm256_fpclass_ps_mask(A, imm) \ - (__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \ - (__mmask8)-1) + ((__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \ + (__mmask8)-1)) #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 diff --git a/clang/lib/Headers/avx512vlfp16intrin.h b/clang/lib/Headers/avx512vlfp16intrin.h index cda54bcc8351d..1809211fd4066 100644 --- a/clang/lib/Headers/avx512vlfp16intrin.h +++ b/clang/lib/Headers/avx512vlfp16intrin.h @@ -69,6 +69,240 @@ _mm256_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4, _mm256_set_ph((h16), (h15), (h14), (h13), (h12), (h11), (h10), (h9), (h8), \ (h7), (h6), (h5), (h4), (h3), (h2), (h1)) +static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_add_ph(__m256h __A, + __m256h __B) { + return (__m256h)((__v16hf)__A + (__v16hf)__B); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS256 +_mm256_mask_add_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) { + return (__m256h)__builtin_ia32_selectph_256( + __U, (__v16hf)_mm256_add_ph(__A, __B), (__v16hf)__W); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS256 +_mm256_maskz_add_ph(__mmask16 __U, __m256h __A, __m256h __B) { + return (__m256h)__builtin_ia32_selectph_256( + __U, (__v16hf)_mm256_add_ph(__A, __B), (__v16hf)_mm256_setzero_ph()); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_add_ph(__m128h __A, + __m128h __B) { + return (__m128h)((__v8hf)__A + (__v8hf)__B); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_add_ph(__m128h __W, + __mmask8 __U, + __m128h __A, + __m128h __B) { + return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_add_ph(__A, __B), + (__v8hf)__W); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_add_ph(__mmask8 __U, + __m128h __A, + __m128h __B) { + return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_add_ph(__A, __B), + (__v8hf)_mm_setzero_ph()); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_sub_ph(__m256h __A, + __m256h __B) { + return (__m256h)((__v16hf)__A - (__v16hf)__B); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS256 +_mm256_mask_sub_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) { + return (__m256h)__builtin_ia32_selectph_256( + __U, (__v16hf)_mm256_sub_ph(__A, __B), (__v16hf)__W); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS256 +_mm256_maskz_sub_ph(__mmask16 __U, __m256h __A, __m256h __B) { + return (__m256h)__builtin_ia32_selectph_256( + __U, (__v16hf)_mm256_sub_ph(__A, __B), (__v16hf)_mm256_setzero_ph()); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_sub_ph(__m128h __A, + __m128h __B) { + return (__m128h)((__v8hf)__A - (__v8hf)__B); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_sub_ph(__m128h __W, + __mmask8 __U, + __m128h __A, + __m128h __B) { + return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_sub_ph(__A, __B), + (__v8hf)__W); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_sub_ph(__mmask8 __U, + __m128h __A, + __m128h __B) { + return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_sub_ph(__A, __B), + (__v8hf)_mm_setzero_ph()); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mul_ph(__m256h __A, + __m256h __B) { + return (__m256h)((__v16hf)__A * 
(__v16hf)__B); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS256 +_mm256_mask_mul_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) { + return (__m256h)__builtin_ia32_selectph_256( + __U, (__v16hf)_mm256_mul_ph(__A, __B), (__v16hf)__W); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS256 +_mm256_maskz_mul_ph(__mmask16 __U, __m256h __A, __m256h __B) { + return (__m256h)__builtin_ia32_selectph_256( + __U, (__v16hf)_mm256_mul_ph(__A, __B), (__v16hf)_mm256_setzero_ph()); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mul_ph(__m128h __A, + __m128h __B) { + return (__m128h)((__v8hf)__A * (__v8hf)__B); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_mul_ph(__m128h __W, + __mmask8 __U, + __m128h __A, + __m128h __B) { + return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_mul_ph(__A, __B), + (__v8hf)__W); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_mul_ph(__mmask8 __U, + __m128h __A, + __m128h __B) { + return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_mul_ph(__A, __B), + (__v8hf)_mm_setzero_ph()); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_div_ph(__m256h __A, + __m256h __B) { + return (__m256h)((__v16hf)__A / (__v16hf)__B); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS256 +_mm256_mask_div_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) { + return (__m256h)__builtin_ia32_selectph_256( + __U, (__v16hf)_mm256_div_ph(__A, __B), (__v16hf)__W); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS256 +_mm256_maskz_div_ph(__mmask16 __U, __m256h __A, __m256h __B) { + return (__m256h)__builtin_ia32_selectph_256( + __U, (__v16hf)_mm256_div_ph(__A, __B), (__v16hf)_mm256_setzero_ph()); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_div_ph(__m128h __A, + __m128h __B) { + return (__m128h)((__v8hf)__A / (__v8hf)__B); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_div_ph(__m128h __W, + __mmask8 __U, + __m128h __A, + __m128h __B) { + return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_div_ph(__A, __B), + (__v8hf)__W); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_div_ph(__mmask8 __U, + __m128h __A, + __m128h __B) { + return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_div_ph(__A, __B), + (__v8hf)_mm_setzero_ph()); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_min_ph(__m256h __A, + __m256h __B) { + return (__m256h)__builtin_ia32_minph256((__v16hf)__A, (__v16hf)__B); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS256 +_mm256_mask_min_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) { + return (__m256h)__builtin_ia32_selectph_256( + (__mmask16)__U, + (__v16hf)__builtin_ia32_minph256((__v16hf)__A, (__v16hf)__B), + (__v16hf)__W); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS256 +_mm256_maskz_min_ph(__mmask16 __U, __m256h __A, __m256h __B) { + return (__m256h)__builtin_ia32_selectph_256( + (__mmask16)__U, + (__v16hf)__builtin_ia32_minph256((__v16hf)__A, (__v16hf)__B), + (__v16hf)_mm256_setzero_ph()); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_min_ph(__m128h __A, + __m128h __B) { + return (__m128h)__builtin_ia32_minph128((__v8hf)__A, (__v8hf)__B); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_min_ph(__m128h __W, + __mmask8 __U, + __m128h __A, + __m128h __B) { + return (__m128h)__builtin_ia32_selectph_128( + (__mmask8)__U, (__v8hf)__builtin_ia32_minph128((__v8hf)__A, (__v8hf)__B), + (__v8hf)__W); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_min_ph(__mmask8 __U, + __m128h __A, 
+ __m128h __B) { + return (__m128h)__builtin_ia32_selectph_128( + (__mmask8)__U, (__v8hf)__builtin_ia32_minph128((__v8hf)__A, (__v8hf)__B), + (__v8hf)_mm_setzero_ph()); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_max_ph(__m256h __A, + __m256h __B) { + return (__m256h)__builtin_ia32_maxph256((__v16hf)__A, (__v16hf)__B); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS256 +_mm256_mask_max_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) { + return (__m256h)__builtin_ia32_selectph_256( + (__mmask16)__U, + (__v16hf)__builtin_ia32_maxph256((__v16hf)__A, (__v16hf)__B), + (__v16hf)__W); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS256 +_mm256_maskz_max_ph(__mmask16 __U, __m256h __A, __m256h __B) { + return (__m256h)__builtin_ia32_selectph_256( + (__mmask16)__U, + (__v16hf)__builtin_ia32_maxph256((__v16hf)__A, (__v16hf)__B), + (__v16hf)_mm256_setzero_ph()); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_max_ph(__m128h __A, + __m128h __B) { + return (__m128h)__builtin_ia32_maxph128((__v8hf)__A, (__v8hf)__B); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_max_ph(__m128h __W, + __mmask8 __U, + __m128h __A, + __m128h __B) { + return (__m128h)__builtin_ia32_selectph_128( + (__mmask8)__U, (__v8hf)__builtin_ia32_maxph128((__v8hf)__A, (__v8hf)__B), + (__v8hf)__W); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_max_ph(__mmask8 __U, + __m128h __A, + __m128h __B) { + return (__m128h)__builtin_ia32_selectph_128( + (__mmask8)__U, (__v8hf)__builtin_ia32_maxph128((__v8hf)__A, (__v8hf)__B), + (__v8hf)_mm_setzero_ph()); +} + static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_abs_ph(__m256h __A) { return (__m256h)_mm256_and_epi32(_mm256_set1_epi32(0x7FFF7FFF), (__m256i)__A); } @@ -77,6 +311,1066 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_abs_ph(__m128h __A) { return (__m128h)_mm_and_epi32(_mm_set1_epi32(0x7FFF7FFF), (__m128i)__A); } +#define _mm256_cmp_ph_mask(a, b, p) \ + ((__mmask16)__builtin_ia32_cmpph256_mask( \ + (__v16hf)(__m256h)(a), (__v16hf)(__m256h)(b), (int)(p), (__mmask16)-1)) + +#define _mm256_mask_cmp_ph_mask(m, a, b, p) \ + ((__mmask16)__builtin_ia32_cmpph256_mask( \ + (__v16hf)(__m256h)(a), (__v16hf)(__m256h)(b), (int)(p), (__mmask16)(m))) + +#define _mm_cmp_ph_mask(a, b, p) \ + ((__mmask8)__builtin_ia32_cmpph128_mask( \ + (__v8hf)(__m128h)(a), (__v8hf)(__m128h)(b), (int)(p), (__mmask8)-1)) + +#define _mm_mask_cmp_ph_mask(m, a, b, p) \ + ((__mmask8)__builtin_ia32_cmpph128_mask( \ + (__v8hf)(__m128h)(a), (__v8hf)(__m128h)(b), (int)(p), (__mmask8)(m))) + +static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_rcp_ph(__m256h __A) { + return (__m256h)__builtin_ia32_rcpph256_mask( + (__v16hf)__A, (__v16hf)_mm256_undefined_ph(), (__mmask16)-1); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS256 +_mm256_mask_rcp_ph(__m256h __W, __mmask16 __U, __m256h __A) { + return (__m256h)__builtin_ia32_rcpph256_mask((__v16hf)__A, (__v16hf)__W, + (__mmask16)__U); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS256 +_mm256_maskz_rcp_ph(__mmask16 __U, __m256h __A) { + return (__m256h)__builtin_ia32_rcpph256_mask( + (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_rcp_ph(__m128h __A) { + return (__m128h)__builtin_ia32_rcpph128_mask( + (__v8hf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_rcp_ph(__m128h __W, + __mmask8 __U, + __m128h __A) { + return 
(__m128h)__builtin_ia32_rcpph128_mask((__v8hf)__A, (__v8hf)__W, + (__mmask8)__U); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_rcp_ph(__mmask8 __U, + __m128h __A) { + return (__m128h)__builtin_ia32_rcpph128_mask( + (__v8hf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_rsqrt_ph(__m256h __A) { + return (__m256h)__builtin_ia32_rsqrtph256_mask( + (__v16hf)__A, (__v16hf)_mm256_undefined_ph(), (__mmask16)-1); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS256 +_mm256_mask_rsqrt_ph(__m256h __W, __mmask16 __U, __m256h __A) { + return (__m256h)__builtin_ia32_rsqrtph256_mask((__v16hf)__A, (__v16hf)__W, + (__mmask16)__U); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS256 +_mm256_maskz_rsqrt_ph(__mmask16 __U, __m256h __A) { + return (__m256h)__builtin_ia32_rsqrtph256_mask( + (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_rsqrt_ph(__m128h __A) { + return (__m128h)__builtin_ia32_rsqrtph128_mask( + (__v8hf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt_ph(__m128h __W, + __mmask8 __U, + __m128h __A) { + return (__m128h)__builtin_ia32_rsqrtph128_mask((__v8hf)__A, (__v8hf)__W, + (__mmask8)__U); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 +_mm_maskz_rsqrt_ph(__mmask8 __U, __m128h __A) { + return (__m128h)__builtin_ia32_rsqrtph128_mask( + (__v8hf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_getexp_ph(__m128h __A) { + return (__m128h)__builtin_ia32_getexpph128_mask( + (__v8hf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)-1); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 +_mm_mask_getexp_ph(__m128h __W, __mmask8 __U, __m128h __A) { + return (__m128h)__builtin_ia32_getexpph128_mask((__v8hf)__A, (__v8hf)__W, + (__mmask8)__U); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 +_mm_maskz_getexp_ph(__mmask8 __U, __m128h __A) { + return (__m128h)__builtin_ia32_getexpph128_mask( + (__v8hf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_getexp_ph(__m256h __A) { + return (__m256h)__builtin_ia32_getexpph256_mask( + (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS256 +_mm256_mask_getexp_ph(__m256h __W, __mmask16 __U, __m256h __A) { + return (__m256h)__builtin_ia32_getexpph256_mask((__v16hf)__A, (__v16hf)__W, + (__mmask16)__U); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS256 +_mm256_maskz_getexp_ph(__mmask16 __U, __m256h __A) { + return (__m256h)__builtin_ia32_getexpph256_mask( + (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U); +} + +#define _mm_getmant_ph(A, B, C) \ + ((__m128h)__builtin_ia32_getmantph128_mask( \ + (__v8hf)(__m128h)(A), (int)(((C) << 2) | (B)), (__v8hf)_mm_setzero_ph(), \ + (__mmask8)-1)) + +#define _mm_mask_getmant_ph(W, U, A, B, C) \ + ((__m128h)__builtin_ia32_getmantph128_mask( \ + (__v8hf)(__m128h)(A), (int)(((C) << 2) | (B)), (__v8hf)(__m128h)(W), \ + (__mmask8)(U))) + +#define _mm_maskz_getmant_ph(U, A, B, C) \ + ((__m128h)__builtin_ia32_getmantph128_mask( \ + (__v8hf)(__m128h)(A), (int)(((C) << 2) | (B)), (__v8hf)_mm_setzero_ph(), \ + (__mmask8)(U))) + +#define _mm256_getmant_ph(A, B, C) \ + ((__m256h)__builtin_ia32_getmantph256_mask( \ + (__v16hf)(__m256h)(A), (int)(((C) << 2) | (B)), \ + (__v16hf)_mm256_setzero_ph(), (__mmask16)-1)) + +#define 
_mm256_mask_getmant_ph(W, U, A, B, C) \ + ((__m256h)__builtin_ia32_getmantph256_mask( \ + (__v16hf)(__m256h)(A), (int)(((C) << 2) | (B)), (__v16hf)(__m256h)(W), \ + (__mmask16)(U))) + +#define _mm256_maskz_getmant_ph(U, A, B, C) \ + ((__m256h)__builtin_ia32_getmantph256_mask( \ + (__v16hf)(__m256h)(A), (int)(((C) << 2) | (B)), \ + (__v16hf)_mm256_setzero_ph(), (__mmask16)(U))) + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_scalef_ph(__m128h __A, + __m128h __B) { + return (__m128h)__builtin_ia32_scalefph128_mask( + (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 +_mm_mask_scalef_ph(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { + return (__m128h)__builtin_ia32_scalefph128_mask((__v8hf)__A, (__v8hf)__B, + (__v8hf)__W, (__mmask8)__U); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 +_mm_maskz_scalef_ph(__mmask8 __U, __m128h __A, __m128h __B) { + return (__m128h)__builtin_ia32_scalefph128_mask( + (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_scalef_ph(__m256h __A, + __m256h __B) { + return (__m256h)__builtin_ia32_scalefph256_mask( + (__v16hf)__A, (__v16hf)__B, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS256 +_mm256_mask_scalef_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) { + return (__m256h)__builtin_ia32_scalefph256_mask((__v16hf)__A, (__v16hf)__B, + (__v16hf)__W, (__mmask16)__U); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS256 +_mm256_maskz_scalef_ph(__mmask16 __U, __m256h __A, __m256h __B) { + return (__m256h)__builtin_ia32_scalefph256_mask( + (__v16hf)__A, (__v16hf)__B, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U); +} + +#define _mm_roundscale_ph(A, imm) \ + ((__m128h)__builtin_ia32_rndscaleph_128_mask( \ + (__v8hf)(__m128h)(A), (int)(imm), (__v8hf)_mm_setzero_ph(), \ + (__mmask8)-1)) + +#define _mm_mask_roundscale_ph(W, U, A, imm) \ + ((__m128h)__builtin_ia32_rndscaleph_128_mask( \ + (__v8hf)(__m128h)(A), (int)(imm), (__v8hf)(__m128h)(W), (__mmask8)(U))) + +#define _mm_maskz_roundscale_ph(U, A, imm) \ + ((__m128h)__builtin_ia32_rndscaleph_128_mask( \ + (__v8hf)(__m128h)(A), (int)(imm), (__v8hf)_mm_setzero_ph(), \ + (__mmask8)(U))) + +#define _mm256_roundscale_ph(A, imm) \ + ((__m256h)__builtin_ia32_rndscaleph_256_mask( \ + (__v16hf)(__m256h)(A), (int)(imm), (__v16hf)_mm256_setzero_ph(), \ + (__mmask16)-1)) + +#define _mm256_mask_roundscale_ph(W, U, A, imm) \ + ((__m256h)__builtin_ia32_rndscaleph_256_mask( \ + (__v16hf)(__m256h)(A), (int)(imm), (__v16hf)(__m256h)(W), \ + (__mmask16)(U))) + +#define _mm256_maskz_roundscale_ph(U, A, imm) \ + ((__m256h)__builtin_ia32_rndscaleph_256_mask( \ + (__v16hf)(__m256h)(A), (int)(imm), (__v16hf)_mm256_setzero_ph(), \ + (__mmask16)(U))) + +#define _mm_reduce_ph(A, imm) \ + ((__m128h)__builtin_ia32_reduceph128_mask((__v8hf)(__m128h)(A), (int)(imm), \ + (__v8hf)_mm_setzero_ph(), \ + (__mmask8)-1)) + +#define _mm_mask_reduce_ph(W, U, A, imm) \ + ((__m128h)__builtin_ia32_reduceph128_mask( \ + (__v8hf)(__m128h)(A), (int)(imm), (__v8hf)(__m128h)(W), (__mmask8)(U))) + +#define _mm_maskz_reduce_ph(U, A, imm) \ + ((__m128h)__builtin_ia32_reduceph128_mask((__v8hf)(__m128h)(A), (int)(imm), \ + (__v8hf)_mm_setzero_ph(), \ + (__mmask8)(U))) + +#define _mm256_reduce_ph(A, imm) \ + ((__m256h)__builtin_ia32_reduceph256_mask((__v16hf)(__m256h)(A), (int)(imm), \ + (__v16hf)_mm256_setzero_ph(), \ + (__mmask16)-1)) + +#define 
_mm256_mask_reduce_ph(W, U, A, imm) \ + ((__m256h)__builtin_ia32_reduceph256_mask((__v16hf)(__m256h)(A), (int)(imm), \ + (__v16hf)(__m256h)(W), \ + (__mmask16)(U))) + +#define _mm256_maskz_reduce_ph(U, A, imm) \ + ((__m256h)__builtin_ia32_reduceph256_mask((__v16hf)(__m256h)(A), (int)(imm), \ + (__v16hf)_mm256_setzero_ph(), \ + (__mmask16)(U))) + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_sqrt_ph(__m128h __a) { + return __builtin_ia32_sqrtph((__v8hf)__a); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_ph(__m128h __W, + __mmask8 __U, + __m128h __A) { + return (__m128h)__builtin_ia32_selectph_128( + (__mmask8)__U, (__v8hf)_mm_sqrt_ph(__A), (__v8hf)__W); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_ph(__mmask8 __U, + __m128h __A) { + return (__m128h)__builtin_ia32_selectph_128( + (__mmask8)__U, (__v8hf)_mm_sqrt_ph(__A), (__v8hf)_mm_setzero_ph()); +} + +static __inline __m256h __DEFAULT_FN_ATTRS256 _mm256_sqrt_ph(__m256h __a) { + return (__m256h)__builtin_ia32_sqrtph256((__v16hf)__a); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS256 +_mm256_mask_sqrt_ph(__m256h __W, __mmask16 __U, __m256h __A) { + return (__m256h)__builtin_ia32_selectph_256( + (__mmask16)__U, (__v16hf)_mm256_sqrt_ph(__A), (__v16hf)__W); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS256 +_mm256_maskz_sqrt_ph(__mmask16 __U, __m256h __A) { + return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U, + (__v16hf)_mm256_sqrt_ph(__A), + (__v16hf)_mm256_setzero_ph()); +} + +#define _mm_mask_fpclass_ph_mask(U, A, imm) \ + ((__mmask8)__builtin_ia32_fpclassph128_mask((__v8hf)(__m128h)(A), \ + (int)(imm), (__mmask8)(U))) + +#define _mm_fpclass_ph_mask(A, imm) \ + ((__mmask8)__builtin_ia32_fpclassph128_mask((__v8hf)(__m128h)(A), \ + (int)(imm), (__mmask8)-1)) + +#define _mm256_mask_fpclass_ph_mask(U, A, imm) \ + ((__mmask16)__builtin_ia32_fpclassph256_mask((__v16hf)(__m256h)(A), \ + (int)(imm), (__mmask16)(U))) + +#define _mm256_fpclass_ph_mask(A, imm) \ + ((__mmask16)__builtin_ia32_fpclassph256_mask((__v16hf)(__m256h)(A), \ + (int)(imm), (__mmask16)-1)) + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtpd_ph(__m128d __A) { + return (__m128h)__builtin_ia32_vcvtpd2ph128_mask( + (__v2df)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_ph(__m128h __W, + __mmask8 __U, + __m128d __A) { + return (__m128h)__builtin_ia32_vcvtpd2ph128_mask((__v2df)__A, (__v8hf)__W, + (__mmask8)__U); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 +_mm_maskz_cvtpd_ph(__mmask8 __U, __m128d __A) { + return (__m128h)__builtin_ia32_vcvtpd2ph128_mask( + (__v2df)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_cvtpd_ph(__m256d __A) { + return (__m128h)__builtin_ia32_vcvtpd2ph256_mask( + (__v4df)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS256 +_mm256_mask_cvtpd_ph(__m128h __W, __mmask8 __U, __m256d __A) { + return (__m128h)__builtin_ia32_vcvtpd2ph256_mask((__v4df)__A, (__v8hf)__W, + (__mmask8)__U); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS256 +_mm256_maskz_cvtpd_ph(__mmask8 __U, __m256d __A) { + return (__m128h)__builtin_ia32_vcvtpd2ph256_mask( + (__v4df)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtph_pd(__m128h __A) { + return (__m128d)__builtin_ia32_vcvtph2pd128_mask( + (__v8hf)__A, (__v2df)_mm_undefined_pd(), (__mmask8)-1); +} + +static 
__inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtph_pd(__m128d __W, + __mmask8 __U, + __m128h __A) { + return (__m128d)__builtin_ia32_vcvtph2pd128_mask((__v8hf)__A, (__v2df)__W, + (__mmask8)__U); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS128 +_mm_maskz_cvtph_pd(__mmask8 __U, __m128h __A) { + return (__m128d)__builtin_ia32_vcvtph2pd128_mask( + (__v8hf)__A, (__v2df)_mm_setzero_pd(), (__mmask8)__U); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_cvtph_pd(__m128h __A) { + return (__m256d)__builtin_ia32_vcvtph2pd256_mask( + (__v8hf)__A, (__v4df)_mm256_undefined_pd(), (__mmask8)-1); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS256 +_mm256_mask_cvtph_pd(__m256d __W, __mmask8 __U, __m128h __A) { + return (__m256d)__builtin_ia32_vcvtph2pd256_mask((__v8hf)__A, (__v4df)__W, + (__mmask8)__U); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS256 +_mm256_maskz_cvtph_pd(__mmask8 __U, __m128h __A) { + return (__m256d)__builtin_ia32_vcvtph2pd256_mask( + (__v8hf)__A, (__v4df)_mm256_setzero_pd(), (__mmask8)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epi16(__m128h __A) { + return (__m128i)__builtin_ia32_vcvtph2w128_mask( + (__v8hf)__A, (__v8hi)_mm_undefined_si128(), (__mmask8)-1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_cvtph_epi16(__m128i __W, __mmask8 __U, __m128h __A) { + return (__m128i)__builtin_ia32_vcvtph2w128_mask((__v8hf)__A, (__v8hi)__W, + (__mmask8)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_cvtph_epi16(__mmask8 __U, __m128h __A) { + return (__m128i)__builtin_ia32_vcvtph2w128_mask( + (__v8hf)__A, (__v8hi)_mm_setzero_si128(), (__mmask8)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_cvtph_epi16(__m256h __A) { + return (__m256i)__builtin_ia32_vcvtph2w256_mask( + (__v16hf)__A, (__v16hi)_mm256_undefined_si256(), (__mmask16)-1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_cvtph_epi16(__m256i __W, __mmask16 __U, __m256h __A) { + return (__m256i)__builtin_ia32_vcvtph2w256_mask((__v16hf)__A, (__v16hi)__W, + (__mmask16)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_cvtph_epi16(__mmask16 __U, __m256h __A) { + return (__m256i)__builtin_ia32_vcvtph2w256_mask( + (__v16hf)__A, (__v16hi)_mm256_setzero_si256(), (__mmask16)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epi16(__m128h __A) { + return (__m128i)__builtin_ia32_vcvttph2w128_mask( + (__v8hf)__A, (__v8hi)_mm_undefined_si128(), (__mmask8)-1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_cvttph_epi16(__m128i __W, __mmask8 __U, __m128h __A) { + return (__m128i)__builtin_ia32_vcvttph2w128_mask((__v8hf)__A, (__v8hi)__W, + (__mmask8)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_cvttph_epi16(__mmask8 __U, __m128h __A) { + return (__m128i)__builtin_ia32_vcvttph2w128_mask( + (__v8hf)__A, (__v8hi)_mm_setzero_si128(), (__mmask8)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_cvttph_epi16(__m256h __A) { + return (__m256i)__builtin_ia32_vcvttph2w256_mask( + (__v16hf)__A, (__v16hi)_mm256_undefined_si256(), (__mmask16)-1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_cvttph_epi16(__m256i __W, __mmask16 __U, __m256h __A) { + return (__m256i)__builtin_ia32_vcvttph2w256_mask((__v16hf)__A, (__v16hi)__W, + (__mmask16)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_cvttph_epi16(__mmask16 __U, __m256h __A) { + return (__m256i)__builtin_ia32_vcvttph2w256_mask( + 
(__v16hf)__A, (__v16hi)_mm256_setzero_si256(), (__mmask16)__U); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepi16_ph(__m128i __A) { + return (__m128h) __builtin_convertvector((__v8hi)__A, __v8hf); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 +_mm_mask_cvtepi16_ph(__m128h __W, __mmask8 __U, __m128i __A) { + return (__m128h)__builtin_ia32_selectph_128( + (__mmask8)__U, (__v8hf)_mm_cvtepi16_ph(__A), (__v8hf)__W); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 +_mm_maskz_cvtepi16_ph(__mmask8 __U, __m128i __A) { + return (__m128h)__builtin_ia32_selectph_128( + (__mmask8)__U, (__v8hf)_mm_cvtepi16_ph(__A), (__v8hf)_mm_setzero_ph()); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS256 +_mm256_cvtepi16_ph(__m256i __A) { + return (__m256h) __builtin_convertvector((__v16hi)__A, __v16hf); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS256 +_mm256_mask_cvtepi16_ph(__m256h __W, __mmask16 __U, __m256i __A) { + return (__m256h)__builtin_ia32_selectph_256( + (__mmask16)__U, (__v16hf)_mm256_cvtepi16_ph(__A), (__v16hf)__W); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS256 +_mm256_maskz_cvtepi16_ph(__mmask16 __U, __m256i __A) { + return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U, + (__v16hf)_mm256_cvtepi16_ph(__A), + (__v16hf)_mm256_setzero_ph()); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epu16(__m128h __A) { + return (__m128i)__builtin_ia32_vcvtph2uw128_mask( + (__v8hf)__A, (__v8hu)_mm_undefined_si128(), (__mmask8)-1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_cvtph_epu16(__m128i __W, __mmask8 __U, __m128h __A) { + return (__m128i)__builtin_ia32_vcvtph2uw128_mask((__v8hf)__A, (__v8hu)__W, + (__mmask8)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_cvtph_epu16(__mmask8 __U, __m128h __A) { + return (__m128i)__builtin_ia32_vcvtph2uw128_mask( + (__v8hf)__A, (__v8hu)_mm_setzero_si128(), (__mmask8)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_cvtph_epu16(__m256h __A) { + return (__m256i)__builtin_ia32_vcvtph2uw256_mask( + (__v16hf)__A, (__v16hu)_mm256_undefined_si256(), (__mmask16)-1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_cvtph_epu16(__m256i __W, __mmask16 __U, __m256h __A) { + return (__m256i)__builtin_ia32_vcvtph2uw256_mask((__v16hf)__A, (__v16hu)__W, + (__mmask16)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_cvtph_epu16(__mmask16 __U, __m256h __A) { + return (__m256i)__builtin_ia32_vcvtph2uw256_mask( + (__v16hf)__A, (__v16hu)_mm256_setzero_si256(), (__mmask16)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epu16(__m128h __A) { + return (__m128i)__builtin_ia32_vcvttph2uw128_mask( + (__v8hf)__A, (__v8hu)_mm_undefined_si128(), (__mmask8)-1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_cvttph_epu16(__m128i __W, __mmask8 __U, __m128h __A) { + return (__m128i)__builtin_ia32_vcvttph2uw128_mask((__v8hf)__A, (__v8hu)__W, + (__mmask8)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_cvttph_epu16(__mmask8 __U, __m128h __A) { + return (__m128i)__builtin_ia32_vcvttph2uw128_mask( + (__v8hf)__A, (__v8hu)_mm_setzero_si128(), (__mmask8)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_cvttph_epu16(__m256h __A) { + return (__m256i)__builtin_ia32_vcvttph2uw256_mask( + (__v16hf)__A, (__v16hu)_mm256_undefined_si256(), (__mmask16)-1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_cvttph_epu16(__m256i __W, __mmask16 __U, __m256h __A) { + 
return (__m256i)__builtin_ia32_vcvttph2uw256_mask((__v16hf)__A, (__v16hu)__W, + (__mmask16)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_cvttph_epu16(__mmask16 __U, __m256h __A) { + return (__m256i)__builtin_ia32_vcvttph2uw256_mask( + (__v16hf)__A, (__v16hu)_mm256_setzero_si256(), (__mmask16)__U); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepu16_ph(__m128i __A) { + return (__m128h) __builtin_convertvector((__v8hu)__A, __v8hf); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 +_mm_mask_cvtepu16_ph(__m128h __W, __mmask8 __U, __m128i __A) { + return (__m128h)__builtin_ia32_selectph_128( + (__mmask8)__U, (__v8hf)_mm_cvtepu16_ph(__A), (__v8hf)__W); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 +_mm_maskz_cvtepu16_ph(__mmask8 __U, __m128i __A) { + return (__m128h)__builtin_ia32_selectph_128( + (__mmask8)__U, (__v8hf)_mm_cvtepu16_ph(__A), (__v8hf)_mm_setzero_ph()); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS256 +_mm256_cvtepu16_ph(__m256i __A) { + return (__m256h) __builtin_convertvector((__v16hu)__A, __v16hf); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS256 +_mm256_mask_cvtepu16_ph(__m256h __W, __mmask16 __U, __m256i __A) { + return (__m256h)__builtin_ia32_selectph_256( + (__mmask16)__U, (__v16hf)_mm256_cvtepu16_ph(__A), (__v16hf)__W); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS256 +_mm256_maskz_cvtepu16_ph(__mmask16 __U, __m256i __A) { + return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U, + (__v16hf)_mm256_cvtepu16_ph(__A), + (__v16hf)_mm256_setzero_ph()); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epi32(__m128h __A) { + return (__m128i)__builtin_ia32_vcvtph2dq128_mask( + (__v8hf)__A, (__v4si)_mm_undefined_si128(), (__mmask8)-1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_cvtph_epi32(__m128i __W, __mmask8 __U, __m128h __A) { + return (__m128i)__builtin_ia32_vcvtph2dq128_mask((__v8hf)__A, (__v4si)__W, + (__mmask8)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_cvtph_epi32(__mmask8 __U, __m128h __A) { + return (__m128i)__builtin_ia32_vcvtph2dq128_mask( + (__v8hf)__A, (__v4si)_mm_setzero_si128(), (__mmask8)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_cvtph_epi32(__m128h __A) { + return (__m256i)__builtin_ia32_vcvtph2dq256_mask( + (__v8hf)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_cvtph_epi32(__m256i __W, __mmask8 __U, __m128h __A) { + return (__m256i)__builtin_ia32_vcvtph2dq256_mask((__v8hf)__A, (__v8si)__W, + (__mmask8)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_cvtph_epi32(__mmask8 __U, __m128h __A) { + return (__m256i)__builtin_ia32_vcvtph2dq256_mask( + (__v8hf)__A, (__v8si)_mm256_setzero_si256(), (__mmask8)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epu32(__m128h __A) { + return (__m128i)__builtin_ia32_vcvtph2udq128_mask( + (__v8hf)__A, (__v4su)_mm_undefined_si128(), (__mmask8)-1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_cvtph_epu32(__m128i __W, __mmask8 __U, __m128h __A) { + return (__m128i)__builtin_ia32_vcvtph2udq128_mask((__v8hf)__A, (__v4su)__W, + (__mmask8)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_cvtph_epu32(__mmask8 __U, __m128h __A) { + return (__m128i)__builtin_ia32_vcvtph2udq128_mask( + (__v8hf)__A, (__v4su)_mm_setzero_si128(), (__mmask8)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_cvtph_epu32(__m128h 
__A) { + return (__m256i)__builtin_ia32_vcvtph2udq256_mask( + (__v8hf)__A, (__v8su)_mm256_undefined_si256(), (__mmask8)-1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_cvtph_epu32(__m256i __W, __mmask8 __U, __m128h __A) { + return (__m256i)__builtin_ia32_vcvtph2udq256_mask((__v8hf)__A, (__v8su)__W, + (__mmask8)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_cvtph_epu32(__mmask8 __U, __m128h __A) { + return (__m256i)__builtin_ia32_vcvtph2udq256_mask( + (__v8hf)__A, (__v8su)_mm256_setzero_si256(), (__mmask8)__U); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepi32_ph(__m128i __A) { + return (__m128h)__builtin_ia32_vcvtdq2ph128_mask( + (__v4si)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 +_mm_mask_cvtepi32_ph(__m128h __W, __mmask8 __U, __m128i __A) { + return (__m128h)__builtin_ia32_vcvtdq2ph128_mask((__v4si)__A, (__v8hf)__W, + (__mmask8)__U); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 +_mm_maskz_cvtepi32_ph(__mmask8 __U, __m128i __A) { + return (__m128h)__builtin_ia32_vcvtdq2ph128_mask( + (__v4si)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS256 +_mm256_cvtepi32_ph(__m256i __A) { + return (__m128h) __builtin_convertvector((__v8si)__A, __v8hf); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS256 +_mm256_mask_cvtepi32_ph(__m128h __W, __mmask8 __U, __m256i __A) { + return (__m128h)__builtin_ia32_selectph_128( + (__mmask8)__U, (__v8hf)_mm256_cvtepi32_ph(__A), (__v8hf)__W); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS256 +_mm256_maskz_cvtepi32_ph(__mmask8 __U, __m256i __A) { + return (__m128h)__builtin_ia32_selectph_128( + (__mmask8)__U, (__v8hf)_mm256_cvtepi32_ph(__A), (__v8hf)_mm_setzero_ph()); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepu32_ph(__m128i __A) { + return (__m128h)__builtin_ia32_vcvtudq2ph128_mask( + (__v4su)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 +_mm_mask_cvtepu32_ph(__m128h __W, __mmask8 __U, __m128i __A) { + return (__m128h)__builtin_ia32_vcvtudq2ph128_mask((__v4su)__A, (__v8hf)__W, + (__mmask8)__U); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 +_mm_maskz_cvtepu32_ph(__mmask8 __U, __m128i __A) { + return (__m128h)__builtin_ia32_vcvtudq2ph128_mask( + (__v4su)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS256 +_mm256_cvtepu32_ph(__m256i __A) { + return (__m128h) __builtin_convertvector((__v8su)__A, __v8hf); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS256 +_mm256_mask_cvtepu32_ph(__m128h __W, __mmask8 __U, __m256i __A) { + return (__m128h)__builtin_ia32_selectph_128( + (__mmask8)__U, (__v8hf)_mm256_cvtepu32_ph(__A), (__v8hf)__W); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS256 +_mm256_maskz_cvtepu32_ph(__mmask8 __U, __m256i __A) { + return (__m128h)__builtin_ia32_selectph_128( + (__mmask8)__U, (__v8hf)_mm256_cvtepu32_ph(__A), (__v8hf)_mm_setzero_ph()); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epi32(__m128h __A) { + return (__m128i)__builtin_ia32_vcvttph2dq128_mask( + (__v8hf)__A, (__v4si)_mm_undefined_si128(), (__mmask8)-1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_cvttph_epi32(__m128i __W, __mmask8 __U, __m128h __A) { + return (__m128i)__builtin_ia32_vcvttph2dq128_mask((__v8hf)__A, (__v4si)__W, + (__mmask8)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_cvttph_epi32(__mmask8 
__U, __m128h __A) { + return (__m128i)__builtin_ia32_vcvttph2dq128_mask( + (__v8hf)__A, (__v4si)_mm_setzero_si128(), (__mmask8)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_cvttph_epi32(__m128h __A) { + return (__m256i)__builtin_ia32_vcvttph2dq256_mask( + (__v8hf)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_cvttph_epi32(__m256i __W, __mmask8 __U, __m128h __A) { + return (__m256i)__builtin_ia32_vcvttph2dq256_mask((__v8hf)__A, (__v8si)__W, + (__mmask8)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_cvttph_epi32(__mmask8 __U, __m128h __A) { + return (__m256i)__builtin_ia32_vcvttph2dq256_mask( + (__v8hf)__A, (__v8si)_mm256_setzero_si256(), (__mmask8)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epu32(__m128h __A) { + return (__m128i)__builtin_ia32_vcvttph2udq128_mask( + (__v8hf)__A, (__v4su)_mm_undefined_si128(), (__mmask8)-1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_cvttph_epu32(__m128i __W, __mmask8 __U, __m128h __A) { + return (__m128i)__builtin_ia32_vcvttph2udq128_mask((__v8hf)__A, (__v4su)__W, + (__mmask8)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_cvttph_epu32(__mmask8 __U, __m128h __A) { + return (__m128i)__builtin_ia32_vcvttph2udq128_mask( + (__v8hf)__A, (__v4su)_mm_setzero_si128(), (__mmask8)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_cvttph_epu32(__m128h __A) { + return (__m256i)__builtin_ia32_vcvttph2udq256_mask( + (__v8hf)__A, (__v8su)_mm256_undefined_si256(), (__mmask8)-1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_cvttph_epu32(__m256i __W, __mmask8 __U, __m128h __A) { + return (__m256i)__builtin_ia32_vcvttph2udq256_mask((__v8hf)__A, (__v8su)__W, + (__mmask8)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_cvttph_epu32(__mmask8 __U, __m128h __A) { + return (__m256i)__builtin_ia32_vcvttph2udq256_mask( + (__v8hf)__A, (__v8su)_mm256_setzero_si256(), (__mmask8)__U); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepi64_ph(__m128i __A) { + return (__m128h)__builtin_ia32_vcvtqq2ph128_mask( + (__v2di)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 +_mm_mask_cvtepi64_ph(__m128h __W, __mmask8 __U, __m128i __A) { + return (__m128h)__builtin_ia32_vcvtqq2ph128_mask((__v2di)__A, (__v8hf)__W, + (__mmask8)__U); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 +_mm_maskz_cvtepi64_ph(__mmask8 __U, __m128i __A) { + return (__m128h)__builtin_ia32_vcvtqq2ph128_mask( + (__v2di)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS256 +_mm256_cvtepi64_ph(__m256i __A) { + return (__m128h)__builtin_ia32_vcvtqq2ph256_mask( + (__v4di)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS256 +_mm256_mask_cvtepi64_ph(__m128h __W, __mmask8 __U, __m256i __A) { + return (__m128h)__builtin_ia32_vcvtqq2ph256_mask((__v4di)__A, (__v8hf)__W, + (__mmask8)__U); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS256 +_mm256_maskz_cvtepi64_ph(__mmask8 __U, __m256i __A) { + return (__m128h)__builtin_ia32_vcvtqq2ph256_mask( + (__v4di)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epi64(__m128h __A) { + return (__m128i)__builtin_ia32_vcvtph2qq128_mask( + (__v8hf)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1); +} + +static __inline__ 
__m128i __DEFAULT_FN_ATTRS128 +_mm_mask_cvtph_epi64(__m128i __W, __mmask8 __U, __m128h __A) { + return (__m128i)__builtin_ia32_vcvtph2qq128_mask((__v8hf)__A, (__v2di)__W, + (__mmask8)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_cvtph_epi64(__mmask8 __U, __m128h __A) { + return (__m128i)__builtin_ia32_vcvtph2qq128_mask( + (__v8hf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_cvtph_epi64(__m128h __A) { + return (__m256i)__builtin_ia32_vcvtph2qq256_mask( + (__v8hf)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_cvtph_epi64(__m256i __W, __mmask8 __U, __m128h __A) { + return (__m256i)__builtin_ia32_vcvtph2qq256_mask((__v8hf)__A, (__v4di)__W, + (__mmask8)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_cvtph_epi64(__mmask8 __U, __m128h __A) { + return (__m256i)__builtin_ia32_vcvtph2qq256_mask( + (__v8hf)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepu64_ph(__m128i __A) { + return (__m128h)__builtin_ia32_vcvtuqq2ph128_mask( + (__v2du)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 +_mm_mask_cvtepu64_ph(__m128h __W, __mmask8 __U, __m128i __A) { + return (__m128h)__builtin_ia32_vcvtuqq2ph128_mask((__v2du)__A, (__v8hf)__W, + (__mmask8)__U); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 +_mm_maskz_cvtepu64_ph(__mmask8 __U, __m128i __A) { + return (__m128h)__builtin_ia32_vcvtuqq2ph128_mask( + (__v2du)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS256 +_mm256_cvtepu64_ph(__m256i __A) { + return (__m128h)__builtin_ia32_vcvtuqq2ph256_mask( + (__v4du)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS256 +_mm256_mask_cvtepu64_ph(__m128h __W, __mmask8 __U, __m256i __A) { + return (__m128h)__builtin_ia32_vcvtuqq2ph256_mask((__v4du)__A, (__v8hf)__W, + (__mmask8)__U); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS256 +_mm256_maskz_cvtepu64_ph(__mmask8 __U, __m256i __A) { + return (__m128h)__builtin_ia32_vcvtuqq2ph256_mask( + (__v4du)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epu64(__m128h __A) { + return (__m128i)__builtin_ia32_vcvtph2uqq128_mask( + (__v8hf)__A, (__v2du)_mm_undefined_si128(), (__mmask8)-1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_cvtph_epu64(__m128i __W, __mmask8 __U, __m128h __A) { + return (__m128i)__builtin_ia32_vcvtph2uqq128_mask((__v8hf)__A, (__v2du)__W, + (__mmask8)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_cvtph_epu64(__mmask8 __U, __m128h __A) { + return (__m128i)__builtin_ia32_vcvtph2uqq128_mask( + (__v8hf)__A, (__v2du)_mm_setzero_si128(), (__mmask8)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_cvtph_epu64(__m128h __A) { + return (__m256i)__builtin_ia32_vcvtph2uqq256_mask( + (__v8hf)__A, (__v4du)_mm256_undefined_si256(), (__mmask8)-1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_cvtph_epu64(__m256i __W, __mmask8 __U, __m128h __A) { + return (__m256i)__builtin_ia32_vcvtph2uqq256_mask((__v8hf)__A, (__v4du)__W, + (__mmask8)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_cvtph_epu64(__mmask8 __U, __m128h __A) { + return (__m256i)__builtin_ia32_vcvtph2uqq256_mask( + (__v8hf)__A, 
(__v4du)_mm256_setzero_si256(), (__mmask8)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epi64(__m128h __A) { + return (__m128i)__builtin_ia32_vcvttph2qq128_mask( + (__v8hf)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_cvttph_epi64(__m128i __W, __mmask8 __U, __m128h __A) { + return (__m128i)__builtin_ia32_vcvttph2qq128_mask((__v8hf)__A, (__v2di)__W, + (__mmask8)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_cvttph_epi64(__mmask8 __U, __m128h __A) { + return (__m128i)__builtin_ia32_vcvttph2qq128_mask( + (__v8hf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_cvttph_epi64(__m128h __A) { + return (__m256i)__builtin_ia32_vcvttph2qq256_mask( + (__v8hf)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_cvttph_epi64(__m256i __W, __mmask8 __U, __m128h __A) { + return (__m256i)__builtin_ia32_vcvttph2qq256_mask((__v8hf)__A, (__v4di)__W, + (__mmask8)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_cvttph_epi64(__mmask8 __U, __m128h __A) { + return (__m256i)__builtin_ia32_vcvttph2qq256_mask( + (__v8hf)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epu64(__m128h __A) { + return (__m128i)__builtin_ia32_vcvttph2uqq128_mask( + (__v8hf)__A, (__v2du)_mm_undefined_si128(), (__mmask8)-1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_cvttph_epu64(__m128i __W, __mmask8 __U, __m128h __A) { + return (__m128i)__builtin_ia32_vcvttph2uqq128_mask((__v8hf)__A, (__v2du)__W, + (__mmask8)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_cvttph_epu64(__mmask8 __U, __m128h __A) { + return (__m128i)__builtin_ia32_vcvttph2uqq128_mask( + (__v8hf)__A, (__v2du)_mm_setzero_si128(), (__mmask8)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_cvttph_epu64(__m128h __A) { + return (__m256i)__builtin_ia32_vcvttph2uqq256_mask( + (__v8hf)__A, (__v4du)_mm256_undefined_si256(), (__mmask8)-1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_cvttph_epu64(__m256i __W, __mmask8 __U, __m128h __A) { + return (__m256i)__builtin_ia32_vcvttph2uqq256_mask((__v8hf)__A, (__v4du)__W, + (__mmask8)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_cvttph_epu64(__mmask8 __U, __m128h __A) { + return (__m256i)__builtin_ia32_vcvttph2uqq256_mask( + (__v8hf)__A, (__v4du)_mm256_setzero_si256(), (__mmask8)__U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtxph_ps(__m128h __A) { + return (__m128)__builtin_ia32_vcvtph2psx128_mask( + (__v8hf)__A, (__v4sf)_mm_undefined_ps(), (__mmask8)-1); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtxph_ps(__m128 __W, + __mmask8 __U, + __m128h __A) { + return (__m128)__builtin_ia32_vcvtph2psx128_mask((__v8hf)__A, (__v4sf)__W, + (__mmask8)__U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS128 +_mm_maskz_cvtxph_ps(__mmask8 __U, __m128h __A) { + return (__m128)__builtin_ia32_vcvtph2psx128_mask( + (__v8hf)__A, (__v4sf)_mm_setzero_ps(), (__mmask8)__U); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtxph_ps(__m128h __A) { + return (__m256)__builtin_ia32_vcvtph2psx256_mask( + (__v8hf)__A, (__v8sf)_mm256_undefined_ps(), (__mmask8)-1); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS256 +_mm256_mask_cvtxph_ps(__m256 __W, __mmask8 __U, __m128h __A) 
{ + return (__m256)__builtin_ia32_vcvtph2psx256_mask((__v8hf)__A, (__v8sf)__W, + (__mmask8)__U); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS256 +_mm256_maskz_cvtxph_ps(__mmask8 __U, __m128h __A) { + return (__m256)__builtin_ia32_vcvtph2psx256_mask( + (__v8hf)__A, (__v8sf)_mm256_setzero_ps(), (__mmask8)__U); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtxps_ph(__m128 __A) { + return (__m128h)__builtin_ia32_vcvtps2phx128_mask( + (__v4sf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtxps_ph(__m128h __W, + __mmask8 __U, + __m128 __A) { + return (__m128h)__builtin_ia32_vcvtps2phx128_mask((__v4sf)__A, (__v8hf)__W, + (__mmask8)__U); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 +_mm_maskz_cvtxps_ph(__mmask8 __U, __m128 __A) { + return (__m128h)__builtin_ia32_vcvtps2phx128_mask( + (__v4sf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_cvtxps_ph(__m256 __A) { + return (__m128h)__builtin_ia32_vcvtps2phx256_mask( + (__v8sf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS256 +_mm256_mask_cvtxps_ph(__m128h __W, __mmask8 __U, __m256 __A) { + return (__m128h)__builtin_ia32_vcvtps2phx256_mask((__v8sf)__A, (__v8hf)__W, + (__mmask8)__U); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS256 +_mm256_maskz_cvtxps_ph(__mmask8 __U, __m256 __A) { + return (__m128h)__builtin_ia32_vcvtps2phx256_mask( + (__v8sf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); +} + static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_blend_ph(__mmask8 __U, __m128h __A, __m128h __W) { @@ -112,6 +1406,46 @@ _mm256_permutexvar_ph(__m256i __A, __m256h __B) { return (__m256h)__builtin_ia32_permvarhi256((__v16hi)__B, (__v16hi)__A); } +static __inline__ _Float16 __DEFAULT_FN_ATTRS256 +_mm256_reduce_add_ph(__m256h __W) { + return __builtin_ia32_reduce_fadd_ph256(-0.0f16, __W); +} + +static __inline__ _Float16 __DEFAULT_FN_ATTRS256 +_mm256_reduce_mul_ph(__m256h __W) { + return __builtin_ia32_reduce_fmul_ph256(1.0f16, __W); +} + +static __inline__ _Float16 __DEFAULT_FN_ATTRS256 +_mm256_reduce_max_ph(__m256h __V) { + return __builtin_ia32_reduce_fmax_ph256(__V); +} + +static __inline__ _Float16 __DEFAULT_FN_ATTRS256 +_mm256_reduce_min_ph(__m256h __V) { + return __builtin_ia32_reduce_fmin_ph256(__V); +} + +static __inline__ _Float16 __DEFAULT_FN_ATTRS128 +_mm_reduce_add_ph(__m128h __W) { + return __builtin_ia32_reduce_fadd_ph128(-0.0f16, __W); +} + +static __inline__ _Float16 __DEFAULT_FN_ATTRS128 +_mm_reduce_mul_ph(__m128h __W) { + return __builtin_ia32_reduce_fmul_ph128(1.0f16, __W); +} + +static __inline__ _Float16 __DEFAULT_FN_ATTRS128 +_mm_reduce_max_ph(__m128h __V) { + return __builtin_ia32_reduce_fmax_ph128(__V); +} + +static __inline__ _Float16 __DEFAULT_FN_ATTRS128 +_mm_reduce_min_ph(__m128h __V) { + return __builtin_ia32_reduce_fmin_ph128(__V); +} + #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h index 968c10efeac0c..0519dba59081a 100644 --- a/clang/lib/Headers/avx512vlintrin.h +++ b/clang/lib/Headers/avx512vlintrin.h @@ -771,124 +771,124 @@ _mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B) } #define _mm_cmp_epi32_mask(a, b, p) \ - (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \ - (__v4si)(__m128i)(b), (int)(p), \ - (__mmask8)-1) + ((__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \ + 
(__v4si)(__m128i)(b), (int)(p), \ + (__mmask8)-1)) #define _mm_mask_cmp_epi32_mask(m, a, b, p) \ - (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \ - (__v4si)(__m128i)(b), (int)(p), \ - (__mmask8)(m)) + ((__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \ + (__v4si)(__m128i)(b), (int)(p), \ + (__mmask8)(m))) #define _mm_cmp_epu32_mask(a, b, p) \ - (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \ - (__v4si)(__m128i)(b), (int)(p), \ - (__mmask8)-1) + ((__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \ + (__v4si)(__m128i)(b), (int)(p), \ + (__mmask8)-1)) #define _mm_mask_cmp_epu32_mask(m, a, b, p) \ - (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \ - (__v4si)(__m128i)(b), (int)(p), \ - (__mmask8)(m)) + ((__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \ + (__v4si)(__m128i)(b), (int)(p), \ + (__mmask8)(m))) #define _mm256_cmp_epi32_mask(a, b, p) \ - (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \ - (__v8si)(__m256i)(b), (int)(p), \ - (__mmask8)-1) + ((__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \ + (__v8si)(__m256i)(b), (int)(p), \ + (__mmask8)-1)) #define _mm256_mask_cmp_epi32_mask(m, a, b, p) \ - (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \ - (__v8si)(__m256i)(b), (int)(p), \ - (__mmask8)(m)) + ((__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \ + (__v8si)(__m256i)(b), (int)(p), \ + (__mmask8)(m))) #define _mm256_cmp_epu32_mask(a, b, p) \ - (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \ - (__v8si)(__m256i)(b), (int)(p), \ - (__mmask8)-1) + ((__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \ + (__v8si)(__m256i)(b), (int)(p), \ + (__mmask8)-1)) #define _mm256_mask_cmp_epu32_mask(m, a, b, p) \ - (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \ - (__v8si)(__m256i)(b), (int)(p), \ - (__mmask8)(m)) + ((__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \ + (__v8si)(__m256i)(b), (int)(p), \ + (__mmask8)(m))) #define _mm_cmp_epi64_mask(a, b, p) \ - (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \ - (__v2di)(__m128i)(b), (int)(p), \ - (__mmask8)-1) + ((__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \ + (__v2di)(__m128i)(b), (int)(p), \ + (__mmask8)-1)) #define _mm_mask_cmp_epi64_mask(m, a, b, p) \ - (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \ - (__v2di)(__m128i)(b), (int)(p), \ - (__mmask8)(m)) + ((__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \ + (__v2di)(__m128i)(b), (int)(p), \ + (__mmask8)(m))) #define _mm_cmp_epu64_mask(a, b, p) \ - (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \ - (__v2di)(__m128i)(b), (int)(p), \ - (__mmask8)-1) + ((__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \ + (__v2di)(__m128i)(b), (int)(p), \ + (__mmask8)-1)) #define _mm_mask_cmp_epu64_mask(m, a, b, p) \ - (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \ - (__v2di)(__m128i)(b), (int)(p), \ - (__mmask8)(m)) + ((__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \ + (__v2di)(__m128i)(b), (int)(p), \ + (__mmask8)(m))) #define _mm256_cmp_epi64_mask(a, b, p) \ - (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \ - (__v4di)(__m256i)(b), (int)(p), \ - (__mmask8)-1) + ((__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \ + (__v4di)(__m256i)(b), (int)(p), \ + (__mmask8)-1)) #define _mm256_mask_cmp_epi64_mask(m, a, b, p) \ - (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \ - (__v4di)(__m256i)(b), (int)(p), \ - 
(__mmask8)(m)) + ((__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \ + (__v4di)(__m256i)(b), (int)(p), \ + (__mmask8)(m))) #define _mm256_cmp_epu64_mask(a, b, p) \ - (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \ - (__v4di)(__m256i)(b), (int)(p), \ - (__mmask8)-1) + ((__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \ + (__v4di)(__m256i)(b), (int)(p), \ + (__mmask8)-1)) #define _mm256_mask_cmp_epu64_mask(m, a, b, p) \ - (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \ - (__v4di)(__m256i)(b), (int)(p), \ - (__mmask8)(m)) + ((__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \ + (__v4di)(__m256i)(b), (int)(p), \ + (__mmask8)(m))) #define _mm256_cmp_ps_mask(a, b, p) \ - (__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \ - (__v8sf)(__m256)(b), (int)(p), \ - (__mmask8)-1) + ((__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \ + (__v8sf)(__m256)(b), (int)(p), \ + (__mmask8)-1)) #define _mm256_mask_cmp_ps_mask(m, a, b, p) \ - (__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \ - (__v8sf)(__m256)(b), (int)(p), \ - (__mmask8)(m)) + ((__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \ + (__v8sf)(__m256)(b), (int)(p), \ + (__mmask8)(m))) #define _mm256_cmp_pd_mask(a, b, p) \ - (__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \ - (__v4df)(__m256d)(b), (int)(p), \ - (__mmask8)-1) + ((__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \ + (__v4df)(__m256d)(b), (int)(p), \ + (__mmask8)-1)) #define _mm256_mask_cmp_pd_mask(m, a, b, p) \ - (__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \ - (__v4df)(__m256d)(b), (int)(p), \ - (__mmask8)(m)) + ((__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \ + (__v4df)(__m256d)(b), (int)(p), \ + (__mmask8)(m))) #define _mm_cmp_ps_mask(a, b, p) \ - (__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \ - (__v4sf)(__m128)(b), (int)(p), \ - (__mmask8)-1) + ((__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \ + (__v4sf)(__m128)(b), (int)(p), \ + (__mmask8)-1)) #define _mm_mask_cmp_ps_mask(m, a, b, p) \ - (__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \ - (__v4sf)(__m128)(b), (int)(p), \ - (__mmask8)(m)) + ((__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \ + (__v4sf)(__m128)(b), (int)(p), \ + (__mmask8)(m))) #define _mm_cmp_pd_mask(a, b, p) \ - (__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \ - (__v2df)(__m128d)(b), (int)(p), \ - (__mmask8)-1) + ((__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \ + (__v2df)(__m128d)(b), (int)(p), \ + (__mmask8)-1)) #define _mm_mask_cmp_pd_mask(m, a, b, p) \ - (__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \ - (__v2df)(__m128d)(b), (int)(p), \ - (__mmask8)(m)) + ((__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \ + (__v2df)(__m128d)(b), (int)(p), \ + (__mmask8)(m))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) @@ -3289,78 +3289,78 @@ _mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B) { } #define _mm_roundscale_pd(A, imm) \ - (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \ - (int)(imm), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1) + ((__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \ + (int)(imm), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)-1)) #define _mm_mask_roundscale_pd(W, U, A, imm) \ - (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \ - (int)(imm), \ - 
(__v2df)(__m128d)(W), \ - (__mmask8)(U)) + ((__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \ + (int)(imm), \ + (__v2df)(__m128d)(W), \ + (__mmask8)(U))) #define _mm_maskz_roundscale_pd(U, A, imm) \ - (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \ - (int)(imm), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U)) + ((__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \ + (int)(imm), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)(U))) #define _mm256_roundscale_pd(A, imm) \ - (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \ - (int)(imm), \ - (__v4df)_mm256_setzero_pd(), \ - (__mmask8)-1) + ((__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \ + (int)(imm), \ + (__v4df)_mm256_setzero_pd(), \ + (__mmask8)-1)) #define _mm256_mask_roundscale_pd(W, U, A, imm) \ - (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \ - (int)(imm), \ - (__v4df)(__m256d)(W), \ - (__mmask8)(U)) + ((__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \ + (int)(imm), \ + (__v4df)(__m256d)(W), \ + (__mmask8)(U))) #define _mm256_maskz_roundscale_pd(U, A, imm) \ - (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \ - (int)(imm), \ - (__v4df)_mm256_setzero_pd(), \ - (__mmask8)(U)) + ((__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \ + (int)(imm), \ + (__v4df)_mm256_setzero_pd(), \ + (__mmask8)(U))) #define _mm_roundscale_ps(A, imm) \ - (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)-1) + ((__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)-1)) #define _mm_mask_roundscale_ps(W, U, A, imm) \ - (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \ - (__v4sf)(__m128)(W), \ - (__mmask8)(U)) + ((__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \ + (__v4sf)(__m128)(W), \ + (__mmask8)(U))) #define _mm_maskz_roundscale_ps(U, A, imm) \ - (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U)) + ((__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)(U))) #define _mm256_roundscale_ps(A, imm) \ - (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \ - (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)-1) + ((__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \ + (__v8sf)_mm256_setzero_ps(), \ + (__mmask8)-1)) #define _mm256_mask_roundscale_ps(W, U, A, imm) \ - (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \ - (__v8sf)(__m256)(W), \ - (__mmask8)(U)) + ((__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \ + (__v8sf)(__m256)(W), \ + (__mmask8)(U))) #define _mm256_maskz_roundscale_ps(U, A, imm) \ - (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \ - (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)(U)) + ((__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \ + (__v8sf)_mm256_setzero_ps(), \ + (__mmask8)(U))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_scalef_pd (__m128d __A, __m128d __B) { @@ -4298,56 +4298,56 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) { #define _mm_rol_epi32(a, b) \ - (__m128i)__builtin_ia32_prold128((__v4si)(__m128i)(a), (int)(b)) + ((__m128i)__builtin_ia32_prold128((__v4si)(__m128i)(a), 
(int)(b))) #define _mm_mask_rol_epi32(w, u, a, b) \ - (__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \ - (__v4si)_mm_rol_epi32((a), (b)), \ - (__v4si)(__m128i)(w)) + ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \ + (__v4si)_mm_rol_epi32((a), (b)), \ + (__v4si)(__m128i)(w))) #define _mm_maskz_rol_epi32(u, a, b) \ - (__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \ - (__v4si)_mm_rol_epi32((a), (b)), \ - (__v4si)_mm_setzero_si128()) + ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \ + (__v4si)_mm_rol_epi32((a), (b)), \ + (__v4si)_mm_setzero_si128())) #define _mm256_rol_epi32(a, b) \ - (__m256i)__builtin_ia32_prold256((__v8si)(__m256i)(a), (int)(b)) + ((__m256i)__builtin_ia32_prold256((__v8si)(__m256i)(a), (int)(b))) #define _mm256_mask_rol_epi32(w, u, a, b) \ - (__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \ - (__v8si)_mm256_rol_epi32((a), (b)), \ - (__v8si)(__m256i)(w)) + ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \ + (__v8si)_mm256_rol_epi32((a), (b)), \ + (__v8si)(__m256i)(w))) #define _mm256_maskz_rol_epi32(u, a, b) \ - (__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \ - (__v8si)_mm256_rol_epi32((a), (b)), \ - (__v8si)_mm256_setzero_si256()) + ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \ + (__v8si)_mm256_rol_epi32((a), (b)), \ + (__v8si)_mm256_setzero_si256())) #define _mm_rol_epi64(a, b) \ - (__m128i)__builtin_ia32_prolq128((__v2di)(__m128i)(a), (int)(b)) + ((__m128i)__builtin_ia32_prolq128((__v2di)(__m128i)(a), (int)(b))) #define _mm_mask_rol_epi64(w, u, a, b) \ - (__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \ - (__v2di)_mm_rol_epi64((a), (b)), \ - (__v2di)(__m128i)(w)) + ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \ + (__v2di)_mm_rol_epi64((a), (b)), \ + (__v2di)(__m128i)(w))) #define _mm_maskz_rol_epi64(u, a, b) \ - (__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \ - (__v2di)_mm_rol_epi64((a), (b)), \ - (__v2di)_mm_setzero_si128()) + ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \ + (__v2di)_mm_rol_epi64((a), (b)), \ + (__v2di)_mm_setzero_si128())) #define _mm256_rol_epi64(a, b) \ - (__m256i)__builtin_ia32_prolq256((__v4di)(__m256i)(a), (int)(b)) + ((__m256i)__builtin_ia32_prolq256((__v4di)(__m256i)(a), (int)(b))) #define _mm256_mask_rol_epi64(w, u, a, b) \ - (__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \ - (__v4di)_mm256_rol_epi64((a), (b)), \ - (__v4di)(__m256i)(w)) + ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \ + (__v4di)_mm256_rol_epi64((a), (b)), \ + (__v4di)(__m256i)(w))) #define _mm256_maskz_rol_epi64(u, a, b) \ - (__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \ - (__v4di)_mm256_rol_epi64((a), (b)), \ - (__v4di)_mm256_setzero_si256()) + ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \ + (__v4di)_mm256_rol_epi64((a), (b)), \ + (__v4di)_mm256_setzero_si256())) static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_rolv_epi32 (__m128i __A, __m128i __B) @@ -4438,56 +4438,56 @@ _mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B) } #define _mm_ror_epi32(a, b) \ - (__m128i)__builtin_ia32_prord128((__v4si)(__m128i)(a), (int)(b)) + ((__m128i)__builtin_ia32_prord128((__v4si)(__m128i)(a), (int)(b))) #define _mm_mask_ror_epi32(w, u, a, b) \ - (__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \ - (__v4si)_mm_ror_epi32((a), (b)), \ - (__v4si)(__m128i)(w)) + ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \ + (__v4si)_mm_ror_epi32((a), (b)), \ + (__v4si)(__m128i)(w))) #define _mm_maskz_ror_epi32(u, a, b) \ - (__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \ - 
(__v4si)_mm_ror_epi32((a), (b)), \ - (__v4si)_mm_setzero_si128()) + ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \ + (__v4si)_mm_ror_epi32((a), (b)), \ + (__v4si)_mm_setzero_si128())) #define _mm256_ror_epi32(a, b) \ - (__m256i)__builtin_ia32_prord256((__v8si)(__m256i)(a), (int)(b)) + ((__m256i)__builtin_ia32_prord256((__v8si)(__m256i)(a), (int)(b))) #define _mm256_mask_ror_epi32(w, u, a, b) \ - (__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \ - (__v8si)_mm256_ror_epi32((a), (b)), \ - (__v8si)(__m256i)(w)) + ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \ + (__v8si)_mm256_ror_epi32((a), (b)), \ + (__v8si)(__m256i)(w))) #define _mm256_maskz_ror_epi32(u, a, b) \ - (__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \ - (__v8si)_mm256_ror_epi32((a), (b)), \ - (__v8si)_mm256_setzero_si256()) + ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \ + (__v8si)_mm256_ror_epi32((a), (b)), \ + (__v8si)_mm256_setzero_si256())) #define _mm_ror_epi64(a, b) \ - (__m128i)__builtin_ia32_prorq128((__v2di)(__m128i)(a), (int)(b)) + ((__m128i)__builtin_ia32_prorq128((__v2di)(__m128i)(a), (int)(b))) #define _mm_mask_ror_epi64(w, u, a, b) \ - (__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \ - (__v2di)_mm_ror_epi64((a), (b)), \ - (__v2di)(__m128i)(w)) + ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \ + (__v2di)_mm_ror_epi64((a), (b)), \ + (__v2di)(__m128i)(w))) #define _mm_maskz_ror_epi64(u, a, b) \ - (__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \ - (__v2di)_mm_ror_epi64((a), (b)), \ - (__v2di)_mm_setzero_si128()) + ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \ + (__v2di)_mm_ror_epi64((a), (b)), \ + (__v2di)_mm_setzero_si128())) #define _mm256_ror_epi64(a, b) \ - (__m256i)__builtin_ia32_prorq256((__v4di)(__m256i)(a), (int)(b)) + ((__m256i)__builtin_ia32_prorq256((__v4di)(__m256i)(a), (int)(b))) #define _mm256_mask_ror_epi64(w, u, a, b) \ - (__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \ - (__v4di)_mm256_ror_epi64((a), (b)), \ - (__v4di)(__m256i)(w)) + ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \ + (__v4di)_mm256_ror_epi64((a), (b)), \ + (__v4di)(__m256i)(w))) #define _mm256_maskz_ror_epi64(u, a, b) \ - (__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \ - (__v4di)_mm256_ror_epi64((a), (b)), \ - (__v4di)_mm256_setzero_si256()) + ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \ + (__v4di)_mm256_ror_epi64((a), (b)), \ + (__v4di)_mm256_setzero_si256())) static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) @@ -5356,76 +5356,76 @@ _mm256_maskz_set1_epi64 (__mmask8 __M, long long __A) } #define _mm_fixupimm_pd(A, B, C, imm) \ - (__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2di)(__m128i)(C), (int)(imm), \ - (__mmask8)-1) + ((__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2di)(__m128i)(C), (int)(imm), \ + (__mmask8)-1)) #define _mm_mask_fixupimm_pd(A, U, B, C, imm) \ - (__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2di)(__m128i)(C), (int)(imm), \ - (__mmask8)(U)) + ((__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2di)(__m128i)(C), (int)(imm), \ + (__mmask8)(U))) #define _mm_maskz_fixupimm_pd(U, A, B, C, imm) \ - (__m128d)__builtin_ia32_fixupimmpd128_maskz((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2di)(__m128i)(C), \ - (int)(imm), (__mmask8)(U)) + 
((__m128d)__builtin_ia32_fixupimmpd128_maskz((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2di)(__m128i)(C), \ + (int)(imm), (__mmask8)(U))) #define _mm256_fixupimm_pd(A, B, C, imm) \ - (__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \ - (__v4df)(__m256d)(B), \ - (__v4di)(__m256i)(C), (int)(imm), \ - (__mmask8)-1) + ((__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \ + (__v4df)(__m256d)(B), \ + (__v4di)(__m256i)(C), (int)(imm), \ + (__mmask8)-1)) #define _mm256_mask_fixupimm_pd(A, U, B, C, imm) \ - (__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \ - (__v4df)(__m256d)(B), \ - (__v4di)(__m256i)(C), (int)(imm), \ - (__mmask8)(U)) + ((__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \ + (__v4df)(__m256d)(B), \ + (__v4di)(__m256i)(C), (int)(imm), \ + (__mmask8)(U))) #define _mm256_maskz_fixupimm_pd(U, A, B, C, imm) \ - (__m256d)__builtin_ia32_fixupimmpd256_maskz((__v4df)(__m256d)(A), \ - (__v4df)(__m256d)(B), \ - (__v4di)(__m256i)(C), \ - (int)(imm), (__mmask8)(U)) + ((__m256d)__builtin_ia32_fixupimmpd256_maskz((__v4df)(__m256d)(A), \ + (__v4df)(__m256d)(B), \ + (__v4di)(__m256i)(C), \ + (int)(imm), (__mmask8)(U))) #define _mm_fixupimm_ps(A, B, C, imm) \ - (__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4si)(__m128i)(C), (int)(imm), \ - (__mmask8)-1) + ((__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4si)(__m128i)(C), (int)(imm), \ + (__mmask8)-1)) #define _mm_mask_fixupimm_ps(A, U, B, C, imm) \ - (__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4si)(__m128i)(C), (int)(imm), \ - (__mmask8)(U)) - -#define _mm_maskz_fixupimm_ps(U, A, B, C, imm) \ - (__m128)__builtin_ia32_fixupimmps128_maskz((__v4sf)(__m128)(A), \ + ((__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (__v4si)(__m128i)(C), (int)(imm), \ - (__mmask8)(U)) + (__mmask8)(U))) + +#define _mm_maskz_fixupimm_ps(U, A, B, C, imm) \ + ((__m128)__builtin_ia32_fixupimmps128_maskz((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4si)(__m128i)(C), (int)(imm), \ + (__mmask8)(U))) #define _mm256_fixupimm_ps(A, B, C, imm) \ - (__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \ - (__v8sf)(__m256)(B), \ - (__v8si)(__m256i)(C), (int)(imm), \ - (__mmask8)-1) + ((__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \ + (__v8sf)(__m256)(B), \ + (__v8si)(__m256i)(C), (int)(imm), \ + (__mmask8)-1)) #define _mm256_mask_fixupimm_ps(A, U, B, C, imm) \ - (__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \ - (__v8sf)(__m256)(B), \ - (__v8si)(__m256i)(C), (int)(imm), \ - (__mmask8)(U)) - -#define _mm256_maskz_fixupimm_ps(U, A, B, C, imm) \ - (__m256)__builtin_ia32_fixupimmps256_maskz((__v8sf)(__m256)(A), \ + ((__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \ (__v8sf)(__m256)(B), \ (__v8si)(__m256i)(C), (int)(imm), \ - (__mmask8)(U)) + (__mmask8)(U))) + +#define _mm256_maskz_fixupimm_ps(U, A, B, C, imm) \ + ((__m256)__builtin_ia32_fixupimmps256_maskz((__v8sf)(__m256)(A), \ + (__v8sf)(__m256)(B), \ + (__v8si)(__m256i)(C), (int)(imm), \ + (__mmask8)(U))) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P) @@ -6033,44 +6033,44 @@ _mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A) } #define _mm_mask_permute_pd(W, U, X, C) \ - (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ - (__v2df)_mm_permute_pd((X), 
(C)), \ - (__v2df)(__m128d)(W)) + ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ + (__v2df)_mm_permute_pd((X), (C)), \ + (__v2df)(__m128d)(W))) #define _mm_maskz_permute_pd(U, X, C) \ - (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ - (__v2df)_mm_permute_pd((X), (C)), \ - (__v2df)_mm_setzero_pd()) + ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ + (__v2df)_mm_permute_pd((X), (C)), \ + (__v2df)_mm_setzero_pd())) #define _mm256_mask_permute_pd(W, U, X, C) \ - (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ - (__v4df)_mm256_permute_pd((X), (C)), \ - (__v4df)(__m256d)(W)) + ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ + (__v4df)_mm256_permute_pd((X), (C)), \ + (__v4df)(__m256d)(W))) #define _mm256_maskz_permute_pd(U, X, C) \ - (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ - (__v4df)_mm256_permute_pd((X), (C)), \ - (__v4df)_mm256_setzero_pd()) + ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ + (__v4df)_mm256_permute_pd((X), (C)), \ + (__v4df)_mm256_setzero_pd())) #define _mm_mask_permute_ps(W, U, X, C) \ - (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ - (__v4sf)_mm_permute_ps((X), (C)), \ - (__v4sf)(__m128)(W)) + ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ + (__v4sf)_mm_permute_ps((X), (C)), \ + (__v4sf)(__m128)(W))) #define _mm_maskz_permute_ps(U, X, C) \ - (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ - (__v4sf)_mm_permute_ps((X), (C)), \ - (__v4sf)_mm_setzero_ps()) + ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ + (__v4sf)_mm_permute_ps((X), (C)), \ + (__v4sf)_mm_setzero_ps())) #define _mm256_mask_permute_ps(W, U, X, C) \ - (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ - (__v8sf)_mm256_permute_ps((X), (C)), \ - (__v8sf)(__m256)(W)) + ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ + (__v8sf)_mm256_permute_ps((X), (C)), \ + (__v8sf)(__m256)(W))) #define _mm256_maskz_permute_ps(U, X, C) \ - (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ - (__v8sf)_mm256_permute_ps((X), (C)), \ - (__v8sf)_mm256_setzero_ps()) + ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ + (__v8sf)_mm256_permute_ps((X), (C)), \ + (__v8sf)_mm256_setzero_ps())) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C) @@ -6526,175 +6526,175 @@ _mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, unsigned int __imm) } #define _mm_ternarylogic_epi32(A, B, C, imm) \ - (__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \ - (__v4si)(__m128i)(B), \ - (__v4si)(__m128i)(C), (int)(imm), \ - (__mmask8)-1) + ((__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \ + (__v4si)(__m128i)(B), \ + (__v4si)(__m128i)(C), (int)(imm), \ + (__mmask8)-1)) #define _mm_mask_ternarylogic_epi32(A, U, B, C, imm) \ - (__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \ - (__v4si)(__m128i)(B), \ - (__v4si)(__m128i)(C), (int)(imm), \ - (__mmask8)(U)) - -#define _mm_maskz_ternarylogic_epi32(U, A, B, C, imm) \ - (__m128i)__builtin_ia32_pternlogd128_maskz((__v4si)(__m128i)(A), \ + ((__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \ (__v4si)(__m128i)(B), \ (__v4si)(__m128i)(C), (int)(imm), \ - (__mmask8)(U)) + (__mmask8)(U))) + +#define _mm_maskz_ternarylogic_epi32(U, A, B, C, imm) \ + ((__m128i)__builtin_ia32_pternlogd128_maskz((__v4si)(__m128i)(A), \ + (__v4si)(__m128i)(B), \ + (__v4si)(__m128i)(C), (int)(imm), \ + (__mmask8)(U))) #define _mm256_ternarylogic_epi32(A, B, C, imm) \ - 
(__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \ - (__v8si)(__m256i)(B), \ - (__v8si)(__m256i)(C), (int)(imm), \ - (__mmask8)-1) + ((__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \ + (__v8si)(__m256i)(B), \ + (__v8si)(__m256i)(C), (int)(imm), \ + (__mmask8)-1)) #define _mm256_mask_ternarylogic_epi32(A, U, B, C, imm) \ - (__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \ - (__v8si)(__m256i)(B), \ - (__v8si)(__m256i)(C), (int)(imm), \ - (__mmask8)(U)) - -#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm) \ - (__m256i)__builtin_ia32_pternlogd256_maskz((__v8si)(__m256i)(A), \ + ((__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \ (__v8si)(__m256i)(B), \ (__v8si)(__m256i)(C), (int)(imm), \ - (__mmask8)(U)) + (__mmask8)(U))) + +#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm) \ + ((__m256i)__builtin_ia32_pternlogd256_maskz((__v8si)(__m256i)(A), \ + (__v8si)(__m256i)(B), \ + (__v8si)(__m256i)(C), (int)(imm), \ + (__mmask8)(U))) #define _mm_ternarylogic_epi64(A, B, C, imm) \ - (__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \ - (__v2di)(__m128i)(B), \ - (__v2di)(__m128i)(C), (int)(imm), \ - (__mmask8)-1) + ((__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \ + (__v2di)(__m128i)(B), \ + (__v2di)(__m128i)(C), (int)(imm), \ + (__mmask8)-1)) #define _mm_mask_ternarylogic_epi64(A, U, B, C, imm) \ - (__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \ - (__v2di)(__m128i)(B), \ - (__v2di)(__m128i)(C), (int)(imm), \ - (__mmask8)(U)) - -#define _mm_maskz_ternarylogic_epi64(U, A, B, C, imm) \ - (__m128i)__builtin_ia32_pternlogq128_maskz((__v2di)(__m128i)(A), \ + ((__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \ (__v2di)(__m128i)(B), \ (__v2di)(__m128i)(C), (int)(imm), \ - (__mmask8)(U)) + (__mmask8)(U))) + +#define _mm_maskz_ternarylogic_epi64(U, A, B, C, imm) \ + ((__m128i)__builtin_ia32_pternlogq128_maskz((__v2di)(__m128i)(A), \ + (__v2di)(__m128i)(B), \ + (__v2di)(__m128i)(C), (int)(imm), \ + (__mmask8)(U))) #define _mm256_ternarylogic_epi64(A, B, C, imm) \ - (__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \ - (__v4di)(__m256i)(B), \ - (__v4di)(__m256i)(C), (int)(imm), \ - (__mmask8)-1) + ((__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \ + (__v4di)(__m256i)(B), \ + (__v4di)(__m256i)(C), (int)(imm), \ + (__mmask8)-1)) #define _mm256_mask_ternarylogic_epi64(A, U, B, C, imm) \ - (__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \ - (__v4di)(__m256i)(B), \ - (__v4di)(__m256i)(C), (int)(imm), \ - (__mmask8)(U)) - -#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm) \ - (__m256i)__builtin_ia32_pternlogq256_maskz((__v4di)(__m256i)(A), \ + ((__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \ (__v4di)(__m256i)(B), \ (__v4di)(__m256i)(C), (int)(imm), \ - (__mmask8)(U)) + (__mmask8)(U))) + +#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm) \ + ((__m256i)__builtin_ia32_pternlogq256_maskz((__v4di)(__m256i)(A), \ + (__v4di)(__m256i)(B), \ + (__v4di)(__m256i)(C), (int)(imm), \ + (__mmask8)(U))) #define _mm256_shuffle_f32x4(A, B, imm) \ - (__m256)__builtin_ia32_shuf_f32x4_256((__v8sf)(__m256)(A), \ - (__v8sf)(__m256)(B), (int)(imm)) + ((__m256)__builtin_ia32_shuf_f32x4_256((__v8sf)(__m256)(A), \ + (__v8sf)(__m256)(B), (int)(imm))) #define _mm256_mask_shuffle_f32x4(W, U, A, B, imm) \ - (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ - (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \ - (__v8sf)(__m256)(W)) + 
((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ + (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \ + (__v8sf)(__m256)(W))) #define _mm256_maskz_shuffle_f32x4(U, A, B, imm) \ - (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ - (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \ - (__v8sf)_mm256_setzero_ps()) + ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ + (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \ + (__v8sf)_mm256_setzero_ps())) #define _mm256_shuffle_f64x2(A, B, imm) \ - (__m256d)__builtin_ia32_shuf_f64x2_256((__v4df)(__m256d)(A), \ - (__v4df)(__m256d)(B), (int)(imm)) + ((__m256d)__builtin_ia32_shuf_f64x2_256((__v4df)(__m256d)(A), \ + (__v4df)(__m256d)(B), (int)(imm))) #define _mm256_mask_shuffle_f64x2(W, U, A, B, imm) \ - (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ - (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \ - (__v4df)(__m256d)(W)) + ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ + (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \ + (__v4df)(__m256d)(W))) #define _mm256_maskz_shuffle_f64x2(U, A, B, imm) \ - (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ - (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \ - (__v4df)_mm256_setzero_pd()) + ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ + (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \ + (__v4df)_mm256_setzero_pd())) #define _mm256_shuffle_i32x4(A, B, imm) \ - (__m256i)__builtin_ia32_shuf_i32x4_256((__v8si)(__m256i)(A), \ - (__v8si)(__m256i)(B), (int)(imm)) + ((__m256i)__builtin_ia32_shuf_i32x4_256((__v8si)(__m256i)(A), \ + (__v8si)(__m256i)(B), (int)(imm))) #define _mm256_mask_shuffle_i32x4(W, U, A, B, imm) \ - (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ - (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \ - (__v8si)(__m256i)(W)) + ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ + (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \ + (__v8si)(__m256i)(W))) #define _mm256_maskz_shuffle_i32x4(U, A, B, imm) \ - (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ - (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \ - (__v8si)_mm256_setzero_si256()) + ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ + (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \ + (__v8si)_mm256_setzero_si256())) #define _mm256_shuffle_i64x2(A, B, imm) \ - (__m256i)__builtin_ia32_shuf_i64x2_256((__v4di)(__m256i)(A), \ - (__v4di)(__m256i)(B), (int)(imm)) + ((__m256i)__builtin_ia32_shuf_i64x2_256((__v4di)(__m256i)(A), \ + (__v4di)(__m256i)(B), (int)(imm))) #define _mm256_mask_shuffle_i64x2(W, U, A, B, imm) \ - (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ - (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \ - (__v4di)(__m256i)(W)) + ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ + (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \ + (__v4di)(__m256i)(W))) #define _mm256_maskz_shuffle_i64x2(U, A, B, imm) \ - (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ - (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \ - (__v4di)_mm256_setzero_si256()) + ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ + (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \ + (__v4di)_mm256_setzero_si256())) #define _mm_mask_shuffle_pd(W, U, A, B, M) \ - (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ - (__v2df)_mm_shuffle_pd((A), (B), (M)), \ - (__v2df)(__m128d)(W)) + ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ + (__v2df)_mm_shuffle_pd((A), (B), (M)), \ + (__v2df)(__m128d)(W))) #define _mm_maskz_shuffle_pd(U, A, B, M) \ - (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ - 
(__v2df)_mm_shuffle_pd((A), (B), (M)), \ - (__v2df)_mm_setzero_pd()) + ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ + (__v2df)_mm_shuffle_pd((A), (B), (M)), \ + (__v2df)_mm_setzero_pd())) #define _mm256_mask_shuffle_pd(W, U, A, B, M) \ - (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ - (__v4df)_mm256_shuffle_pd((A), (B), (M)), \ - (__v4df)(__m256d)(W)) + ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ + (__v4df)_mm256_shuffle_pd((A), (B), (M)), \ + (__v4df)(__m256d)(W))) #define _mm256_maskz_shuffle_pd(U, A, B, M) \ - (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ - (__v4df)_mm256_shuffle_pd((A), (B), (M)), \ - (__v4df)_mm256_setzero_pd()) + ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ + (__v4df)_mm256_shuffle_pd((A), (B), (M)), \ + (__v4df)_mm256_setzero_pd())) #define _mm_mask_shuffle_ps(W, U, A, B, M) \ - (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ - (__v4sf)_mm_shuffle_ps((A), (B), (M)), \ - (__v4sf)(__m128)(W)) + ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ + (__v4sf)_mm_shuffle_ps((A), (B), (M)), \ + (__v4sf)(__m128)(W))) #define _mm_maskz_shuffle_ps(U, A, B, M) \ - (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ - (__v4sf)_mm_shuffle_ps((A), (B), (M)), \ - (__v4sf)_mm_setzero_ps()) + ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ + (__v4sf)_mm_shuffle_ps((A), (B), (M)), \ + (__v4sf)_mm_setzero_ps())) #define _mm256_mask_shuffle_ps(W, U, A, B, M) \ - (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ - (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \ - (__v8sf)(__m256)(W)) + ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ + (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \ + (__v8sf)(__m256)(W))) #define _mm256_maskz_shuffle_ps(U, A, B, M) \ - (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ - (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \ - (__v8sf)_mm256_setzero_ps()) + ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ + (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \ + (__v8sf)_mm256_setzero_ps())) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rsqrt14_pd (__m128d __A) @@ -7834,262 +7834,262 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) } #define _mm256_extractf32x4_ps(A, imm) \ - (__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \ - (int)(imm), \ - (__v4sf)_mm_undefined_ps(), \ - (__mmask8)-1) + ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \ + (int)(imm), \ + (__v4sf)_mm_undefined_ps(), \ + (__mmask8)-1)) #define _mm256_mask_extractf32x4_ps(W, U, A, imm) \ - (__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \ - (int)(imm), \ - (__v4sf)(__m128)(W), \ - (__mmask8)(U)) + ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \ + (int)(imm), \ + (__v4sf)(__m128)(W), \ + (__mmask8)(U))) #define _mm256_maskz_extractf32x4_ps(U, A, imm) \ - (__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \ - (int)(imm), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U)) + ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \ + (int)(imm), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)(U))) #define _mm256_extracti32x4_epi32(A, imm) \ - (__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \ - (int)(imm), \ - (__v4si)_mm_undefined_si128(), \ - (__mmask8)-1) + ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \ + (int)(imm), \ + (__v4si)_mm_undefined_si128(), \ + (__mmask8)-1)) #define _mm256_mask_extracti32x4_epi32(W, U, A, imm) \ - 
(__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \ - (int)(imm), \ - (__v4si)(__m128i)(W), \ - (__mmask8)(U)) + ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \ + (int)(imm), \ + (__v4si)(__m128i)(W), \ + (__mmask8)(U))) #define _mm256_maskz_extracti32x4_epi32(U, A, imm) \ - (__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \ - (int)(imm), \ - (__v4si)_mm_setzero_si128(), \ - (__mmask8)(U)) + ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \ + (int)(imm), \ + (__v4si)_mm_setzero_si128(), \ + (__mmask8)(U))) #define _mm256_insertf32x4(A, B, imm) \ - (__m256)__builtin_ia32_insertf32x4_256((__v8sf)(__m256)(A), \ - (__v4sf)(__m128)(B), (int)(imm)) + ((__m256)__builtin_ia32_insertf32x4_256((__v8sf)(__m256)(A), \ + (__v4sf)(__m128)(B), (int)(imm))) #define _mm256_mask_insertf32x4(W, U, A, B, imm) \ - (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ + ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \ - (__v8sf)(__m256)(W)) + (__v8sf)(__m256)(W))) #define _mm256_maskz_insertf32x4(U, A, B, imm) \ - (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ + ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \ - (__v8sf)_mm256_setzero_ps()) + (__v8sf)_mm256_setzero_ps())) #define _mm256_inserti32x4(A, B, imm) \ - (__m256i)__builtin_ia32_inserti32x4_256((__v8si)(__m256i)(A), \ - (__v4si)(__m128i)(B), (int)(imm)) + ((__m256i)__builtin_ia32_inserti32x4_256((__v8si)(__m256i)(A), \ + (__v4si)(__m128i)(B), (int)(imm))) #define _mm256_mask_inserti32x4(W, U, A, B, imm) \ - (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ + ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ (__v8si)_mm256_inserti32x4((A), (B), (imm)), \ - (__v8si)(__m256i)(W)) + (__v8si)(__m256i)(W))) #define _mm256_maskz_inserti32x4(U, A, B, imm) \ - (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ + ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ (__v8si)_mm256_inserti32x4((A), (B), (imm)), \ - (__v8si)_mm256_setzero_si256()) + (__v8si)_mm256_setzero_si256())) #define _mm_getmant_pd(A, B, C) \ - (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \ - (int)(((C)<<2) | (B)), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1) + ((__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \ + (int)(((C)<<2) | (B)), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)-1)) #define _mm_mask_getmant_pd(W, U, A, B, C) \ - (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \ - (int)(((C)<<2) | (B)), \ - (__v2df)(__m128d)(W), \ - (__mmask8)(U)) + ((__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \ + (int)(((C)<<2) | (B)), \ + (__v2df)(__m128d)(W), \ + (__mmask8)(U))) #define _mm_maskz_getmant_pd(U, A, B, C) \ - (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \ - (int)(((C)<<2) | (B)), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U)) + ((__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \ + (int)(((C)<<2) | (B)), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)(U))) #define _mm256_getmant_pd(A, B, C) \ - (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \ - (int)(((C)<<2) | (B)), \ - (__v4df)_mm256_setzero_pd(), \ - (__mmask8)-1) + ((__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \ + (int)(((C)<<2) | (B)), \ + (__v4df)_mm256_setzero_pd(), \ + (__mmask8)-1)) #define _mm256_mask_getmant_pd(W, U, A, B, C) \ - (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \ - 
(int)(((C)<<2) | (B)), \ - (__v4df)(__m256d)(W), \ - (__mmask8)(U)) + ((__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \ + (int)(((C)<<2) | (B)), \ + (__v4df)(__m256d)(W), \ + (__mmask8)(U))) #define _mm256_maskz_getmant_pd(U, A, B, C) \ - (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \ - (int)(((C)<<2) | (B)), \ - (__v4df)_mm256_setzero_pd(), \ - (__mmask8)(U)) + ((__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \ + (int)(((C)<<2) | (B)), \ + (__v4df)_mm256_setzero_pd(), \ + (__mmask8)(U))) #define _mm_getmant_ps(A, B, C) \ - (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \ - (int)(((C)<<2) | (B)), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)-1) + ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \ + (int)(((C)<<2) | (B)), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)-1)) #define _mm_mask_getmant_ps(W, U, A, B, C) \ - (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \ - (int)(((C)<<2) | (B)), \ - (__v4sf)(__m128)(W), \ - (__mmask8)(U)) + ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \ + (int)(((C)<<2) | (B)), \ + (__v4sf)(__m128)(W), \ + (__mmask8)(U))) #define _mm_maskz_getmant_ps(U, A, B, C) \ - (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \ - (int)(((C)<<2) | (B)), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U)) + ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \ + (int)(((C)<<2) | (B)), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)(U))) #define _mm256_getmant_ps(A, B, C) \ - (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \ - (int)(((C)<<2) | (B)), \ - (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)-1) + ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \ + (int)(((C)<<2) | (B)), \ + (__v8sf)_mm256_setzero_ps(), \ + (__mmask8)-1)) #define _mm256_mask_getmant_ps(W, U, A, B, C) \ - (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \ - (int)(((C)<<2) | (B)), \ - (__v8sf)(__m256)(W), \ - (__mmask8)(U)) + ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \ + (int)(((C)<<2) | (B)), \ + (__v8sf)(__m256)(W), \ + (__mmask8)(U))) #define _mm256_maskz_getmant_ps(U, A, B, C) \ - (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \ - (int)(((C)<<2) | (B)), \ - (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)(U)) + ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \ + (int)(((C)<<2) | (B)), \ + (__v8sf)_mm256_setzero_ps(), \ + (__mmask8)(U))) #define _mm_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \ - (__m128d)__builtin_ia32_gather3div2df((__v2df)(__m128d)(v1_old), \ - (void const *)(addr), \ - (__v2di)(__m128i)(index), \ - (__mmask8)(mask), (int)(scale)) + ((__m128d)__builtin_ia32_gather3div2df((__v2df)(__m128d)(v1_old), \ + (void const *)(addr), \ + (__v2di)(__m128i)(index), \ + (__mmask8)(mask), (int)(scale))) #define _mm_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \ - (__m128i)__builtin_ia32_gather3div2di((__v2di)(__m128i)(v1_old), \ - (void const *)(addr), \ - (__v2di)(__m128i)(index), \ - (__mmask8)(mask), (int)(scale)) + ((__m128i)__builtin_ia32_gather3div2di((__v2di)(__m128i)(v1_old), \ + (void const *)(addr), \ + (__v2di)(__m128i)(index), \ + (__mmask8)(mask), (int)(scale))) #define _mm256_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \ - (__m256d)__builtin_ia32_gather3div4df((__v4df)(__m256d)(v1_old), \ - (void const *)(addr), \ - (__v4di)(__m256i)(index), \ - (__mmask8)(mask), (int)(scale)) + ((__m256d)__builtin_ia32_gather3div4df((__v4df)(__m256d)(v1_old), \ 
+ (void const *)(addr), \ + (__v4di)(__m256i)(index), \ + (__mmask8)(mask), (int)(scale))) #define _mm256_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \ - (__m256i)__builtin_ia32_gather3div4di((__v4di)(__m256i)(v1_old), \ - (void const *)(addr), \ - (__v4di)(__m256i)(index), \ - (__mmask8)(mask), (int)(scale)) + ((__m256i)__builtin_ia32_gather3div4di((__v4di)(__m256i)(v1_old), \ + (void const *)(addr), \ + (__v4di)(__m256i)(index), \ + (__mmask8)(mask), (int)(scale))) #define _mm_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \ - (__m128)__builtin_ia32_gather3div4sf((__v4sf)(__m128)(v1_old), \ - (void const *)(addr), \ - (__v2di)(__m128i)(index), \ - (__mmask8)(mask), (int)(scale)) - -#define _mm_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \ - (__m128i)__builtin_ia32_gather3div4si((__v4si)(__m128i)(v1_old), \ + ((__m128)__builtin_ia32_gather3div4sf((__v4sf)(__m128)(v1_old), \ (void const *)(addr), \ (__v2di)(__m128i)(index), \ - (__mmask8)(mask), (int)(scale)) + (__mmask8)(mask), (int)(scale))) -#define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \ - (__m128)__builtin_ia32_gather3div8sf((__v4sf)(__m128)(v1_old), \ - (void const *)(addr), \ - (__v4di)(__m256i)(index), \ - (__mmask8)(mask), (int)(scale)) +#define _mm_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \ + ((__m128i)__builtin_ia32_gather3div4si((__v4si)(__m128i)(v1_old), \ + (void const *)(addr), \ + (__v2di)(__m128i)(index), \ + (__mmask8)(mask), (int)(scale))) -#define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \ - (__m128i)__builtin_ia32_gather3div8si((__v4si)(__m128i)(v1_old), \ +#define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \ + ((__m128)__builtin_ia32_gather3div8sf((__v4sf)(__m128)(v1_old), \ (void const *)(addr), \ (__v4di)(__m256i)(index), \ - (__mmask8)(mask), (int)(scale)) + (__mmask8)(mask), (int)(scale))) + +#define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \ + ((__m128i)__builtin_ia32_gather3div8si((__v4si)(__m128i)(v1_old), \ + (void const *)(addr), \ + (__v4di)(__m256i)(index), \ + (__mmask8)(mask), (int)(scale))) #define _mm_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \ - (__m128d)__builtin_ia32_gather3siv2df((__v2df)(__m128d)(v1_old), \ - (void const *)(addr), \ - (__v4si)(__m128i)(index), \ - (__mmask8)(mask), (int)(scale)) + ((__m128d)__builtin_ia32_gather3siv2df((__v2df)(__m128d)(v1_old), \ + (void const *)(addr), \ + (__v4si)(__m128i)(index), \ + (__mmask8)(mask), (int)(scale))) #define _mm_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \ - (__m128i)__builtin_ia32_gather3siv2di((__v2di)(__m128i)(v1_old), \ - (void const *)(addr), \ - (__v4si)(__m128i)(index), \ - (__mmask8)(mask), (int)(scale)) + ((__m128i)__builtin_ia32_gather3siv2di((__v2di)(__m128i)(v1_old), \ + (void const *)(addr), \ + (__v4si)(__m128i)(index), \ + (__mmask8)(mask), (int)(scale))) #define _mm256_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \ - (__m256d)__builtin_ia32_gather3siv4df((__v4df)(__m256d)(v1_old), \ - (void const *)(addr), \ - (__v4si)(__m128i)(index), \ - (__mmask8)(mask), (int)(scale)) + ((__m256d)__builtin_ia32_gather3siv4df((__v4df)(__m256d)(v1_old), \ + (void const *)(addr), \ + (__v4si)(__m128i)(index), \ + (__mmask8)(mask), (int)(scale))) #define _mm256_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \ - (__m256i)__builtin_ia32_gather3siv4di((__v4di)(__m256i)(v1_old), \ - (void const *)(addr), \ - (__v4si)(__m128i)(index), \ - (__mmask8)(mask), (int)(scale)) + 
((__m256i)__builtin_ia32_gather3siv4di((__v4di)(__m256i)(v1_old), \ + (void const *)(addr), \ + (__v4si)(__m128i)(index), \ + (__mmask8)(mask), (int)(scale))) #define _mm_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \ - (__m128)__builtin_ia32_gather3siv4sf((__v4sf)(__m128)(v1_old), \ - (void const *)(addr), \ - (__v4si)(__m128i)(index), \ - (__mmask8)(mask), (int)(scale)) - -#define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \ - (__m128i)__builtin_ia32_gather3siv4si((__v4si)(__m128i)(v1_old), \ + ((__m128)__builtin_ia32_gather3siv4sf((__v4sf)(__m128)(v1_old), \ (void const *)(addr), \ (__v4si)(__m128i)(index), \ - (__mmask8)(mask), (int)(scale)) + (__mmask8)(mask), (int)(scale))) -#define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \ - (__m256)__builtin_ia32_gather3siv8sf((__v8sf)(__m256)(v1_old), \ - (void const *)(addr), \ - (__v8si)(__m256i)(index), \ - (__mmask8)(mask), (int)(scale)) +#define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \ + ((__m128i)__builtin_ia32_gather3siv4si((__v4si)(__m128i)(v1_old), \ + (void const *)(addr), \ + (__v4si)(__m128i)(index), \ + (__mmask8)(mask), (int)(scale))) -#define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \ - (__m256i)__builtin_ia32_gather3siv8si((__v8si)(__m256i)(v1_old), \ +#define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \ + ((__m256)__builtin_ia32_gather3siv8sf((__v8sf)(__m256)(v1_old), \ (void const *)(addr), \ (__v8si)(__m256i)(index), \ - (__mmask8)(mask), (int)(scale)) + (__mmask8)(mask), (int)(scale))) + +#define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \ + ((__m256i)__builtin_ia32_gather3siv8si((__v8si)(__m256i)(v1_old), \ + (void const *)(addr), \ + (__v8si)(__m256i)(index), \ + (__mmask8)(mask), (int)(scale))) #define _mm256_permutex_pd(X, C) \ - (__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(X), (int)(C)) + ((__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(X), (int)(C))) #define _mm256_mask_permutex_pd(W, U, X, C) \ - (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ + ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ (__v4df)_mm256_permutex_pd((X), (C)), \ - (__v4df)(__m256d)(W)) + (__v4df)(__m256d)(W))) #define _mm256_maskz_permutex_pd(U, X, C) \ - (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ - (__v4df)_mm256_permutex_pd((X), (C)), \ - (__v4df)_mm256_setzero_pd()) + ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ + (__v4df)_mm256_permutex_pd((X), (C)), \ + (__v4df)_mm256_setzero_pd())) #define _mm256_permutex_epi64(X, C) \ - (__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(X), (int)(C)) + ((__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(X), (int)(C))) #define _mm256_mask_permutex_epi64(W, U, X, C) \ - (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ + ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ (__v4di)_mm256_permutex_epi64((X), (C)), \ - (__v4di)(__m256i)(W)) + (__v4di)(__m256i)(W))) #define _mm256_maskz_permutex_epi64(U, X, C) \ - (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ + ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ (__v4di)_mm256_permutex_epi64((X), (C)), \ - (__v4di)_mm256_setzero_si256()) + (__v4di)_mm256_setzero_si256())) static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_permutexvar_pd (__m256i __X, __m256d __Y) @@ -8175,60 +8175,60 @@ _mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y) } #define _mm_alignr_epi32(A, B, imm) \ - (__m128i)__builtin_ia32_alignd128((__v4si)(__m128i)(A), \ - (__v4si)(__m128i)(B), 
(int)(imm)) + ((__m128i)__builtin_ia32_alignd128((__v4si)(__m128i)(A), \ + (__v4si)(__m128i)(B), (int)(imm))) #define _mm_mask_alignr_epi32(W, U, A, B, imm) \ - (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ + ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ (__v4si)_mm_alignr_epi32((A), (B), (imm)), \ - (__v4si)(__m128i)(W)) + (__v4si)(__m128i)(W))) #define _mm_maskz_alignr_epi32(U, A, B, imm) \ - (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ + ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ (__v4si)_mm_alignr_epi32((A), (B), (imm)), \ - (__v4si)_mm_setzero_si128()) + (__v4si)_mm_setzero_si128())) #define _mm256_alignr_epi32(A, B, imm) \ - (__m256i)__builtin_ia32_alignd256((__v8si)(__m256i)(A), \ - (__v8si)(__m256i)(B), (int)(imm)) + ((__m256i)__builtin_ia32_alignd256((__v8si)(__m256i)(A), \ + (__v8si)(__m256i)(B), (int)(imm))) #define _mm256_mask_alignr_epi32(W, U, A, B, imm) \ - (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ + ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \ - (__v8si)(__m256i)(W)) + (__v8si)(__m256i)(W))) #define _mm256_maskz_alignr_epi32(U, A, B, imm) \ - (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ + ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \ - (__v8si)_mm256_setzero_si256()) + (__v8si)_mm256_setzero_si256())) #define _mm_alignr_epi64(A, B, imm) \ - (__m128i)__builtin_ia32_alignq128((__v2di)(__m128i)(A), \ - (__v2di)(__m128i)(B), (int)(imm)) + ((__m128i)__builtin_ia32_alignq128((__v2di)(__m128i)(A), \ + (__v2di)(__m128i)(B), (int)(imm))) #define _mm_mask_alignr_epi64(W, U, A, B, imm) \ - (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ + ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ (__v2di)_mm_alignr_epi64((A), (B), (imm)), \ - (__v2di)(__m128i)(W)) + (__v2di)(__m128i)(W))) #define _mm_maskz_alignr_epi64(U, A, B, imm) \ - (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ + ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ (__v2di)_mm_alignr_epi64((A), (B), (imm)), \ - (__v2di)_mm_setzero_si128()) + (__v2di)_mm_setzero_si128())) #define _mm256_alignr_epi64(A, B, imm) \ - (__m256i)__builtin_ia32_alignq256((__v4di)(__m256i)(A), \ - (__v4di)(__m256i)(B), (int)(imm)) + ((__m256i)__builtin_ia32_alignq256((__v4di)(__m256i)(A), \ + (__v4di)(__m256i)(B), (int)(imm))) #define _mm256_mask_alignr_epi64(W, U, A, B, imm) \ - (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ + ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \ - (__v4di)(__m256i)(W)) + (__v4di)(__m256i)(W))) #define _mm256_maskz_alignr_epi64(U, A, B, imm) \ - (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ + ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \ - (__v4di)_mm256_setzero_si256()) + (__v4di)_mm256_setzero_si256())) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A) @@ -8295,24 +8295,24 @@ _mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A) } #define _mm256_mask_shuffle_epi32(W, U, A, I) \ - (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ - (__v8si)_mm256_shuffle_epi32((A), (I)), \ - (__v8si)(__m256i)(W)) + ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ + (__v8si)_mm256_shuffle_epi32((A), (I)), \ + (__v8si)(__m256i)(W))) #define _mm256_maskz_shuffle_epi32(U, A, I) \ - (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ - (__v8si)_mm256_shuffle_epi32((A), (I)), \ 
- (__v8si)_mm256_setzero_si256()) + ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ + (__v8si)_mm256_shuffle_epi32((A), (I)), \ + (__v8si)_mm256_setzero_si256())) #define _mm_mask_shuffle_epi32(W, U, A, I) \ - (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ - (__v4si)_mm_shuffle_epi32((A), (I)), \ - (__v4si)(__m128i)(W)) + ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ + (__v4si)_mm_shuffle_epi32((A), (I)), \ + (__v4si)(__m128i)(W))) #define _mm_maskz_shuffle_epi32(U, A, I) \ - (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ - (__v4si)_mm_shuffle_epi32((A), (I)), \ - (__v4si)_mm_setzero_si128()) + ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ + (__v4si)_mm_shuffle_epi32((A), (I)), \ + (__v4si)_mm_setzero_si128())) static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A) @@ -8413,27 +8413,27 @@ _mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A) } #define _mm_mask_cvt_roundps_ph(W, U, A, I) \ - (__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \ - (__v8hi)(__m128i)(W), \ - (__mmask8)(U)) + ((__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \ + (__v8hi)(__m128i)(W), \ + (__mmask8)(U))) #define _mm_maskz_cvt_roundps_ph(U, A, I) \ - (__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \ - (__v8hi)_mm_setzero_si128(), \ - (__mmask8)(U)) + ((__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \ + (__v8hi)_mm_setzero_si128(), \ + (__mmask8)(U))) #define _mm_mask_cvtps_ph _mm_mask_cvt_roundps_ph #define _mm_maskz_cvtps_ph _mm_maskz_cvt_roundps_ph #define _mm256_mask_cvt_roundps_ph(W, U, A, I) \ - (__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \ - (__v8hi)(__m128i)(W), \ - (__mmask8)(U)) + ((__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \ + (__v8hi)(__m128i)(W), \ + (__mmask8)(U))) #define _mm256_maskz_cvt_roundps_ph(U, A, I) \ - (__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \ - (__v8hi)_mm_setzero_si128(), \ - (__mmask8)(U)) + ((__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \ + (__v8hi)_mm_setzero_si128(), \ + (__mmask8)(U))) #define _mm256_mask_cvtps_ph _mm256_mask_cvt_roundps_ph #define _mm256_maskz_cvtps_ph _mm256_maskz_cvt_roundps_ph diff --git a/clang/lib/Headers/avx512vlvbmi2intrin.h b/clang/lib/Headers/avx512vlvbmi2intrin.h index a40f926de75ab..fac1f232415af 100644 --- a/clang/lib/Headers/avx512vlvbmi2intrin.h +++ b/clang/lib/Headers/avx512vlvbmi2intrin.h @@ -239,172 +239,172 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P) } #define _mm256_shldi_epi64(A, B, I) \ - (__m256i)__builtin_ia32_vpshldq256((__v4di)(__m256i)(A), \ - (__v4di)(__m256i)(B), (int)(I)) + ((__m256i)__builtin_ia32_vpshldq256((__v4di)(__m256i)(A), \ + (__v4di)(__m256i)(B), (int)(I))) #define _mm256_mask_shldi_epi64(S, U, A, B, I) \ - (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ - (__v4di)_mm256_shldi_epi64((A), (B), (I)), \ - (__v4di)(__m256i)(S)) + ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ + (__v4di)_mm256_shldi_epi64((A), (B), (I)), \ + (__v4di)(__m256i)(S))) #define _mm256_maskz_shldi_epi64(U, A, B, I) \ - (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ - (__v4di)_mm256_shldi_epi64((A), (B), (I)), \ - (__v4di)_mm256_setzero_si256()) + ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ + (__v4di)_mm256_shldi_epi64((A), (B), (I)), \ + (__v4di)_mm256_setzero_si256())) #define _mm_shldi_epi64(A, B, I) \ - 
(__m128i)__builtin_ia32_vpshldq128((__v2di)(__m128i)(A), \ - (__v2di)(__m128i)(B), (int)(I)) + ((__m128i)__builtin_ia32_vpshldq128((__v2di)(__m128i)(A), \ + (__v2di)(__m128i)(B), (int)(I))) #define _mm_mask_shldi_epi64(S, U, A, B, I) \ - (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ - (__v2di)_mm_shldi_epi64((A), (B), (I)), \ - (__v2di)(__m128i)(S)) + ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ + (__v2di)_mm_shldi_epi64((A), (B), (I)), \ + (__v2di)(__m128i)(S))) #define _mm_maskz_shldi_epi64(U, A, B, I) \ - (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ - (__v2di)_mm_shldi_epi64((A), (B), (I)), \ - (__v2di)_mm_setzero_si128()) + ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ + (__v2di)_mm_shldi_epi64((A), (B), (I)), \ + (__v2di)_mm_setzero_si128())) #define _mm256_shldi_epi32(A, B, I) \ - (__m256i)__builtin_ia32_vpshldd256((__v8si)(__m256i)(A), \ - (__v8si)(__m256i)(B), (int)(I)) + ((__m256i)__builtin_ia32_vpshldd256((__v8si)(__m256i)(A), \ + (__v8si)(__m256i)(B), (int)(I))) #define _mm256_mask_shldi_epi32(S, U, A, B, I) \ - (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ - (__v8si)_mm256_shldi_epi32((A), (B), (I)), \ - (__v8si)(__m256i)(S)) + ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ + (__v8si)_mm256_shldi_epi32((A), (B), (I)), \ + (__v8si)(__m256i)(S))) #define _mm256_maskz_shldi_epi32(U, A, B, I) \ - (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ - (__v8si)_mm256_shldi_epi32((A), (B), (I)), \ - (__v8si)_mm256_setzero_si256()) + ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ + (__v8si)_mm256_shldi_epi32((A), (B), (I)), \ + (__v8si)_mm256_setzero_si256())) #define _mm_shldi_epi32(A, B, I) \ - (__m128i)__builtin_ia32_vpshldd128((__v4si)(__m128i)(A), \ - (__v4si)(__m128i)(B), (int)(I)) + ((__m128i)__builtin_ia32_vpshldd128((__v4si)(__m128i)(A), \ + (__v4si)(__m128i)(B), (int)(I))) #define _mm_mask_shldi_epi32(S, U, A, B, I) \ - (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ - (__v4si)_mm_shldi_epi32((A), (B), (I)), \ - (__v4si)(__m128i)(S)) + ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ + (__v4si)_mm_shldi_epi32((A), (B), (I)), \ + (__v4si)(__m128i)(S))) #define _mm_maskz_shldi_epi32(U, A, B, I) \ - (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ - (__v4si)_mm_shldi_epi32((A), (B), (I)), \ - (__v4si)_mm_setzero_si128()) + ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ + (__v4si)_mm_shldi_epi32((A), (B), (I)), \ + (__v4si)_mm_setzero_si128())) #define _mm256_shldi_epi16(A, B, I) \ - (__m256i)__builtin_ia32_vpshldw256((__v16hi)(__m256i)(A), \ - (__v16hi)(__m256i)(B), (int)(I)) + ((__m256i)__builtin_ia32_vpshldw256((__v16hi)(__m256i)(A), \ + (__v16hi)(__m256i)(B), (int)(I))) #define _mm256_mask_shldi_epi16(S, U, A, B, I) \ - (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ - (__v16hi)_mm256_shldi_epi16((A), (B), (I)), \ - (__v16hi)(__m256i)(S)) + ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ + (__v16hi)_mm256_shldi_epi16((A), (B), (I)), \ + (__v16hi)(__m256i)(S))) #define _mm256_maskz_shldi_epi16(U, A, B, I) \ - (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ - (__v16hi)_mm256_shldi_epi16((A), (B), (I)), \ - (__v16hi)_mm256_setzero_si256()) + ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ + (__v16hi)_mm256_shldi_epi16((A), (B), (I)), \ + (__v16hi)_mm256_setzero_si256())) #define _mm_shldi_epi16(A, B, I) \ - (__m128i)__builtin_ia32_vpshldw128((__v8hi)(__m128i)(A), \ - (__v8hi)(__m128i)(B), (int)(I)) + ((__m128i)__builtin_ia32_vpshldw128((__v8hi)(__m128i)(A), \ + 
(__v8hi)(__m128i)(B), (int)(I))) #define _mm_mask_shldi_epi16(S, U, A, B, I) \ - (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ - (__v8hi)_mm_shldi_epi16((A), (B), (I)), \ - (__v8hi)(__m128i)(S)) + ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ + (__v8hi)_mm_shldi_epi16((A), (B), (I)), \ + (__v8hi)(__m128i)(S))) #define _mm_maskz_shldi_epi16(U, A, B, I) \ - (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ - (__v8hi)_mm_shldi_epi16((A), (B), (I)), \ - (__v8hi)_mm_setzero_si128()) + ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ + (__v8hi)_mm_shldi_epi16((A), (B), (I)), \ + (__v8hi)_mm_setzero_si128())) #define _mm256_shrdi_epi64(A, B, I) \ - (__m256i)__builtin_ia32_vpshrdq256((__v4di)(__m256i)(A), \ - (__v4di)(__m256i)(B), (int)(I)) + ((__m256i)__builtin_ia32_vpshrdq256((__v4di)(__m256i)(A), \ + (__v4di)(__m256i)(B), (int)(I))) #define _mm256_mask_shrdi_epi64(S, U, A, B, I) \ - (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ - (__v4di)_mm256_shrdi_epi64((A), (B), (I)), \ - (__v4di)(__m256i)(S)) + ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ + (__v4di)_mm256_shrdi_epi64((A), (B), (I)), \ + (__v4di)(__m256i)(S))) #define _mm256_maskz_shrdi_epi64(U, A, B, I) \ - (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ - (__v4di)_mm256_shrdi_epi64((A), (B), (I)), \ - (__v4di)_mm256_setzero_si256()) + ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ + (__v4di)_mm256_shrdi_epi64((A), (B), (I)), \ + (__v4di)_mm256_setzero_si256())) #define _mm_shrdi_epi64(A, B, I) \ - (__m128i)__builtin_ia32_vpshrdq128((__v2di)(__m128i)(A), \ - (__v2di)(__m128i)(B), (int)(I)) + ((__m128i)__builtin_ia32_vpshrdq128((__v2di)(__m128i)(A), \ + (__v2di)(__m128i)(B), (int)(I))) #define _mm_mask_shrdi_epi64(S, U, A, B, I) \ - (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ - (__v2di)_mm_shrdi_epi64((A), (B), (I)), \ - (__v2di)(__m128i)(S)) + ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ + (__v2di)_mm_shrdi_epi64((A), (B), (I)), \ + (__v2di)(__m128i)(S))) #define _mm_maskz_shrdi_epi64(U, A, B, I) \ - (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ - (__v2di)_mm_shrdi_epi64((A), (B), (I)), \ - (__v2di)_mm_setzero_si128()) + ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ + (__v2di)_mm_shrdi_epi64((A), (B), (I)), \ + (__v2di)_mm_setzero_si128())) #define _mm256_shrdi_epi32(A, B, I) \ - (__m256i)__builtin_ia32_vpshrdd256((__v8si)(__m256i)(A), \ - (__v8si)(__m256i)(B), (int)(I)) + ((__m256i)__builtin_ia32_vpshrdd256((__v8si)(__m256i)(A), \ + (__v8si)(__m256i)(B), (int)(I))) #define _mm256_mask_shrdi_epi32(S, U, A, B, I) \ - (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ - (__v8si)_mm256_shrdi_epi32((A), (B), (I)), \ - (__v8si)(__m256i)(S)) + ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ + (__v8si)_mm256_shrdi_epi32((A), (B), (I)), \ + (__v8si)(__m256i)(S))) #define _mm256_maskz_shrdi_epi32(U, A, B, I) \ - (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ - (__v8si)_mm256_shrdi_epi32((A), (B), (I)), \ - (__v8si)_mm256_setzero_si256()) + ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ + (__v8si)_mm256_shrdi_epi32((A), (B), (I)), \ + (__v8si)_mm256_setzero_si256())) #define _mm_shrdi_epi32(A, B, I) \ - (__m128i)__builtin_ia32_vpshrdd128((__v4si)(__m128i)(A), \ - (__v4si)(__m128i)(B), (int)(I)) + ((__m128i)__builtin_ia32_vpshrdd128((__v4si)(__m128i)(A), \ + (__v4si)(__m128i)(B), (int)(I))) #define _mm_mask_shrdi_epi32(S, U, A, B, I) \ - (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ - (__v4si)_mm_shrdi_epi32((A), (B), (I)), \ - 
(__v4si)(__m128i)(S)) + ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ + (__v4si)_mm_shrdi_epi32((A), (B), (I)), \ + (__v4si)(__m128i)(S))) #define _mm_maskz_shrdi_epi32(U, A, B, I) \ - (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ - (__v4si)_mm_shrdi_epi32((A), (B), (I)), \ - (__v4si)_mm_setzero_si128()) + ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ + (__v4si)_mm_shrdi_epi32((A), (B), (I)), \ + (__v4si)_mm_setzero_si128())) #define _mm256_shrdi_epi16(A, B, I) \ - (__m256i)__builtin_ia32_vpshrdw256((__v16hi)(__m256i)(A), \ - (__v16hi)(__m256i)(B), (int)(I)) + ((__m256i)__builtin_ia32_vpshrdw256((__v16hi)(__m256i)(A), \ + (__v16hi)(__m256i)(B), (int)(I))) #define _mm256_mask_shrdi_epi16(S, U, A, B, I) \ - (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ - (__v16hi)_mm256_shrdi_epi16((A), (B), (I)), \ - (__v16hi)(__m256i)(S)) + ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ + (__v16hi)_mm256_shrdi_epi16((A), (B), (I)), \ + (__v16hi)(__m256i)(S))) #define _mm256_maskz_shrdi_epi16(U, A, B, I) \ - (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ - (__v16hi)_mm256_shrdi_epi16((A), (B), (I)), \ - (__v16hi)_mm256_setzero_si256()) + ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ + (__v16hi)_mm256_shrdi_epi16((A), (B), (I)), \ + (__v16hi)_mm256_setzero_si256())) #define _mm_shrdi_epi16(A, B, I) \ - (__m128i)__builtin_ia32_vpshrdw128((__v8hi)(__m128i)(A), \ - (__v8hi)(__m128i)(B), (int)(I)) + ((__m128i)__builtin_ia32_vpshrdw128((__v8hi)(__m128i)(A), \ + (__v8hi)(__m128i)(B), (int)(I))) #define _mm_mask_shrdi_epi16(S, U, A, B, I) \ - (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ - (__v8hi)_mm_shrdi_epi16((A), (B), (I)), \ - (__v8hi)(__m128i)(S)) + ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ + (__v8hi)_mm_shrdi_epi16((A), (B), (I)), \ + (__v8hi)(__m128i)(S))) #define _mm_maskz_shrdi_epi16(U, A, B, I) \ - (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ - (__v8hi)_mm_shrdi_epi16((A), (B), (I)), \ - (__v8hi)_mm_setzero_si128()) + ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ + (__v8hi)_mm_shrdi_epi16((A), (B), (I)), \ + (__v8hi)_mm_setzero_si128())) static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_shldv_epi64(__m256i __A, __m256i __B, __m256i __C) diff --git a/clang/lib/Headers/avx512vlvnniintrin.h b/clang/lib/Headers/avx512vlvnniintrin.h index 71ac1b4370d4f..0fb29af262f71 100644 --- a/clang/lib/Headers/avx512vlvnniintrin.h +++ b/clang/lib/Headers/avx512vlvnniintrin.h @@ -36,7 +36,7 @@ /// DST[MAX:256] := 0 /// \endoperation #define _mm256_dpbusd_epi32(S, A, B) \ - (__m256i)__builtin_ia32_vpdpbusd256((__v8si)(S), (__v8si)(A), (__v8si)(B)) + ((__m256i)__builtin_ia32_vpdpbusd256((__v8si)(S), (__v8si)(A), (__v8si)(B))) /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a A with /// corresponding signed 8-bit integers in \a B, producing 4 intermediate signed @@ -56,7 +56,7 @@ /// DST[MAX:256] := 0 /// \endoperation #define _mm256_dpbusds_epi32(S, A, B) \ - (__m256i)__builtin_ia32_vpdpbusds256((__v8si)(S), (__v8si)(A), (__v8si)(B)) + ((__m256i)__builtin_ia32_vpdpbusds256((__v8si)(S), (__v8si)(A), (__v8si)(B))) /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a A with /// corresponding 16-bit integers in \a B, producing 2 intermediate signed 32-bit @@ -74,7 +74,7 @@ /// DST[MAX:256] := 0 /// \endoperation #define _mm256_dpwssd_epi32(S, A, B) \ - (__m256i)__builtin_ia32_vpdpwssd256((__v8si)(S), (__v8si)(A), (__v8si)(B)) + ((__m256i)__builtin_ia32_vpdpwssd256((__v8si)(S), (__v8si)(A), 
(__v8si)(B))) /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a A with /// corresponding 16-bit integers in \a B, producing 2 intermediate signed 32-bit @@ -92,7 +92,7 @@ /// DST[MAX:256] := 0 /// \endoperation #define _mm256_dpwssds_epi32(S, A, B) \ - (__m256i)__builtin_ia32_vpdpwssds256((__v8si)(S), (__v8si)(A), (__v8si)(B)) + ((__m256i)__builtin_ia32_vpdpwssds256((__v8si)(S), (__v8si)(A), (__v8si)(B))) /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a A with /// corresponding signed 8-bit integers in \a B, producing 4 intermediate signed @@ -112,7 +112,7 @@ /// DST[MAX:128] := 0 /// \endoperation #define _mm_dpbusd_epi32(S, A, B) \ - (__m128i)__builtin_ia32_vpdpbusd128((__v4si)(S), (__v4si)(A), (__v4si)(B)) + ((__m128i)__builtin_ia32_vpdpbusd128((__v4si)(S), (__v4si)(A), (__v4si)(B))) /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a A with /// corresponding signed 8-bit integers in \a B, producing 4 intermediate signed @@ -132,7 +132,7 @@ /// DST[MAX:128] := 0 /// \endoperation #define _mm_dpbusds_epi32(S, A, B) \ - (__m128i)__builtin_ia32_vpdpbusds128((__v4si)(S), (__v4si)(A), (__v4si)(B)) + ((__m128i)__builtin_ia32_vpdpbusds128((__v4si)(S), (__v4si)(A), (__v4si)(B))) /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a A with /// corresponding 16-bit integers in \a B, producing 2 intermediate signed 32-bit @@ -150,7 +150,7 @@ /// DST[MAX:128] := 0 /// \endoperation #define _mm_dpwssd_epi32(S, A, B) \ - (__m128i)__builtin_ia32_vpdpwssd128((__v4si)(S), (__v4si)(A), (__v4si)(B)) + ((__m128i)__builtin_ia32_vpdpwssd128((__v4si)(S), (__v4si)(A), (__v4si)(B))) /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a A with /// corresponding 16-bit integers in \a B, producing 2 intermediate signed 32-bit @@ -168,7 +168,7 @@ /// DST[MAX:128] := 0 /// \endoperation #define _mm_dpwssds_epi32(S, A, B) \ - (__m128i)__builtin_ia32_vpdpwssds128((__v4si)(S), (__v4si)(A), (__v4si)(B)) + ((__m128i)__builtin_ia32_vpdpwssds128((__v4si)(S), (__v4si)(A), (__v4si)(B))) static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) diff --git a/clang/lib/Headers/f16cintrin.h b/clang/lib/Headers/f16cintrin.h index 109b604adae3b..13905e6fb0ec8 100644 --- a/clang/lib/Headers/f16cintrin.h +++ b/clang/lib/Headers/f16cintrin.h @@ -66,8 +66,8 @@ _cvtsh_ss(unsigned short __a) /// 1XX: Use MXCSR.RC for rounding /// \returns The converted 16-bit half-precision float value. #define _cvtss_sh(a, imm) \ - (unsigned short)(((__v8hi)__builtin_ia32_vcvtps2ph((__v4sf){a, 0, 0, 0}, \ - (imm)))[0]) + ((unsigned short)(((__v8hi)__builtin_ia32_vcvtps2ph((__v4sf){a, 0, 0, 0}, \ + (imm)))[0])) /// Converts a 128-bit vector containing 32-bit float values into a /// 128-bit vector containing 16-bit half-precision float values. @@ -93,7 +93,7 @@ _cvtsh_ss(unsigned short __a) /// values. The lower 64 bits are used to store the converted 16-bit /// half-precision floating-point values. #define _mm_cvtps_ph(a, imm) \ - (__m128i)__builtin_ia32_vcvtps2ph((__v4sf)(__m128)(a), (imm)) + ((__m128i)__builtin_ia32_vcvtps2ph((__v4sf)(__m128)(a), (imm))) /// Converts a 128-bit vector containing 16-bit half-precision float /// values into a 128-bit vector containing 32-bit float values. @@ -136,7 +136,7 @@ _mm_cvtph_ps(__m128i __a) /// \returns A 128-bit vector containing the converted 16-bit half-precision /// float values. 
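
[Note] The header hunks above and below are one mechanical change repeated across avx512vlvbmi2intrin.h, avx512vlvnniintrin.h, f16cintrin.h, gfniintrin.h, vpclmulqdqintrin.h and xopintrin.h: every function-like intrinsic macro gets an extra outer pair of parentheses, so the expansion behaves as a single parenthesized expression at any call site. A minimal, self-contained sketch of the hazard this closes follows; the stand-in macro and helper are hypothetical, not part of the patch, and the generated code is unchanged either way.

  // parens_demo.cpp -- stand-ins mirroring the macro shapes before/after this patch
  #include <cstdio>

  static int raw(int x) { return x + 1; }    // stand-in for an __builtin_ia32_* call

  #define OLD_STYLE(X)  (long)raw(X)         // old shape: leading cast, no outer parens
  #define NEW_STYLE(X)  ((long)raw(X))       // new shape: expansion fully wrapped

  int main() {
    // "sizeof OLD_STYLE(2)" expands to "sizeof (long)raw(2)": the parser
    // completes sizeof(long) and then trips over the dangling raw(2).
    // The wrapped form keeps the whole expansion as one operand of sizeof:
    std::printf("%zu\n", sizeof NEW_STYLE(2));   // sizeof((long)raw(2)) == sizeof(long)
    return 0;
  }
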
#define _mm256_cvtps_ph(a, imm) \ - (__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)(__m256)(a), (imm)) + ((__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)(__m256)(a), (imm))) /// Converts a 128-bit vector containing 16-bit half-precision float /// values into a 256-bit vector of [8 x float]. diff --git a/clang/lib/Headers/gfniintrin.h b/clang/lib/Headers/gfniintrin.h index 11a321b7c919b..a59238b0b1319 100644 --- a/clang/lib/Headers/gfniintrin.h +++ b/clang/lib/Headers/gfniintrin.h @@ -28,14 +28,14 @@ #define __DEFAULT_FN_ATTRS_VL256 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni"), __min_vector_width__(256))) #define _mm_gf2p8affineinv_epi64_epi8(A, B, I) \ - (__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)(__m128i)(A), \ - (__v16qi)(__m128i)(B), \ - (char)(I)) + ((__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)(__m128i)(A), \ + (__v16qi)(__m128i)(B), \ + (char)(I))) #define _mm_gf2p8affine_epi64_epi8(A, B, I) \ - (__m128i)__builtin_ia32_vgf2p8affineqb_v16qi((__v16qi)(__m128i)(A), \ - (__v16qi)(__m128i)(B), \ - (char)(I)) + ((__m128i)__builtin_ia32_vgf2p8affineqb_v16qi((__v16qi)(__m128i)(A), \ + (__v16qi)(__m128i)(B), \ + (char)(I))) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_gf2p8mul_epi8(__m128i __A, __m128i __B) @@ -46,14 +46,14 @@ _mm_gf2p8mul_epi8(__m128i __A, __m128i __B) #ifdef __AVXINTRIN_H #define _mm256_gf2p8affineinv_epi64_epi8(A, B, I) \ - (__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi((__v32qi)(__m256i)(A), \ - (__v32qi)(__m256i)(B), \ - (char)(I)) + ((__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi((__v32qi)(__m256i)(A), \ + (__v32qi)(__m256i)(B), \ + (char)(I))) #define _mm256_gf2p8affine_epi64_epi8(A, B, I) \ - (__m256i)__builtin_ia32_vgf2p8affineqb_v32qi((__v32qi)(__m256i)(A), \ - (__v32qi)(__m256i)(B), \ - (char)(I)) + ((__m256i)__builtin_ia32_vgf2p8affineqb_v32qi((__v32qi)(__m256i)(A), \ + (__v32qi)(__m256i)(B), \ + (char)(I))) static __inline__ __m256i __DEFAULT_FN_ATTRS_Y _mm256_gf2p8mul_epi8(__m256i __A, __m256i __B) @@ -65,32 +65,32 @@ _mm256_gf2p8mul_epi8(__m256i __A, __m256i __B) #ifdef __AVX512BWINTRIN_H #define _mm512_gf2p8affineinv_epi64_epi8(A, B, I) \ - (__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi((__v64qi)(__m512i)(A), \ - (__v64qi)(__m512i)(B), \ - (char)(I)) + ((__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi((__v64qi)(__m512i)(A), \ + (__v64qi)(__m512i)(B), \ + (char)(I))) #define _mm512_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \ - (__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \ - (__v64qi)_mm512_gf2p8affineinv_epi64_epi8(A, B, I), \ - (__v64qi)(__m512i)(S)) + ((__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \ + (__v64qi)_mm512_gf2p8affineinv_epi64_epi8(A, B, I), \ + (__v64qi)(__m512i)(S))) #define _mm512_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \ - (__m512i)_mm512_mask_gf2p8affineinv_epi64_epi8((__m512i)_mm512_setzero_si512(), \ - U, A, B, I) + _mm512_mask_gf2p8affineinv_epi64_epi8((__m512i)_mm512_setzero_si512(), \ + U, A, B, I) #define _mm512_gf2p8affine_epi64_epi8(A, B, I) \ - (__m512i)__builtin_ia32_vgf2p8affineqb_v64qi((__v64qi)(__m512i)(A), \ - (__v64qi)(__m512i)(B), \ - (char)(I)) + ((__m512i)__builtin_ia32_vgf2p8affineqb_v64qi((__v64qi)(__m512i)(A), \ + (__v64qi)(__m512i)(B), \ + (char)(I))) #define _mm512_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \ - (__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \ - (__v64qi)_mm512_gf2p8affine_epi64_epi8(A, B, I), \ - (__v64qi)(__m512i)(S)) + ((__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \ + 
(__v64qi)_mm512_gf2p8affine_epi64_epi8((A), (B), (I)), \ + (__v64qi)(__m512i)(S))) #define _mm512_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \ - (__m512i)_mm512_mask_gf2p8affine_epi64_epi8((__m512i)_mm512_setzero_si512(), \ - U, A, B, I) + _mm512_mask_gf2p8affine_epi64_epi8((__m512i)_mm512_setzero_si512(), \ + U, A, B, I) static __inline__ __m512i __DEFAULT_FN_ATTRS_Z _mm512_gf2p8mul_epi8(__m512i __A, __m512i __B) @@ -117,40 +117,39 @@ _mm512_maskz_gf2p8mul_epi8(__mmask64 __U, __m512i __A, __m512i __B) #ifdef __AVX512VLBWINTRIN_H #define _mm_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \ - (__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \ - (__v16qi)_mm_gf2p8affineinv_epi64_epi8(A, B, I), \ - (__v16qi)(__m128i)(S)) + ((__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \ + (__v16qi)_mm_gf2p8affineinv_epi64_epi8(A, B, I), \ + (__v16qi)(__m128i)(S))) #define _mm_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \ - (__m128i)_mm_mask_gf2p8affineinv_epi64_epi8((__m128i)_mm_setzero_si128(), \ - U, A, B, I) + _mm_mask_gf2p8affineinv_epi64_epi8((__m128i)_mm_setzero_si128(), \ + U, A, B, I) #define _mm256_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \ - (__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \ - (__v32qi)_mm256_gf2p8affineinv_epi64_epi8(A, B, I), \ - (__v32qi)(__m256i)(S)) + ((__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \ + (__v32qi)_mm256_gf2p8affineinv_epi64_epi8(A, B, I), \ + (__v32qi)(__m256i)(S))) #define _mm256_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \ - (__m256i)_mm256_mask_gf2p8affineinv_epi64_epi8((__m256i)_mm256_setzero_si256(), \ - U, A, B, I) + _mm256_mask_gf2p8affineinv_epi64_epi8((__m256i)_mm256_setzero_si256(), \ + U, A, B, I) #define _mm_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \ - (__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \ - (__v16qi)_mm_gf2p8affine_epi64_epi8(A, B, I), \ - (__v16qi)(__m128i)(S)) + ((__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \ + (__v16qi)_mm_gf2p8affine_epi64_epi8(A, B, I), \ + (__v16qi)(__m128i)(S))) #define _mm_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \ - (__m128i)_mm_mask_gf2p8affine_epi64_epi8((__m128i)_mm_setzero_si128(), \ - U, A, B, I) + _mm_mask_gf2p8affine_epi64_epi8((__m128i)_mm_setzero_si128(), U, A, B, I) #define _mm256_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \ - (__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \ - (__v32qi)_mm256_gf2p8affine_epi64_epi8(A, B, I), \ - (__v32qi)(__m256i)(S)) + ((__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \ + (__v32qi)_mm256_gf2p8affine_epi64_epi8(A, B, I), \ + (__v32qi)(__m256i)(S))) #define _mm256_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \ - (__m256i)_mm256_mask_gf2p8affine_epi64_epi8((__m256i)_mm256_setzero_si256(), \ - U, A, B, I) + _mm256_mask_gf2p8affine_epi64_epi8((__m256i)_mm256_setzero_si256(), \ + U, A, B, I) static __inline__ __m128i __DEFAULT_FN_ATTRS_VL128 _mm_mask_gf2p8mul_epi8(__m128i __S, __mmask16 __U, __m128i __A, __m128i __B) diff --git a/clang/lib/Headers/intrin.h b/clang/lib/Headers/intrin.h index ff8eb8fca2687..34ec79d6acbc6 100644 --- a/clang/lib/Headers/intrin.h +++ b/clang/lib/Headers/intrin.h @@ -574,6 +574,9 @@ void _WriteStatusReg(int, __int64); unsigned short __cdecl _byteswap_ushort(unsigned short val); unsigned long __cdecl _byteswap_ulong (unsigned long val); unsigned __int64 __cdecl _byteswap_uint64(unsigned __int64 val); + +__int64 __mulh(__int64 __a, __int64 __b); +unsigned __int64 __umulh(unsigned __int64 __a, unsigned __int64 __b); #endif /*----------------------------------------------------------------------------*\ diff --git 
a/clang/lib/Headers/smmintrin.h b/clang/lib/Headers/smmintrin.h index 3ee58c9d79370..8913a196144bb 100644 --- a/clang/lib/Headers/smmintrin.h +++ b/clang/lib/Headers/smmintrin.h @@ -865,15 +865,13 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2) /// 10: Bits [95:64] of parameter \a X are returned. \n /// 11: Bits [127:96] of parameter \a X are returned. /// \returns A 32-bit integer containing the extracted 32 bits of float data. -#define _mm_extract_ps(X, N) (__extension__ \ - ({ union { int __i; float __f; } __t; \ - __t.__f = __builtin_ia32_vec_ext_v4sf((__v4sf)(__m128)(X), (int)(N)); \ - __t.__i;})) +#define _mm_extract_ps(X, N) \ + __builtin_bit_cast(int, __builtin_ia32_vec_ext_v4sf((__v4sf)(__m128)(X), (int)(N))) /* Miscellaneous insert and extract macros. */ /* Extract a single-precision float from X at index N into D. */ #define _MM_EXTRACT_FLOAT(D, X, N) \ - { (D) = __builtin_ia32_vec_ext_v4sf((__v4sf)(__m128)(X), (int)(N)); } + do { (D) = __builtin_ia32_vec_ext_v4sf((__v4sf)(__m128)(X), (int)(N)); } while (0) /* Or together 2 sets of indexes (X and Y) with the zeroing bits (Z) to create an index suitable for _mm_insert_ps. */ diff --git a/clang/lib/Headers/vpclmulqdqintrin.h b/clang/lib/Headers/vpclmulqdqintrin.h index 44daadb07d57c..485692ea2b5b1 100644 --- a/clang/lib/Headers/vpclmulqdqintrin.h +++ b/clang/lib/Headers/vpclmulqdqintrin.h @@ -15,15 +15,15 @@ #define __VPCLMULQDQINTRIN_H #define _mm256_clmulepi64_epi128(A, B, I) \ - (__m256i)__builtin_ia32_pclmulqdq256((__v4di)(__m256i)(A), \ - (__v4di)(__m256i)(B), \ - (char)(I)) + ((__m256i)__builtin_ia32_pclmulqdq256((__v4di)(__m256i)(A), \ + (__v4di)(__m256i)(B), \ + (char)(I))) #ifdef __AVX512FINTRIN_H #define _mm512_clmulepi64_epi128(A, B, I) \ - (__m512i)__builtin_ia32_pclmulqdq512((__v8di)(__m512i)(A), \ - (__v8di)(__m512i)(B), \ - (char)(I)) + ((__m512i)__builtin_ia32_pclmulqdq512((__v8di)(__m512i)(A), \ + (__v8di)(__m512i)(B), \ + (char)(I))) #endif // __AVX512FINTRIN_H #endif /* __VPCLMULQDQINTRIN_H */ diff --git a/clang/lib/Headers/wasm_simd128.h b/clang/lib/Headers/wasm_simd128.h index 712fa03780986..3889a2769faf7 100644 --- a/clang/lib/Headers/wasm_simd128.h +++ b/clang/lib/Headers/wasm_simd128.h @@ -276,12 +276,28 @@ wasm_i8x16_make(int8_t __c0, int8_t __c1, int8_t __c2, int8_t __c3, int8_t __c4, __c12, __c13, __c14, __c15}; } +static __inline__ v128_t __DEFAULT_FN_ATTRS +wasm_u8x16_make(uint8_t __c0, uint8_t __c1, uint8_t __c2, uint8_t __c3, + uint8_t __c4, uint8_t __c5, uint8_t __c6, uint8_t __c7, + uint8_t __c8, uint8_t __c9, uint8_t __c10, uint8_t __c11, + uint8_t __c12, uint8_t __c13, uint8_t __c14, uint8_t __c15) { + return (v128_t)(__u8x16){__c0, __c1, __c2, __c3, __c4, __c5, + __c6, __c7, __c8, __c9, __c10, __c11, + __c12, __c13, __c14, __c15}; +} + static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_make(int16_t __c0, int16_t __c1, int16_t __c2, int16_t __c3, int16_t __c4, int16_t __c5, int16_t __c6, int16_t __c7) { return (v128_t)(__i16x8){__c0, __c1, __c2, __c3, __c4, __c5, __c6, __c7}; } +static __inline__ v128_t __DEFAULT_FN_ATTRS +wasm_u16x8_make(uint16_t __c0, uint16_t __c1, uint16_t __c2, uint16_t __c3, + uint16_t __c4, uint16_t __c5, uint16_t __c6, uint16_t __c7) { + return (v128_t)(__u16x8){__c0, __c1, __c2, __c3, __c4, __c5, __c6, __c7}; +} + static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_make(int32_t __c0, int32_t __c1, int32_t __c2, @@ -289,11 +305,23 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_make(int32_t __c0, return (v128_t)(__i32x4){__c0, __c1, __c2, __c3}; } +static 
__inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_make(uint32_t __c0, + uint32_t __c1, + uint32_t __c2, + uint32_t __c3) { + return (v128_t)(__u32x4){__c0, __c1, __c2, __c3}; +} + static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_make(int64_t __c0, int64_t __c1) { return (v128_t)(__i64x2){__c0, __c1}; } +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u64x2_make(uint64_t __c0, + uint64_t __c1) { + return (v128_t)(__u64x2){__c0, __c1}; +} + static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_make(float __c0, float __c1, float __c2, @@ -324,6 +352,24 @@ wasm_i8x16_const(int8_t __c0, int8_t __c1, int8_t __c2, int8_t __c3, __c12, __c13, __c14, __c15}; } +static __inline__ v128_t __DEFAULT_FN_ATTRS +wasm_u8x16_const(uint8_t __c0, uint8_t __c1, uint8_t __c2, uint8_t __c3, + uint8_t __c4, uint8_t __c5, uint8_t __c6, uint8_t __c7, + uint8_t __c8, uint8_t __c9, uint8_t __c10, uint8_t __c11, + uint8_t __c12, uint8_t __c13, uint8_t __c14, uint8_t __c15) + __REQUIRE_CONSTANT(__c0) __REQUIRE_CONSTANT(__c1) __REQUIRE_CONSTANT(__c2) + __REQUIRE_CONSTANT(__c3) __REQUIRE_CONSTANT(__c4) + __REQUIRE_CONSTANT(__c5) __REQUIRE_CONSTANT(__c6) + __REQUIRE_CONSTANT(__c7) __REQUIRE_CONSTANT(__c8) + __REQUIRE_CONSTANT(__c9) __REQUIRE_CONSTANT(__c10) + __REQUIRE_CONSTANT(__c11) __REQUIRE_CONSTANT(__c12) + __REQUIRE_CONSTANT(__c13) __REQUIRE_CONSTANT(__c14) + __REQUIRE_CONSTANT(__c15) { + return (v128_t)(__u8x16){__c0, __c1, __c2, __c3, __c4, __c5, + __c6, __c7, __c8, __c9, __c10, __c11, + __c12, __c13, __c14, __c15}; +} + static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_const(int16_t __c0, int16_t __c1, int16_t __c2, int16_t __c3, int16_t __c4, int16_t __c5, int16_t __c6, int16_t __c7) @@ -334,6 +380,16 @@ wasm_i16x8_const(int16_t __c0, int16_t __c1, int16_t __c2, int16_t __c3, return (v128_t)(__i16x8){__c0, __c1, __c2, __c3, __c4, __c5, __c6, __c7}; } +static __inline__ v128_t __DEFAULT_FN_ATTRS +wasm_u16x8_const(uint16_t __c0, uint16_t __c1, uint16_t __c2, uint16_t __c3, + uint16_t __c4, uint16_t __c5, uint16_t __c6, uint16_t __c7) + __REQUIRE_CONSTANT(__c0) __REQUIRE_CONSTANT(__c1) __REQUIRE_CONSTANT(__c2) + __REQUIRE_CONSTANT(__c3) __REQUIRE_CONSTANT(__c4) + __REQUIRE_CONSTANT(__c5) __REQUIRE_CONSTANT(__c6) + __REQUIRE_CONSTANT(__c7) { + return (v128_t)(__u16x8){__c0, __c1, __c2, __c3, __c4, __c5, __c6, __c7}; +} + static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_const(int32_t __c0, int32_t __c1, int32_t __c2, int32_t __c3) __REQUIRE_CONSTANT(__c0) __REQUIRE_CONSTANT(__c1) __REQUIRE_CONSTANT(__c2) @@ -341,12 +397,25 @@ wasm_i32x4_const(int32_t __c0, int32_t __c1, int32_t __c2, int32_t __c3) return (v128_t)(__i32x4){__c0, __c1, __c2, __c3}; } +static __inline__ v128_t __DEFAULT_FN_ATTRS +wasm_u32x4_const(uint32_t __c0, uint32_t __c1, uint32_t __c2, uint32_t __c3) + __REQUIRE_CONSTANT(__c0) __REQUIRE_CONSTANT(__c1) __REQUIRE_CONSTANT(__c2) + __REQUIRE_CONSTANT(__c3) { + return (v128_t)(__u32x4){__c0, __c1, __c2, __c3}; +} + static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_const(int64_t __c0, int64_t __c1) __REQUIRE_CONSTANT(__c0) __REQUIRE_CONSTANT(__c1) { return (v128_t)(__i64x2){__c0, __c1}; } +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u64x2_const(uint64_t __c0, + uint64_t __c1) + __REQUIRE_CONSTANT(__c0) __REQUIRE_CONSTANT(__c1) { + return (v128_t)(__u64x2){__c0, __c1}; +} + static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_const(float __c0, float __c1, float __c2, float __c3) __REQUIRE_CONSTANT(__c0) __REQUIRE_CONSTANT(__c1) __REQUIRE_CONSTANT(__c2) @@ -366,21 +435,42 @@ 
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_const_splat(int8_t __c) __c, __c, __c, __c, __c, __c, __c, __c}; } +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_const_splat(uint8_t __c) + __REQUIRE_CONSTANT(__c) { + return (v128_t)(__u8x16){__c, __c, __c, __c, __c, __c, __c, __c, + __c, __c, __c, __c, __c, __c, __c, __c}; +} + static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_const_splat(int16_t __c) __REQUIRE_CONSTANT(__c) { return (v128_t)(__i16x8){__c, __c, __c, __c, __c, __c, __c, __c}; } +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_const_splat(uint16_t __c) + __REQUIRE_CONSTANT(__c) { + return (v128_t)(__u16x8){__c, __c, __c, __c, __c, __c, __c, __c}; +} + static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_const_splat(int32_t __c) __REQUIRE_CONSTANT(__c) { return (v128_t)(__i32x4){__c, __c, __c, __c}; } +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_const_splat(uint32_t __c) + __REQUIRE_CONSTANT(__c) { + return (v128_t)(__u32x4){__c, __c, __c, __c}; +} + static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_const_splat(int64_t __c) __REQUIRE_CONSTANT(__c) { return (v128_t)(__i64x2){__c, __c}; } +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u64x2_const_splat(uint64_t __c) + __REQUIRE_CONSTANT(__c) { + return (v128_t)(__u64x2){__c, __c}; +} + static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_const_splat(float __c) __REQUIRE_CONSTANT(__c) { return (v128_t)(__f32x4){__c, __c, __c, __c}; @@ -396,6 +486,11 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_splat(int8_t __a) { __a, __a, __a, __a, __a, __a, __a, __a}; } +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_splat(uint8_t __a) { + return (v128_t)(__u8x16){__a, __a, __a, __a, __a, __a, __a, __a, + __a, __a, __a, __a, __a, __a, __a, __a}; +} + static __inline__ int8_t __DEFAULT_FN_ATTRS wasm_i8x16_extract_lane(v128_t __a, int __i) __REQUIRE_CONSTANT(__i) { @@ -417,10 +512,23 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_replace_lane(v128_t __a, return (v128_t)__v; } +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_replace_lane(v128_t __a, + int __i, + uint8_t __b) + __REQUIRE_CONSTANT(__i) { + __u8x16 __v = (__u8x16)__a; + __v[__i] = __b; + return (v128_t)__v; +} + static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_splat(int16_t __a) { return (v128_t)(__i16x8){__a, __a, __a, __a, __a, __a, __a, __a}; } +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_splat(uint16_t __a) { + return (v128_t)(__u16x8){__a, __a, __a, __a, __a, __a, __a, __a}; +} + static __inline__ int16_t __DEFAULT_FN_ATTRS wasm_i16x8_extract_lane(v128_t __a, int __i) __REQUIRE_CONSTANT(__i) { @@ -441,16 +549,32 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_replace_lane(v128_t __a, return (v128_t)__v; } +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_replace_lane( + v128_t __a, int __i, uint16_t __b) __REQUIRE_CONSTANT(__i) { + __u16x8 __v = (__u16x8)__a; + __v[__i] = __b; + return (v128_t)__v; +} + static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_splat(int32_t __a) { return (v128_t)(__i32x4){__a, __a, __a, __a}; } +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_splat(uint32_t __a) { + return (v128_t)(__u32x4){__a, __a, __a, __a}; +} + static __inline__ int32_t __DEFAULT_FN_ATTRS wasm_i32x4_extract_lane(v128_t __a, int __i) __REQUIRE_CONSTANT(__i) { return ((__i32x4)__a)[__i]; } +static __inline__ uint32_t __DEFAULT_FN_ATTRS +wasm_u32x4_extract_lane(v128_t __a, int __i) __REQUIRE_CONSTANT(__i) { + return ((__u32x4)__a)[__i]; +} + static 
__inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_replace_lane(v128_t __a, int __i, int32_t __b) @@ -460,16 +584,32 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_replace_lane(v128_t __a, return (v128_t)__v; } +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_replace_lane( + v128_t __a, int __i, uint32_t __b) __REQUIRE_CONSTANT(__i) { + __u32x4 __v = (__u32x4)__a; + __v[__i] = __b; + return (v128_t)__v; +} + static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_splat(int64_t __a) { return (v128_t)(__i64x2){__a, __a}; } +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u64x2_splat(uint64_t __a) { + return (v128_t)(__u64x2){__a, __a}; +} + static __inline__ int64_t __DEFAULT_FN_ATTRS wasm_i64x2_extract_lane(v128_t __a, int __i) __REQUIRE_CONSTANT(__i) { return ((__i64x2)__a)[__i]; } +static __inline__ uint64_t __DEFAULT_FN_ATTRS +wasm_u64x2_extract_lane(v128_t __a, int __i) __REQUIRE_CONSTANT(__i) { + return ((__u64x2)__a)[__i]; +} + static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_replace_lane(v128_t __a, int __i, int64_t __b) @@ -479,6 +619,13 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_replace_lane(v128_t __a, return (v128_t)__v; } +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u64x2_replace_lane( + v128_t __a, int __i, uint64_t __b) __REQUIRE_CONSTANT(__i) { + __u64x2 __v = (__u64x2)__a; + __v[__i] = __b; + return (v128_t)__v; +} + static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_splat(float __a) { return (v128_t)(__f32x4){__a, __a, __a, __a}; } @@ -804,7 +951,7 @@ static __inline__ bool __DEFAULT_FN_ATTRS wasm_i8x16_all_true(v128_t __a) { return __builtin_wasm_all_true_i8x16((__i8x16)__a); } -static __inline__ int32_t __DEFAULT_FN_ATTRS wasm_i8x16_bitmask(v128_t __a) { +static __inline__ uint32_t __DEFAULT_FN_ATTRS wasm_i8x16_bitmask(v128_t __a) { return __builtin_wasm_bitmask_i8x16((__i8x16)__a); } @@ -813,17 +960,17 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_popcnt(v128_t __a) { } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_shl(v128_t __a, - int32_t __b) { + uint32_t __b) { return (v128_t)((__i8x16)__a << __b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_shr(v128_t __a, - int32_t __b) { + uint32_t __b) { return (v128_t)((__i8x16)__a >> __b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_shr(v128_t __a, - int32_t __b) { + uint32_t __b) { return (v128_t)((__u8x16)__a >> __b); } @@ -894,22 +1041,22 @@ static __inline__ bool __DEFAULT_FN_ATTRS wasm_i16x8_all_true(v128_t __a) { return __builtin_wasm_all_true_i16x8((__i16x8)__a); } -static __inline__ int32_t __DEFAULT_FN_ATTRS wasm_i16x8_bitmask(v128_t __a) { +static __inline__ uint32_t __DEFAULT_FN_ATTRS wasm_i16x8_bitmask(v128_t __a) { return __builtin_wasm_bitmask_i16x8((__i16x8)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_shl(v128_t __a, - int32_t __b) { + uint32_t __b) { return (v128_t)((__i16x8)__a << __b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_shr(v128_t __a, - int32_t __b) { + uint32_t __b) { return (v128_t)((__i16x8)__a >> __b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_shr(v128_t __a, - int32_t __b) { + uint32_t __b) { return (v128_t)((__u16x8)__a >> __b); } @@ -985,22 +1132,22 @@ static __inline__ bool __DEFAULT_FN_ATTRS wasm_i32x4_all_true(v128_t __a) { return __builtin_wasm_all_true_i32x4((__i32x4)__a); } -static __inline__ int32_t __DEFAULT_FN_ATTRS wasm_i32x4_bitmask(v128_t __a) { +static __inline__ uint32_t __DEFAULT_FN_ATTRS wasm_i32x4_bitmask(v128_t __a) { return 
__builtin_wasm_bitmask_i32x4((__i32x4)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_shl(v128_t __a, - int32_t __b) { + uint32_t __b) { return (v128_t)((__i32x4)__a << __b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_shr(v128_t __a, - int32_t __b) { + uint32_t __b) { return (v128_t)((__i32x4)__a >> __b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_shr(v128_t __a, - int32_t __b) { + uint32_t __b) { return (v128_t)((__u32x4)__a >> __b); } @@ -1056,22 +1203,22 @@ static __inline__ bool __DEFAULT_FN_ATTRS wasm_i64x2_all_true(v128_t __a) { return __builtin_wasm_all_true_i64x2((__i64x2)__a); } -static __inline__ int32_t __DEFAULT_FN_ATTRS wasm_i64x2_bitmask(v128_t __a) { +static __inline__ uint32_t __DEFAULT_FN_ATTRS wasm_i64x2_bitmask(v128_t __a) { return __builtin_wasm_bitmask_i64x2((__i64x2)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_shl(v128_t __a, - int32_t __b) { + uint32_t __b) { return (v128_t)((__i64x2)__a << (int64_t)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_shr(v128_t __a, - int32_t __b) { + uint32_t __b) { return (v128_t)((__i64x2)__a >> (int64_t)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u64x2_shr(v128_t __a, - int32_t __b) { + uint32_t __b) { return (v128_t)((__u64x2)__a >> (int64_t)__b); } @@ -1150,14 +1297,12 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_max(v128_t __a, static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_pmin(v128_t __a, v128_t __b) { - __i32x4 __mask = (__i32x4)((__f32x4)__b < (__f32x4)__a); - return (v128_t)((((__i32x4)__b) & __mask) | (((__i32x4)__a) & ~__mask)); + return (v128_t)__builtin_wasm_pmin_f32x4((__f32x4)__a, (__f32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_pmax(v128_t __a, v128_t __b) { - __i32x4 __mask = (__i32x4)((__f32x4)__a < (__f32x4)__b); - return (v128_t)((((__i32x4)__b) & __mask) | (((__i32x4)__a) & ~__mask)); + return (v128_t)__builtin_wasm_pmax_f32x4((__f32x4)__a, (__f32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_abs(v128_t __a) { @@ -1220,14 +1365,12 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_max(v128_t __a, static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_pmin(v128_t __a, v128_t __b) { - __i64x2 __mask = (__i64x2)((__f64x2)__b < (__f64x2)__a); - return (v128_t)((((__i64x2)__b) & __mask) | (((__i64x2)__a) & ~__mask)); + return (v128_t)__builtin_wasm_pmin_f64x2((__f64x2)__a, (__f64x2)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_pmax(v128_t __a, v128_t __b) { - __i64x2 __mask = (__i64x2)((__f64x2)__a < (__f64x2)__b); - return (v128_t)((((__i64x2)__b) & __mask) | (((__i64x2)__a) & ~__mask)); + return (v128_t)__builtin_wasm_pmax_f64x2((__f64x2)__a, (__f64x2)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS diff --git a/clang/lib/Headers/xopintrin.h b/clang/lib/Headers/xopintrin.h index 5cedde41b625f..976cdf4902a40 100644 --- a/clang/lib/Headers/xopintrin.h +++ b/clang/lib/Headers/xopintrin.h @@ -225,16 +225,16 @@ _mm_rot_epi64(__m128i __A, __m128i __B) } #define _mm_roti_epi8(A, N) \ - (__m128i)__builtin_ia32_vprotbi((__v16qi)(__m128i)(A), (N)) + ((__m128i)__builtin_ia32_vprotbi((__v16qi)(__m128i)(A), (N))) #define _mm_roti_epi16(A, N) \ - (__m128i)__builtin_ia32_vprotwi((__v8hi)(__m128i)(A), (N)) + ((__m128i)__builtin_ia32_vprotwi((__v8hi)(__m128i)(A), (N))) #define _mm_roti_epi32(A, N) \ - (__m128i)__builtin_ia32_vprotdi((__v4si)(__m128i)(A), (N)) + ((__m128i)__builtin_ia32_vprotdi((__v4si)(__m128i)(A), (N))) #define _mm_roti_epi64(A, N) \ - 
(__m128i)__builtin_ia32_vprotqi((__v2di)(__m128i)(A), (N)) + ((__m128i)__builtin_ia32_vprotqi((__v2di)(__m128i)(A), (N))) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_shl_epi8(__m128i __A, __m128i __B) @@ -285,36 +285,36 @@ _mm_sha_epi64(__m128i __A, __m128i __B) } #define _mm_com_epu8(A, B, N) \ - (__m128i)__builtin_ia32_vpcomub((__v16qi)(__m128i)(A), \ - (__v16qi)(__m128i)(B), (N)) + ((__m128i)__builtin_ia32_vpcomub((__v16qi)(__m128i)(A), \ + (__v16qi)(__m128i)(B), (N))) #define _mm_com_epu16(A, B, N) \ - (__m128i)__builtin_ia32_vpcomuw((__v8hi)(__m128i)(A), \ - (__v8hi)(__m128i)(B), (N)) + ((__m128i)__builtin_ia32_vpcomuw((__v8hi)(__m128i)(A), \ + (__v8hi)(__m128i)(B), (N))) #define _mm_com_epu32(A, B, N) \ - (__m128i)__builtin_ia32_vpcomud((__v4si)(__m128i)(A), \ - (__v4si)(__m128i)(B), (N)) + ((__m128i)__builtin_ia32_vpcomud((__v4si)(__m128i)(A), \ + (__v4si)(__m128i)(B), (N))) #define _mm_com_epu64(A, B, N) \ - (__m128i)__builtin_ia32_vpcomuq((__v2di)(__m128i)(A), \ - (__v2di)(__m128i)(B), (N)) + ((__m128i)__builtin_ia32_vpcomuq((__v2di)(__m128i)(A), \ + (__v2di)(__m128i)(B), (N))) #define _mm_com_epi8(A, B, N) \ - (__m128i)__builtin_ia32_vpcomb((__v16qi)(__m128i)(A), \ - (__v16qi)(__m128i)(B), (N)) + ((__m128i)__builtin_ia32_vpcomb((__v16qi)(__m128i)(A), \ + (__v16qi)(__m128i)(B), (N))) #define _mm_com_epi16(A, B, N) \ - (__m128i)__builtin_ia32_vpcomw((__v8hi)(__m128i)(A), \ - (__v8hi)(__m128i)(B), (N)) + ((__m128i)__builtin_ia32_vpcomw((__v8hi)(__m128i)(A), \ + (__v8hi)(__m128i)(B), (N))) #define _mm_com_epi32(A, B, N) \ - (__m128i)__builtin_ia32_vpcomd((__v4si)(__m128i)(A), \ - (__v4si)(__m128i)(B), (N)) + ((__m128i)__builtin_ia32_vpcomd((__v4si)(__m128i)(A), \ + (__v4si)(__m128i)(B), (N))) #define _mm_com_epi64(A, B, N) \ - (__m128i)__builtin_ia32_vpcomq((__v2di)(__m128i)(A), \ - (__v2di)(__m128i)(B), (N)) + ((__m128i)__builtin_ia32_vpcomq((__v2di)(__m128i)(A), \ + (__v2di)(__m128i)(B), (N))) #define _MM_PCOMCTRL_LT 0 #define _MM_PCOMCTRL_LE 1 @@ -710,23 +710,23 @@ _mm_comtrue_epi64(__m128i __A, __m128i __B) } #define _mm_permute2_pd(X, Y, C, I) \ - (__m128d)__builtin_ia32_vpermil2pd((__v2df)(__m128d)(X), \ - (__v2df)(__m128d)(Y), \ - (__v2di)(__m128i)(C), (I)) + ((__m128d)__builtin_ia32_vpermil2pd((__v2df)(__m128d)(X), \ + (__v2df)(__m128d)(Y), \ + (__v2di)(__m128i)(C), (I))) #define _mm256_permute2_pd(X, Y, C, I) \ - (__m256d)__builtin_ia32_vpermil2pd256((__v4df)(__m256d)(X), \ - (__v4df)(__m256d)(Y), \ - (__v4di)(__m256i)(C), (I)) + ((__m256d)__builtin_ia32_vpermil2pd256((__v4df)(__m256d)(X), \ + (__v4df)(__m256d)(Y), \ + (__v4di)(__m256i)(C), (I))) #define _mm_permute2_ps(X, Y, C, I) \ - (__m128)__builtin_ia32_vpermil2ps((__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \ - (__v4si)(__m128i)(C), (I)) + ((__m128)__builtin_ia32_vpermil2ps((__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \ + (__v4si)(__m128i)(C), (I))) #define _mm256_permute2_ps(X, Y, C, I) \ - (__m256)__builtin_ia32_vpermil2ps256((__v8sf)(__m256)(X), \ - (__v8sf)(__m256)(Y), \ - (__v8si)(__m256i)(C), (I)) + ((__m256)__builtin_ia32_vpermil2ps256((__v8sf)(__m256)(X), \ + (__v8sf)(__m256)(Y), \ + (__v8si)(__m256i)(C), (I))) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_frcz_ss(__m128 __A) diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index 64944492eb99b..05b84e0c14b3e 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -1446,19 +1446,30 @@ void Lexer::SetByteOffset(unsigned Offset, bool StartOfLine) { IsAtPhysicalStartOfLine = StartOfLine; } +static bool isUnicodeWhitespace(uint32_t 
Codepoint) { + static const llvm::sys::UnicodeCharSet UnicodeWhitespaceChars( + UnicodeWhitespaceCharRanges); + return UnicodeWhitespaceChars.contains(Codepoint); +} + static bool isAllowedIDChar(uint32_t C, const LangOptions &LangOpts) { if (LangOpts.AsmPreprocessor) { return false; } else if (LangOpts.DollarIdents && '$' == C) { return true; - } else if (LangOpts.CPlusPlus11 || LangOpts.C11) { + } else if (LangOpts.CPlusPlus) { + // A non-leading codepoint must have the XID_Continue property. + // XIDContinueRanges doesn't contains characters also in XIDStartRanges, + // so we need to check both tables. + // '_' doesn't have the XID_Continue property but is allowed in C++. + static const llvm::sys::UnicodeCharSet XIDStartChars(XIDStartRanges); + static const llvm::sys::UnicodeCharSet XIDContinueChars(XIDContinueRanges); + return C == '_' || XIDStartChars.contains(C) || + XIDContinueChars.contains(C); + } else if (LangOpts.C11) { static const llvm::sys::UnicodeCharSet C11AllowedIDChars( C11AllowedIDCharRanges); return C11AllowedIDChars.contains(C); - } else if (LangOpts.CPlusPlus) { - static const llvm::sys::UnicodeCharSet CXX03AllowedIDChars( - CXX03AllowedIDCharRanges); - return CXX03AllowedIDChars.contains(C); } else { static const llvm::sys::UnicodeCharSet C99AllowedIDChars( C99AllowedIDCharRanges); @@ -1467,20 +1478,24 @@ static bool isAllowedIDChar(uint32_t C, const LangOptions &LangOpts) { } static bool isAllowedInitiallyIDChar(uint32_t C, const LangOptions &LangOpts) { - assert(isAllowedIDChar(C, LangOpts)); if (LangOpts.AsmPreprocessor) { return false; - } else if (LangOpts.CPlusPlus11 || LangOpts.C11) { + } + if (LangOpts.CPlusPlus) { + static const llvm::sys::UnicodeCharSet XIDStartChars(XIDStartRanges); + // '_' doesn't have the XID_Start property but is allowed in C++. + return C == '_' || XIDStartChars.contains(C); + } + if (!isAllowedIDChar(C, LangOpts)) + return false; + if (LangOpts.C11) { static const llvm::sys::UnicodeCharSet C11DisallowedInitialIDChars( C11DisallowedInitialIDCharRanges); return !C11DisallowedInitialIDChars.contains(C); - } else if (LangOpts.CPlusPlus) { - return true; - } else { - static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars( - C99DisallowedInitialIDCharRanges); - return !C99DisallowedInitialIDChars.contains(C); } + static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars( + C99DisallowedInitialIDCharRanges); + return !C99DisallowedInitialIDChars.contains(C); } static inline CharSourceRange makeCharRange(Lexer &L, const char *Begin, @@ -1512,16 +1527,6 @@ static void maybeDiagnoseIDCharCompat(DiagnosticsEngine &Diags, uint32_t C, << CannotStartIdentifier; } } - - // Check C++98 compatibility. 
- if (!Diags.isIgnored(diag::warn_cxx98_compat_unicode_id, Range.getBegin())) { - static const llvm::sys::UnicodeCharSet CXX03AllowedIDChars( - CXX03AllowedIDCharRanges); - if (!CXX03AllowedIDChars.contains(C)) { - Diags.Report(Range.getBegin(), diag::warn_cxx98_compat_unicode_id) - << Range; - } - } } /// After encountering UTF-8 character C and interpreting it as an identifier @@ -1608,14 +1613,56 @@ static void maybeDiagnoseUTF8Homoglyph(DiagnosticsEngine &Diags, uint32_t C, } } +static void diagnoseInvalidUnicodeCodepointInIdentifier( + DiagnosticsEngine &Diags, const LangOptions &LangOpts, uint32_t CodePoint, + CharSourceRange Range, bool IsFirst) { + if (isASCII(CodePoint)) + return; + + bool IsIDStart = isAllowedInitiallyIDChar(CodePoint, LangOpts); + bool IsIDContinue = IsIDStart || isAllowedIDChar(CodePoint, LangOpts); + + if ((IsFirst && IsIDStart) || (!IsFirst && IsIDContinue)) + return; + + bool InvalidOnlyAtStart = IsFirst && !IsIDStart && IsIDContinue; + + llvm::SmallString<5> CharBuf; + llvm::raw_svector_ostream CharOS(CharBuf); + llvm::write_hex(CharOS, CodePoint, llvm::HexPrintStyle::Upper, 4); + + if (!IsFirst || InvalidOnlyAtStart) { + Diags.Report(Range.getBegin(), diag::err_character_not_allowed_identifier) + << Range << CharBuf << int(InvalidOnlyAtStart) + << FixItHint::CreateRemoval(Range); + } else { + Diags.Report(Range.getBegin(), diag::err_character_not_allowed) + << Range << CharBuf << FixItHint::CreateRemoval(Range); + } +} + bool Lexer::tryConsumeIdentifierUCN(const char *&CurPtr, unsigned Size, Token &Result) { const char *UCNPtr = CurPtr + Size; uint32_t CodePoint = tryReadUCN(UCNPtr, CurPtr, /*Token=*/nullptr); - if (CodePoint == 0 || !isAllowedIDChar(CodePoint, LangOpts)) + if (CodePoint == 0) { return false; + } - if (!isLexingRawMode()) + if (!isAllowedIDChar(CodePoint, LangOpts)) { + if (isASCII(CodePoint) || isUnicodeWhitespace(CodePoint)) + return false; + if (!isLexingRawMode() && !ParsingPreprocessorDirective && + !PP->isPreprocessedOutput()) + diagnoseInvalidUnicodeCodepointInIdentifier( + PP->getDiagnostics(), LangOpts, CodePoint, + makeCharRange(*this, CurPtr, UCNPtr), + /*IsFirst=*/false); + + // We got a unicode codepoint that is neither a space nor a + // a valid identifier part. + // Carry on as if the codepoint was valid for recovery purposes. + } else if (!isLexingRawMode()) maybeDiagnoseIDCharCompat(PP->getDiagnostics(), CodePoint, makeCharRange(*this, CurPtr, UCNPtr), /*IsFirst=*/false); @@ -1638,11 +1685,22 @@ bool Lexer::tryConsumeIdentifierUTF8Char(const char *&CurPtr) { (const llvm::UTF8 *)BufferEnd, &CodePoint, llvm::strictConversion); - if (Result != llvm::conversionOK || - !isAllowedIDChar(static_cast(CodePoint), LangOpts)) + if (Result != llvm::conversionOK) return false; - if (!isLexingRawMode()) { + if (!isAllowedIDChar(static_cast(CodePoint), LangOpts)) { + if (isASCII(CodePoint) || isUnicodeWhitespace(CodePoint)) + return false; + + if (!isLexingRawMode() && !ParsingPreprocessorDirective && + !PP->isPreprocessedOutput()) + diagnoseInvalidUnicodeCodepointInIdentifier( + PP->getDiagnostics(), LangOpts, CodePoint, + makeCharRange(*this, CurPtr, UnicodePtr), /*IsFirst=*/false); + // We got a unicode codepoint that is neither a space nor a + // a valid identifier part. Carry on as if the codepoint was + // valid for recovery purposes. 
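
[Note] The Lexer.cpp hunks in this stretch switch C++ identifier validation from the old C++03/C11 tables to Unicode XID_Start/XID_Continue (with '_' special-cased), drop the C++98-compat path, and replace the silent bail-out on a disallowed codepoint with a dedicated diagnostic plus a removal fix-it while still consuming the codepoint for recovery. A rough sketch of the user-visible effect, as hypothetical test input rather than anything from the patch:

  // xid_identifiers.cpp -- sketch of what the XID-based rules accept in C++ mode
  int déjà_vu = 0;      // OK: U+00E9 and U+00E0 have XID_Start/XID_Continue
  int _privé  = 1;      // OK: leading '_' is special-cased, the rest is XID_Continue
  // int 🙂count = 2;   // rejected: U+1F642 is neither XID_Start nor XID_Continue;
                        // the lexer now reports err_character_not_allowed_identifier /
                        // err_character_not_allowed with a fix-it removing the
                        // character, then carries on lexing for recovery.
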
+ } else if (!isLexingRawMode()) { maybeDiagnoseIDCharCompat(PP->getDiagnostics(), CodePoint, makeCharRange(*this, CurPtr, UnicodePtr), /*IsFirst=*/false); @@ -3136,10 +3194,8 @@ uint32_t Lexer::tryReadUCN(const char *&StartPtr, const char *SlashLoc, bool Lexer::CheckUnicodeWhitespace(Token &Result, uint32_t C, const char *CurPtr) { - static const llvm::sys::UnicodeCharSet UnicodeWhitespaceChars( - UnicodeWhitespaceCharRanges); if (!isLexingRawMode() && !PP->isPreprocessedOutput() && - UnicodeWhitespaceChars.contains(C)) { + isUnicodeWhitespace(C)) { Diag(BufferPtr, diag::ext_unicode_whitespace) << makeCharRange(*this, BufferPtr, CurPtr); @@ -3150,7 +3206,7 @@ bool Lexer::CheckUnicodeWhitespace(Token &Result, uint32_t C, } bool Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) { - if (isAllowedIDChar(C, LangOpts) && isAllowedInitiallyIDChar(C, LangOpts)) { + if (isAllowedInitiallyIDChar(C, LangOpts)) { if (!isLexingRawMode() && !ParsingPreprocessorDirective && !PP->isPreprocessedOutput()) { maybeDiagnoseIDCharCompat(PP->getDiagnostics(), C, @@ -3165,8 +3221,8 @@ bool Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) { } if (!isLexingRawMode() && !ParsingPreprocessorDirective && - !PP->isPreprocessedOutput() && - !isASCII(*BufferPtr) && !isAllowedIDChar(C, LangOpts)) { + !PP->isPreprocessedOutput() && !isASCII(*BufferPtr) && + !isAllowedInitiallyIDChar(C, LangOpts) && !isUnicodeWhitespace(C)) { // Non-ASCII characters tend to creep into source code unintentionally. // Instead of letting the parser complain about the unknown token, // just drop the character. @@ -3176,9 +3232,9 @@ bool Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) { // loophole in the mapping of Unicode characters to basic character set // characters that allows us to map these particular characters to, say, // whitespace. - Diag(BufferPtr, diag::err_non_ascii) - << FixItHint::CreateRemoval(makeCharRange(*this, BufferPtr, CurPtr)); - + diagnoseInvalidUnicodeCodepointInIdentifier( + PP->getDiagnostics(), LangOpts, C, + makeCharRange(*this, BufferPtr, CurPtr), /*IsStart*/ true); BufferPtr = CurPtr; return false; } diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp index 85d826ce9c6f7..f012fb72580ed 100644 --- a/clang/lib/Lex/LiteralSupport.cpp +++ b/clang/lib/Lex/LiteralSupport.cpp @@ -1390,14 +1390,14 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end, unsigned NumCharsSoFar = buffer_begin - &codepoint_buffer.front(); if (NumCharsSoFar > 1) { - if (isWide()) - PP.Diag(Loc, diag::warn_extraneous_char_constant); - else if (isAscii() && NumCharsSoFar == 4) + if (isAscii() && NumCharsSoFar == 4) PP.Diag(Loc, diag::warn_four_char_character_literal); else if (isAscii()) PP.Diag(Loc, diag::warn_multichar_character_literal); - else - PP.Diag(Loc, diag::err_multichar_utf_character_literal); + else { + PP.Diag(Loc, diag::err_multichar_character_literal) << (isWide() ? 
0 : 1); + HadError = true; + } IsMultiChar = true; } else { IsMultiChar = false; diff --git a/clang/lib/Lex/UnicodeCharSets.h b/clang/lib/Lex/UnicodeCharSets.h index 74dd57fdf1184..33fb4d39ebec2 100644 --- a/clang/lib/Lex/UnicodeCharSets.h +++ b/clang/lib/Lex/UnicodeCharSets.h @@ -10,6 +10,344 @@ #include "llvm/Support/UnicodeCharRanges.h" +// Unicode 13 XID_Start +static const llvm::sys::UnicodeCharRange XIDStartRanges[] = { + {0x0041, 0x005A}, {0x0061, 0x007A}, {0x00AA, 0x00AA}, + {0x00B5, 0x00B5}, {0x00BA, 0x00BA}, {0x00C0, 0x00D6}, + {0x00D8, 0x00F6}, {0x00F8, 0x02C1}, {0x02C6, 0x02D1}, + {0x02E0, 0x02E4}, {0x02EC, 0x02EC}, {0x02EE, 0x02EE}, + {0x0370, 0x0374}, {0x0376, 0x0377}, {0x037B, 0x037D}, + {0x037F, 0x037F}, {0x0386, 0x0386}, {0x0388, 0x038A}, + {0x038C, 0x038C}, {0x038E, 0x03A1}, {0x03A3, 0x03F5}, + {0x03F7, 0x0481}, {0x048A, 0x052F}, {0x0531, 0x0556}, + {0x0559, 0x0559}, {0x0560, 0x0588}, {0x05D0, 0x05EA}, + {0x05EF, 0x05F2}, {0x0620, 0x064A}, {0x066E, 0x066F}, + {0x0671, 0x06D3}, {0x06D5, 0x06D5}, {0x06E5, 0x06E6}, + {0x06EE, 0x06EF}, {0x06FA, 0x06FC}, {0x06FF, 0x06FF}, + {0x0710, 0x0710}, {0x0712, 0x072F}, {0x074D, 0x07A5}, + {0x07B1, 0x07B1}, {0x07CA, 0x07EA}, {0x07F4, 0x07F5}, + {0x07FA, 0x07FA}, {0x0800, 0x0815}, {0x081A, 0x081A}, + {0x0824, 0x0824}, {0x0828, 0x0828}, {0x0840, 0x0858}, + {0x0860, 0x086A}, {0x08A0, 0x08B4}, {0x08B6, 0x08C7}, + {0x0904, 0x0939}, {0x093D, 0x093D}, {0x0950, 0x0950}, + {0x0958, 0x0961}, {0x0971, 0x0980}, {0x0985, 0x098C}, + {0x098F, 0x0990}, {0x0993, 0x09A8}, {0x09AA, 0x09B0}, + {0x09B2, 0x09B2}, {0x09B6, 0x09B9}, {0x09BD, 0x09BD}, + {0x09CE, 0x09CE}, {0x09DC, 0x09DD}, {0x09DF, 0x09E1}, + {0x09F0, 0x09F1}, {0x09FC, 0x09FC}, {0x0A05, 0x0A0A}, + {0x0A0F, 0x0A10}, {0x0A13, 0x0A28}, {0x0A2A, 0x0A30}, + {0x0A32, 0x0A33}, {0x0A35, 0x0A36}, {0x0A38, 0x0A39}, + {0x0A59, 0x0A5C}, {0x0A5E, 0x0A5E}, {0x0A72, 0x0A74}, + {0x0A85, 0x0A8D}, {0x0A8F, 0x0A91}, {0x0A93, 0x0AA8}, + {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3}, {0x0AB5, 0x0AB9}, + {0x0ABD, 0x0ABD}, {0x0AD0, 0x0AD0}, {0x0AE0, 0x0AE1}, + {0x0AF9, 0x0AF9}, {0x0B05, 0x0B0C}, {0x0B0F, 0x0B10}, + {0x0B13, 0x0B28}, {0x0B2A, 0x0B30}, {0x0B32, 0x0B33}, + {0x0B35, 0x0B39}, {0x0B3D, 0x0B3D}, {0x0B5C, 0x0B5D}, + {0x0B5F, 0x0B61}, {0x0B71, 0x0B71}, {0x0B83, 0x0B83}, + {0x0B85, 0x0B8A}, {0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, + {0x0B99, 0x0B9A}, {0x0B9C, 0x0B9C}, {0x0B9E, 0x0B9F}, + {0x0BA3, 0x0BA4}, {0x0BA8, 0x0BAA}, {0x0BAE, 0x0BB9}, + {0x0BD0, 0x0BD0}, {0x0C05, 0x0C0C}, {0x0C0E, 0x0C10}, + {0x0C12, 0x0C28}, {0x0C2A, 0x0C39}, {0x0C3D, 0x0C3D}, + {0x0C58, 0x0C5A}, {0x0C60, 0x0C61}, {0x0C80, 0x0C80}, + {0x0C85, 0x0C8C}, {0x0C8E, 0x0C90}, {0x0C92, 0x0CA8}, + {0x0CAA, 0x0CB3}, {0x0CB5, 0x0CB9}, {0x0CBD, 0x0CBD}, + {0x0CDE, 0x0CDE}, {0x0CE0, 0x0CE1}, {0x0CF1, 0x0CF2}, + {0x0D04, 0x0D0C}, {0x0D0E, 0x0D10}, {0x0D12, 0x0D3A}, + {0x0D3D, 0x0D3D}, {0x0D4E, 0x0D4E}, {0x0D54, 0x0D56}, + {0x0D5F, 0x0D61}, {0x0D7A, 0x0D7F}, {0x0D85, 0x0D96}, + {0x0D9A, 0x0DB1}, {0x0DB3, 0x0DBB}, {0x0DBD, 0x0DBD}, + {0x0DC0, 0x0DC6}, {0x0E01, 0x0E30}, {0x0E32, 0x0E32}, + {0x0E40, 0x0E46}, {0x0E81, 0x0E82}, {0x0E84, 0x0E84}, + {0x0E86, 0x0E8A}, {0x0E8C, 0x0EA3}, {0x0EA5, 0x0EA5}, + {0x0EA7, 0x0EB0}, {0x0EB2, 0x0EB2}, {0x0EBD, 0x0EBD}, + {0x0EC0, 0x0EC4}, {0x0EC6, 0x0EC6}, {0x0EDC, 0x0EDF}, + {0x0F00, 0x0F00}, {0x0F40, 0x0F47}, {0x0F49, 0x0F6C}, + {0x0F88, 0x0F8C}, {0x1000, 0x102A}, {0x103F, 0x103F}, + {0x1050, 0x1055}, {0x105A, 0x105D}, {0x1061, 0x1061}, + {0x1065, 0x1066}, {0x106E, 0x1070}, {0x1075, 0x1081}, + {0x108E, 0x108E}, {0x10A0, 0x10C5}, 
{0x10C7, 0x10C7}, + {0x10CD, 0x10CD}, {0x10D0, 0x10FA}, {0x10FC, 0x1248}, + {0x124A, 0x124D}, {0x1250, 0x1256}, {0x1258, 0x1258}, + {0x125A, 0x125D}, {0x1260, 0x1288}, {0x128A, 0x128D}, + {0x1290, 0x12B0}, {0x12B2, 0x12B5}, {0x12B8, 0x12BE}, + {0x12C0, 0x12C0}, {0x12C2, 0x12C5}, {0x12C8, 0x12D6}, + {0x12D8, 0x1310}, {0x1312, 0x1315}, {0x1318, 0x135A}, + {0x1380, 0x138F}, {0x13A0, 0x13F5}, {0x13F8, 0x13FD}, + {0x1401, 0x166C}, {0x166F, 0x167F}, {0x1681, 0x169A}, + {0x16A0, 0x16EA}, {0x16EE, 0x16F8}, {0x1700, 0x170C}, + {0x170E, 0x1711}, {0x1720, 0x1731}, {0x1740, 0x1751}, + {0x1760, 0x176C}, {0x176E, 0x1770}, {0x1780, 0x17B3}, + {0x17D7, 0x17D7}, {0x17DC, 0x17DC}, {0x1820, 0x1878}, + {0x1880, 0x18A8}, {0x18AA, 0x18AA}, {0x18B0, 0x18F5}, + {0x1900, 0x191E}, {0x1950, 0x196D}, {0x1970, 0x1974}, + {0x1980, 0x19AB}, {0x19B0, 0x19C9}, {0x1A00, 0x1A16}, + {0x1A20, 0x1A54}, {0x1AA7, 0x1AA7}, {0x1B05, 0x1B33}, + {0x1B45, 0x1B4B}, {0x1B83, 0x1BA0}, {0x1BAE, 0x1BAF}, + {0x1BBA, 0x1BE5}, {0x1C00, 0x1C23}, {0x1C4D, 0x1C4F}, + {0x1C5A, 0x1C7D}, {0x1C80, 0x1C88}, {0x1C90, 0x1CBA}, + {0x1CBD, 0x1CBF}, {0x1CE9, 0x1CEC}, {0x1CEE, 0x1CF3}, + {0x1CF5, 0x1CF6}, {0x1CFA, 0x1CFA}, {0x1D00, 0x1DBF}, + {0x1E00, 0x1F15}, {0x1F18, 0x1F1D}, {0x1F20, 0x1F45}, + {0x1F48, 0x1F4D}, {0x1F50, 0x1F57}, {0x1F59, 0x1F59}, + {0x1F5B, 0x1F5B}, {0x1F5D, 0x1F5D}, {0x1F5F, 0x1F7D}, + {0x1F80, 0x1FB4}, {0x1FB6, 0x1FBC}, {0x1FBE, 0x1FBE}, + {0x1FC2, 0x1FC4}, {0x1FC6, 0x1FCC}, {0x1FD0, 0x1FD3}, + {0x1FD6, 0x1FDB}, {0x1FE0, 0x1FEC}, {0x1FF2, 0x1FF4}, + {0x1FF6, 0x1FFC}, {0x2071, 0x2071}, {0x207F, 0x207F}, + {0x2090, 0x209C}, {0x2102, 0x2102}, {0x2107, 0x2107}, + {0x210A, 0x2113}, {0x2115, 0x2115}, {0x2118, 0x211D}, + {0x2124, 0x2124}, {0x2126, 0x2126}, {0x2128, 0x2128}, + {0x212A, 0x2139}, {0x213C, 0x213F}, {0x2145, 0x2149}, + {0x214E, 0x214E}, {0x2160, 0x2188}, {0x2C00, 0x2C2E}, + {0x2C30, 0x2C5E}, {0x2C60, 0x2CE4}, {0x2CEB, 0x2CEE}, + {0x2CF2, 0x2CF3}, {0x2D00, 0x2D25}, {0x2D27, 0x2D27}, + {0x2D2D, 0x2D2D}, {0x2D30, 0x2D67}, {0x2D6F, 0x2D6F}, + {0x2D80, 0x2D96}, {0x2DA0, 0x2DA6}, {0x2DA8, 0x2DAE}, + {0x2DB0, 0x2DB6}, {0x2DB8, 0x2DBE}, {0x2DC0, 0x2DC6}, + {0x2DC8, 0x2DCE}, {0x2DD0, 0x2DD6}, {0x2DD8, 0x2DDE}, + {0x3005, 0x3007}, {0x3021, 0x3029}, {0x3031, 0x3035}, + {0x3038, 0x303C}, {0x3041, 0x3096}, {0x309D, 0x309F}, + {0x30A1, 0x30FA}, {0x30FC, 0x30FF}, {0x3105, 0x312F}, + {0x3131, 0x318E}, {0x31A0, 0x31BF}, {0x31F0, 0x31FF}, + {0x3400, 0x4DBF}, {0x4E00, 0x9FFC}, {0xA000, 0xA48C}, + {0xA4D0, 0xA4FD}, {0xA500, 0xA60C}, {0xA610, 0xA61F}, + {0xA62A, 0xA62B}, {0xA640, 0xA66E}, {0xA67F, 0xA69D}, + {0xA6A0, 0xA6EF}, {0xA717, 0xA71F}, {0xA722, 0xA788}, + {0xA78B, 0xA7BF}, {0xA7C2, 0xA7CA}, {0xA7F5, 0xA801}, + {0xA803, 0xA805}, {0xA807, 0xA80A}, {0xA80C, 0xA822}, + {0xA840, 0xA873}, {0xA882, 0xA8B3}, {0xA8F2, 0xA8F7}, + {0xA8FB, 0xA8FB}, {0xA8FD, 0xA8FE}, {0xA90A, 0xA925}, + {0xA930, 0xA946}, {0xA960, 0xA97C}, {0xA984, 0xA9B2}, + {0xA9CF, 0xA9CF}, {0xA9E0, 0xA9E4}, {0xA9E6, 0xA9EF}, + {0xA9FA, 0xA9FE}, {0xAA00, 0xAA28}, {0xAA40, 0xAA42}, + {0xAA44, 0xAA4B}, {0xAA60, 0xAA76}, {0xAA7A, 0xAA7A}, + {0xAA7E, 0xAAAF}, {0xAAB1, 0xAAB1}, {0xAAB5, 0xAAB6}, + {0xAAB9, 0xAABD}, {0xAAC0, 0xAAC0}, {0xAAC2, 0xAAC2}, + {0xAADB, 0xAADD}, {0xAAE0, 0xAAEA}, {0xAAF2, 0xAAF4}, + {0xAB01, 0xAB06}, {0xAB09, 0xAB0E}, {0xAB11, 0xAB16}, + {0xAB20, 0xAB26}, {0xAB28, 0xAB2E}, {0xAB30, 0xAB5A}, + {0xAB5C, 0xAB69}, {0xAB70, 0xABE2}, {0xAC00, 0xD7A3}, + {0xD7B0, 0xD7C6}, {0xD7CB, 0xD7FB}, {0xF900, 0xFA6D}, + {0xFA70, 0xFAD9}, {0xFB00, 0xFB06}, {0xFB13, 0xFB17}, + 
{0xFB1D, 0xFB1D}, {0xFB1F, 0xFB28}, {0xFB2A, 0xFB36}, + {0xFB38, 0xFB3C}, {0xFB3E, 0xFB3E}, {0xFB40, 0xFB41}, + {0xFB43, 0xFB44}, {0xFB46, 0xFBB1}, {0xFBD3, 0xFC5D}, + {0xFC64, 0xFD3D}, {0xFD50, 0xFD8F}, {0xFD92, 0xFDC7}, + {0xFDF0, 0xFDF9}, {0xFE71, 0xFE71}, {0xFE73, 0xFE73}, + {0xFE77, 0xFE77}, {0xFE79, 0xFE79}, {0xFE7B, 0xFE7B}, + {0xFE7D, 0xFE7D}, {0xFE7F, 0xFEFC}, {0xFF21, 0xFF3A}, + {0xFF41, 0xFF5A}, {0xFF66, 0xFF9D}, {0xFFA0, 0xFFBE}, + {0xFFC2, 0xFFC7}, {0xFFCA, 0xFFCF}, {0xFFD2, 0xFFD7}, + {0xFFDA, 0xFFDC}, {0x10000, 0x1000B}, {0x1000D, 0x10026}, + {0x10028, 0x1003A}, {0x1003C, 0x1003D}, {0x1003F, 0x1004D}, + {0x10050, 0x1005D}, {0x10080, 0x100FA}, {0x10140, 0x10174}, + {0x10280, 0x1029C}, {0x102A0, 0x102D0}, {0x10300, 0x1031F}, + {0x1032D, 0x1034A}, {0x10350, 0x10375}, {0x10380, 0x1039D}, + {0x103A0, 0x103C3}, {0x103C8, 0x103CF}, {0x103D1, 0x103D5}, + {0x10400, 0x1049D}, {0x104B0, 0x104D3}, {0x104D8, 0x104FB}, + {0x10500, 0x10527}, {0x10530, 0x10563}, {0x10600, 0x10736}, + {0x10740, 0x10755}, {0x10760, 0x10767}, {0x10800, 0x10805}, + {0x10808, 0x10808}, {0x1080A, 0x10835}, {0x10837, 0x10838}, + {0x1083C, 0x1083C}, {0x1083F, 0x10855}, {0x10860, 0x10876}, + {0x10880, 0x1089E}, {0x108E0, 0x108F2}, {0x108F4, 0x108F5}, + {0x10900, 0x10915}, {0x10920, 0x10939}, {0x10980, 0x109B7}, + {0x109BE, 0x109BF}, {0x10A00, 0x10A00}, {0x10A10, 0x10A13}, + {0x10A15, 0x10A17}, {0x10A19, 0x10A35}, {0x10A60, 0x10A7C}, + {0x10A80, 0x10A9C}, {0x10AC0, 0x10AC7}, {0x10AC9, 0x10AE4}, + {0x10B00, 0x10B35}, {0x10B40, 0x10B55}, {0x10B60, 0x10B72}, + {0x10B80, 0x10B91}, {0x10C00, 0x10C48}, {0x10C80, 0x10CB2}, + {0x10CC0, 0x10CF2}, {0x10D00, 0x10D23}, {0x10E80, 0x10EA9}, + {0x10EB0, 0x10EB1}, {0x10F00, 0x10F1C}, {0x10F27, 0x10F27}, + {0x10F30, 0x10F45}, {0x10FB0, 0x10FC4}, {0x10FE0, 0x10FF6}, + {0x11003, 0x11037}, {0x11083, 0x110AF}, {0x110D0, 0x110E8}, + {0x11103, 0x11126}, {0x11144, 0x11144}, {0x11147, 0x11147}, + {0x11150, 0x11172}, {0x11176, 0x11176}, {0x11183, 0x111B2}, + {0x111C1, 0x111C4}, {0x111DA, 0x111DA}, {0x111DC, 0x111DC}, + {0x11200, 0x11211}, {0x11213, 0x1122B}, {0x11280, 0x11286}, + {0x11288, 0x11288}, {0x1128A, 0x1128D}, {0x1128F, 0x1129D}, + {0x1129F, 0x112A8}, {0x112B0, 0x112DE}, {0x11305, 0x1130C}, + {0x1130F, 0x11310}, {0x11313, 0x11328}, {0x1132A, 0x11330}, + {0x11332, 0x11333}, {0x11335, 0x11339}, {0x1133D, 0x1133D}, + {0x11350, 0x11350}, {0x1135D, 0x11361}, {0x11400, 0x11434}, + {0x11447, 0x1144A}, {0x1145F, 0x11461}, {0x11480, 0x114AF}, + {0x114C4, 0x114C5}, {0x114C7, 0x114C7}, {0x11580, 0x115AE}, + {0x115D8, 0x115DB}, {0x11600, 0x1162F}, {0x11644, 0x11644}, + {0x11680, 0x116AA}, {0x116B8, 0x116B8}, {0x11700, 0x1171A}, + {0x11800, 0x1182B}, {0x118A0, 0x118DF}, {0x118FF, 0x11906}, + {0x11909, 0x11909}, {0x1190C, 0x11913}, {0x11915, 0x11916}, + {0x11918, 0x1192F}, {0x1193F, 0x1193F}, {0x11941, 0x11941}, + {0x119A0, 0x119A7}, {0x119AA, 0x119D0}, {0x119E1, 0x119E1}, + {0x119E3, 0x119E3}, {0x11A00, 0x11A00}, {0x11A0B, 0x11A32}, + {0x11A3A, 0x11A3A}, {0x11A50, 0x11A50}, {0x11A5C, 0x11A89}, + {0x11A9D, 0x11A9D}, {0x11AC0, 0x11AF8}, {0x11C00, 0x11C08}, + {0x11C0A, 0x11C2E}, {0x11C40, 0x11C40}, {0x11C72, 0x11C8F}, + {0x11D00, 0x11D06}, {0x11D08, 0x11D09}, {0x11D0B, 0x11D30}, + {0x11D46, 0x11D46}, {0x11D60, 0x11D65}, {0x11D67, 0x11D68}, + {0x11D6A, 0x11D89}, {0x11D98, 0x11D98}, {0x11EE0, 0x11EF2}, + {0x11FB0, 0x11FB0}, {0x12000, 0x12399}, {0x12400, 0x1246E}, + {0x12480, 0x12543}, {0x13000, 0x1342E}, {0x14400, 0x14646}, + {0x16800, 0x16A38}, {0x16A40, 0x16A5E}, {0x16AD0, 0x16AED}, + {0x16B00, 
0x16B2F}, {0x16B40, 0x16B43}, {0x16B63, 0x16B77}, + {0x16B7D, 0x16B8F}, {0x16E40, 0x16E7F}, {0x16F00, 0x16F4A}, + {0x16F50, 0x16F50}, {0x16F93, 0x16F9F}, {0x16FE0, 0x16FE1}, + {0x16FE3, 0x16FE3}, {0x17000, 0x187F7}, {0x18800, 0x18CD5}, + {0x18D00, 0x18D08}, {0x1B000, 0x1B11E}, {0x1B150, 0x1B152}, + {0x1B164, 0x1B167}, {0x1B170, 0x1B2FB}, {0x1BC00, 0x1BC6A}, + {0x1BC70, 0x1BC7C}, {0x1BC80, 0x1BC88}, {0x1BC90, 0x1BC99}, + {0x1D400, 0x1D454}, {0x1D456, 0x1D49C}, {0x1D49E, 0x1D49F}, + {0x1D4A2, 0x1D4A2}, {0x1D4A5, 0x1D4A6}, {0x1D4A9, 0x1D4AC}, + {0x1D4AE, 0x1D4B9}, {0x1D4BB, 0x1D4BB}, {0x1D4BD, 0x1D4C3}, + {0x1D4C5, 0x1D505}, {0x1D507, 0x1D50A}, {0x1D50D, 0x1D514}, + {0x1D516, 0x1D51C}, {0x1D51E, 0x1D539}, {0x1D53B, 0x1D53E}, + {0x1D540, 0x1D544}, {0x1D546, 0x1D546}, {0x1D54A, 0x1D550}, + {0x1D552, 0x1D6A5}, {0x1D6A8, 0x1D6C0}, {0x1D6C2, 0x1D6DA}, + {0x1D6DC, 0x1D6FA}, {0x1D6FC, 0x1D714}, {0x1D716, 0x1D734}, + {0x1D736, 0x1D74E}, {0x1D750, 0x1D76E}, {0x1D770, 0x1D788}, + {0x1D78A, 0x1D7A8}, {0x1D7AA, 0x1D7C2}, {0x1D7C4, 0x1D7CB}, + {0x1E100, 0x1E12C}, {0x1E137, 0x1E13D}, {0x1E14E, 0x1E14E}, + {0x1E2C0, 0x1E2EB}, {0x1E800, 0x1E8C4}, {0x1E900, 0x1E943}, + {0x1E94B, 0x1E94B}, {0x1EE00, 0x1EE03}, {0x1EE05, 0x1EE1F}, + {0x1EE21, 0x1EE22}, {0x1EE24, 0x1EE24}, {0x1EE27, 0x1EE27}, + {0x1EE29, 0x1EE32}, {0x1EE34, 0x1EE37}, {0x1EE39, 0x1EE39}, + {0x1EE3B, 0x1EE3B}, {0x1EE42, 0x1EE42}, {0x1EE47, 0x1EE47}, + {0x1EE49, 0x1EE49}, {0x1EE4B, 0x1EE4B}, {0x1EE4D, 0x1EE4F}, + {0x1EE51, 0x1EE52}, {0x1EE54, 0x1EE54}, {0x1EE57, 0x1EE57}, + {0x1EE59, 0x1EE59}, {0x1EE5B, 0x1EE5B}, {0x1EE5D, 0x1EE5D}, + {0x1EE5F, 0x1EE5F}, {0x1EE61, 0x1EE62}, {0x1EE64, 0x1EE64}, + {0x1EE67, 0x1EE6A}, {0x1EE6C, 0x1EE72}, {0x1EE74, 0x1EE77}, + {0x1EE79, 0x1EE7C}, {0x1EE7E, 0x1EE7E}, {0x1EE80, 0x1EE89}, + {0x1EE8B, 0x1EE9B}, {0x1EEA1, 0x1EEA3}, {0x1EEA5, 0x1EEA9}, + {0x1EEAB, 0x1EEBB}, {0x20000, 0x2A6DD}, {0x2A700, 0x2B734}, + {0x2B740, 0x2B81D}, {0x2B820, 0x2CEA1}, {0x2CEB0, 0x2EBE0}, + {0x2F800, 0x2FA1D}, {0x30000, 0x3134A}}; + +// Unicode 13 XID_Continue, excluding XID_Start +// The Unicode Property XID_Continue is a super set of XID_Start. +// To save Space, the table below only contains the codepoints +// that are not also in XID_Start. 
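
[Note] The two tables added here (XIDStartRanges above, XIDContinueRanges below) are sorted, inclusive codepoint ranges for Unicode 13; XIDContinueRanges deliberately omits everything already covered by XIDStartRanges, which is why the lexer consults both. They are queried through llvm::sys::UnicodeCharSet, which binary-searches the ranges. A rough sketch of the lookup pattern, mirroring the isAllowedIDChar changes earlier in this patch (the free-standing helper below is hypothetical; the arrays are the ones defined in UnicodeCharSets.h):

  #include "llvm/Support/UnicodeCharRanges.h"
  #include <cstdint>

  // Assumes XIDStartRanges / XIDContinueRanges from UnicodeCharSets.h are visible.
  static bool isXIDContinue(uint32_t C) {
    static const llvm::sys::UnicodeCharSet XIDStartChars(XIDStartRanges);
    static const llvm::sys::UnicodeCharSet XIDContinueChars(XIDContinueRanges);
    // '_' (U+005F) appears in XIDContinueRanges, so only a *leading* '_' needs
    // the special case that Lexer.cpp adds for isAllowedInitiallyIDChar.
    return XIDStartChars.contains(C) || XIDContinueChars.contains(C);
  }
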
+static const llvm::sys::UnicodeCharRange XIDContinueRanges[] = { + {0x0030, 0x0039}, {0x005F, 0x005F}, {0x00B7, 0x00B7}, + {0x0300, 0x036F}, {0x0387, 0x0387}, {0x0483, 0x0487}, + {0x0591, 0x05BD}, {0x05BF, 0x05BF}, {0x05C1, 0x05C2}, + {0x05C4, 0x05C5}, {0x05C7, 0x05C7}, {0x0610, 0x061A}, + {0x064B, 0x0669}, {0x0670, 0x0670}, {0x06D6, 0x06DC}, + {0x06DF, 0x06E4}, {0x06E7, 0x06E8}, {0x06EA, 0x06ED}, + {0x06F0, 0x06F9}, {0x0711, 0x0711}, {0x0730, 0x074A}, + {0x07A6, 0x07B0}, {0x07C0, 0x07C9}, {0x07EB, 0x07F3}, + {0x07FD, 0x07FD}, {0x0816, 0x0819}, {0x081B, 0x0823}, + {0x0825, 0x0827}, {0x0829, 0x082D}, {0x0859, 0x085B}, + {0x08D3, 0x08E1}, {0x08E3, 0x0903}, {0x093A, 0x093C}, + {0x093E, 0x094F}, {0x0951, 0x0957}, {0x0962, 0x0963}, + {0x0966, 0x096F}, {0x0981, 0x0983}, {0x09BC, 0x09BC}, + {0x09BE, 0x09C4}, {0x09C7, 0x09C8}, {0x09CB, 0x09CD}, + {0x09D7, 0x09D7}, {0x09E2, 0x09E3}, {0x09E6, 0x09EF}, + {0x09FE, 0x09FE}, {0x0A01, 0x0A03}, {0x0A3C, 0x0A3C}, + {0x0A3E, 0x0A42}, {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, + {0x0A51, 0x0A51}, {0x0A66, 0x0A71}, {0x0A75, 0x0A75}, + {0x0A81, 0x0A83}, {0x0ABC, 0x0ABC}, {0x0ABE, 0x0AC5}, + {0x0AC7, 0x0AC9}, {0x0ACB, 0x0ACD}, {0x0AE2, 0x0AE3}, + {0x0AE6, 0x0AEF}, {0x0AFA, 0x0AFF}, {0x0B01, 0x0B03}, + {0x0B3C, 0x0B3C}, {0x0B3E, 0x0B44}, {0x0B47, 0x0B48}, + {0x0B4B, 0x0B4D}, {0x0B55, 0x0B57}, {0x0B62, 0x0B63}, + {0x0B66, 0x0B6F}, {0x0B82, 0x0B82}, {0x0BBE, 0x0BC2}, + {0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCD}, {0x0BD7, 0x0BD7}, + {0x0BE6, 0x0BEF}, {0x0C00, 0x0C04}, {0x0C3E, 0x0C44}, + {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D}, {0x0C55, 0x0C56}, + {0x0C62, 0x0C63}, {0x0C66, 0x0C6F}, {0x0C81, 0x0C83}, + {0x0CBC, 0x0CBC}, {0x0CBE, 0x0CC4}, {0x0CC6, 0x0CC8}, + {0x0CCA, 0x0CCD}, {0x0CD5, 0x0CD6}, {0x0CE2, 0x0CE3}, + {0x0CE6, 0x0CEF}, {0x0D00, 0x0D03}, {0x0D3B, 0x0D3C}, + {0x0D3E, 0x0D44}, {0x0D46, 0x0D48}, {0x0D4A, 0x0D4D}, + {0x0D57, 0x0D57}, {0x0D62, 0x0D63}, {0x0D66, 0x0D6F}, + {0x0D81, 0x0D83}, {0x0DCA, 0x0DCA}, {0x0DCF, 0x0DD4}, + {0x0DD6, 0x0DD6}, {0x0DD8, 0x0DDF}, {0x0DE6, 0x0DEF}, + {0x0DF2, 0x0DF3}, {0x0E31, 0x0E31}, {0x0E33, 0x0E3A}, + {0x0E47, 0x0E4E}, {0x0E50, 0x0E59}, {0x0EB1, 0x0EB1}, + {0x0EB3, 0x0EBC}, {0x0EC8, 0x0ECD}, {0x0ED0, 0x0ED9}, + {0x0F18, 0x0F19}, {0x0F20, 0x0F29}, {0x0F35, 0x0F35}, + {0x0F37, 0x0F37}, {0x0F39, 0x0F39}, {0x0F3E, 0x0F3F}, + {0x0F71, 0x0F84}, {0x0F86, 0x0F87}, {0x0F8D, 0x0F97}, + {0x0F99, 0x0FBC}, {0x0FC6, 0x0FC6}, {0x102B, 0x103E}, + {0x1040, 0x1049}, {0x1056, 0x1059}, {0x105E, 0x1060}, + {0x1062, 0x1064}, {0x1067, 0x106D}, {0x1071, 0x1074}, + {0x1082, 0x108D}, {0x108F, 0x109D}, {0x135D, 0x135F}, + {0x1369, 0x1371}, {0x1712, 0x1714}, {0x1732, 0x1734}, + {0x1752, 0x1753}, {0x1772, 0x1773}, {0x17B4, 0x17D3}, + {0x17DD, 0x17DD}, {0x17E0, 0x17E9}, {0x180B, 0x180D}, + {0x1810, 0x1819}, {0x18A9, 0x18A9}, {0x1920, 0x192B}, + {0x1930, 0x193B}, {0x1946, 0x194F}, {0x19D0, 0x19DA}, + {0x1A17, 0x1A1B}, {0x1A55, 0x1A5E}, {0x1A60, 0x1A7C}, + {0x1A7F, 0x1A89}, {0x1A90, 0x1A99}, {0x1AB0, 0x1ABD}, + {0x1ABF, 0x1AC0}, {0x1B00, 0x1B04}, {0x1B34, 0x1B44}, + {0x1B50, 0x1B59}, {0x1B6B, 0x1B73}, {0x1B80, 0x1B82}, + {0x1BA1, 0x1BAD}, {0x1BB0, 0x1BB9}, {0x1BE6, 0x1BF3}, + {0x1C24, 0x1C37}, {0x1C40, 0x1C49}, {0x1C50, 0x1C59}, + {0x1CD0, 0x1CD2}, {0x1CD4, 0x1CE8}, {0x1CED, 0x1CED}, + {0x1CF4, 0x1CF4}, {0x1CF7, 0x1CF9}, {0x1DC0, 0x1DF9}, + {0x1DFB, 0x1DFF}, {0x203F, 0x2040}, {0x2054, 0x2054}, + {0x20D0, 0x20DC}, {0x20E1, 0x20E1}, {0x20E5, 0x20F0}, + {0x2CEF, 0x2CF1}, {0x2D7F, 0x2D7F}, {0x2DE0, 0x2DFF}, + {0x302A, 0x302F}, {0x3099, 0x309A}, {0xA620, 0xA629}, + {0xA66F, 
0xA66F}, {0xA674, 0xA67D}, {0xA69E, 0xA69F}, + {0xA6F0, 0xA6F1}, {0xA802, 0xA802}, {0xA806, 0xA806}, + {0xA80B, 0xA80B}, {0xA823, 0xA827}, {0xA82C, 0xA82C}, + {0xA880, 0xA881}, {0xA8B4, 0xA8C5}, {0xA8D0, 0xA8D9}, + {0xA8E0, 0xA8F1}, {0xA8FF, 0xA909}, {0xA926, 0xA92D}, + {0xA947, 0xA953}, {0xA980, 0xA983}, {0xA9B3, 0xA9C0}, + {0xA9D0, 0xA9D9}, {0xA9E5, 0xA9E5}, {0xA9F0, 0xA9F9}, + {0xAA29, 0xAA36}, {0xAA43, 0xAA43}, {0xAA4C, 0xAA4D}, + {0xAA50, 0xAA59}, {0xAA7B, 0xAA7D}, {0xAAB0, 0xAAB0}, + {0xAAB2, 0xAAB4}, {0xAAB7, 0xAAB8}, {0xAABE, 0xAABF}, + {0xAAC1, 0xAAC1}, {0xAAEB, 0xAAEF}, {0xAAF5, 0xAAF6}, + {0xABE3, 0xABEA}, {0xABEC, 0xABED}, {0xABF0, 0xABF9}, + {0xFB1E, 0xFB1E}, {0xFE00, 0xFE0F}, {0xFE20, 0xFE2F}, + {0xFE33, 0xFE34}, {0xFE4D, 0xFE4F}, {0xFF10, 0xFF19}, + {0xFF3F, 0xFF3F}, {0xFF9E, 0xFF9F}, {0x101FD, 0x101FD}, + {0x102E0, 0x102E0}, {0x10376, 0x1037A}, {0x104A0, 0x104A9}, + {0x10A01, 0x10A03}, {0x10A05, 0x10A06}, {0x10A0C, 0x10A0F}, + {0x10A38, 0x10A3A}, {0x10A3F, 0x10A3F}, {0x10AE5, 0x10AE6}, + {0x10D24, 0x10D27}, {0x10D30, 0x10D39}, {0x10EAB, 0x10EAC}, + {0x10F46, 0x10F50}, {0x11000, 0x11002}, {0x11038, 0x11046}, + {0x11066, 0x1106F}, {0x1107F, 0x11082}, {0x110B0, 0x110BA}, + {0x110F0, 0x110F9}, {0x11100, 0x11102}, {0x11127, 0x11134}, + {0x11136, 0x1113F}, {0x11145, 0x11146}, {0x11173, 0x11173}, + {0x11180, 0x11182}, {0x111B3, 0x111C0}, {0x111C9, 0x111CC}, + {0x111CE, 0x111D9}, {0x1122C, 0x11237}, {0x1123E, 0x1123E}, + {0x112DF, 0x112EA}, {0x112F0, 0x112F9}, {0x11300, 0x11303}, + {0x1133B, 0x1133C}, {0x1133E, 0x11344}, {0x11347, 0x11348}, + {0x1134B, 0x1134D}, {0x11357, 0x11357}, {0x11362, 0x11363}, + {0x11366, 0x1136C}, {0x11370, 0x11374}, {0x11435, 0x11446}, + {0x11450, 0x11459}, {0x1145E, 0x1145E}, {0x114B0, 0x114C3}, + {0x114D0, 0x114D9}, {0x115AF, 0x115B5}, {0x115B8, 0x115C0}, + {0x115DC, 0x115DD}, {0x11630, 0x11640}, {0x11650, 0x11659}, + {0x116AB, 0x116B7}, {0x116C0, 0x116C9}, {0x1171D, 0x1172B}, + {0x11730, 0x11739}, {0x1182C, 0x1183A}, {0x118E0, 0x118E9}, + {0x11930, 0x11935}, {0x11937, 0x11938}, {0x1193B, 0x1193E}, + {0x11940, 0x11940}, {0x11942, 0x11943}, {0x11950, 0x11959}, + {0x119D1, 0x119D7}, {0x119DA, 0x119E0}, {0x119E4, 0x119E4}, + {0x11A01, 0x11A0A}, {0x11A33, 0x11A39}, {0x11A3B, 0x11A3E}, + {0x11A47, 0x11A47}, {0x11A51, 0x11A5B}, {0x11A8A, 0x11A99}, + {0x11C2F, 0x11C36}, {0x11C38, 0x11C3F}, {0x11C50, 0x11C59}, + {0x11C92, 0x11CA7}, {0x11CA9, 0x11CB6}, {0x11D31, 0x11D36}, + {0x11D3A, 0x11D3A}, {0x11D3C, 0x11D3D}, {0x11D3F, 0x11D45}, + {0x11D47, 0x11D47}, {0x11D50, 0x11D59}, {0x11D8A, 0x11D8E}, + {0x11D90, 0x11D91}, {0x11D93, 0x11D97}, {0x11DA0, 0x11DA9}, + {0x11EF3, 0x11EF6}, {0x16A60, 0x16A69}, {0x16AF0, 0x16AF4}, + {0x16B30, 0x16B36}, {0x16B50, 0x16B59}, {0x16F4F, 0x16F4F}, + {0x16F51, 0x16F87}, {0x16F8F, 0x16F92}, {0x16FE4, 0x16FE4}, + {0x16FF0, 0x16FF1}, {0x1BC9D, 0x1BC9E}, {0x1D165, 0x1D169}, + {0x1D16D, 0x1D172}, {0x1D17B, 0x1D182}, {0x1D185, 0x1D18B}, + {0x1D1AA, 0x1D1AD}, {0x1D242, 0x1D244}, {0x1D7CE, 0x1D7FF}, + {0x1DA00, 0x1DA36}, {0x1DA3B, 0x1DA6C}, {0x1DA75, 0x1DA75}, + {0x1DA84, 0x1DA84}, {0x1DA9B, 0x1DA9F}, {0x1DAA1, 0x1DAAF}, + {0x1E000, 0x1E006}, {0x1E008, 0x1E018}, {0x1E01B, 0x1E021}, + {0x1E023, 0x1E024}, {0x1E026, 0x1E02A}, {0x1E130, 0x1E136}, + {0x1E140, 0x1E149}, {0x1E2EC, 0x1E2F9}, {0x1E8D0, 0x1E8D6}, + {0x1E944, 0x1E94A}, {0x1E950, 0x1E959}, {0x1FBF0, 0x1FBF9}, + {0xE0100, 0xE01EF}}; + // C11 D.1, C++11 [charname.allowed] static const llvm::sys::UnicodeCharRange C11AllowedIDCharRanges[] = { // 1 @@ -40,127 +378,6 @@ static const 
llvm::sys::UnicodeCharRange C11AllowedIDCharRanges[] = { { 0xD0000, 0xDFFFD }, { 0xE0000, 0xEFFFD } }; -// C++03 [extendid] -// Note that this is not the same as C++98, but we don't distinguish C++98 -// and C++03 in Clang. -static const llvm::sys::UnicodeCharRange CXX03AllowedIDCharRanges[] = { - // Latin - { 0x00C0, 0x00D6 }, { 0x00D8, 0x00F6 }, { 0x00F8, 0x01F5 }, - { 0x01FA, 0x0217 }, { 0x0250, 0x02A8 }, - - // Greek - { 0x0384, 0x0384 }, { 0x0388, 0x038A }, { 0x038C, 0x038C }, - { 0x038E, 0x03A1 }, { 0x03A3, 0x03CE }, { 0x03D0, 0x03D6 }, - { 0x03DA, 0x03DA }, { 0x03DC, 0x03DC }, { 0x03DE, 0x03DE }, - { 0x03E0, 0x03E0 }, { 0x03E2, 0x03F3 }, - - // Cyrillic - { 0x0401, 0x040D }, { 0x040F, 0x044F }, { 0x0451, 0x045C }, - { 0x045E, 0x0481 }, { 0x0490, 0x04C4 }, { 0x04C7, 0x04C8 }, - { 0x04CB, 0x04CC }, { 0x04D0, 0x04EB }, { 0x04EE, 0x04F5 }, - { 0x04F8, 0x04F9 }, - - // Armenian - { 0x0531, 0x0556 }, { 0x0561, 0x0587 }, - - // Hebrew - { 0x05D0, 0x05EA }, { 0x05F0, 0x05F4 }, - - // Arabic - { 0x0621, 0x063A }, { 0x0640, 0x0652 }, { 0x0670, 0x06B7 }, - { 0x06BA, 0x06BE }, { 0x06C0, 0x06CE }, { 0x06E5, 0x06E7 }, - - // Devanagari - { 0x0905, 0x0939 }, { 0x0958, 0x0962 }, - - // Bengali - { 0x0985, 0x098C }, { 0x098F, 0x0990 }, { 0x0993, 0x09A8 }, - { 0x09AA, 0x09B0 }, { 0x09B2, 0x09B2 }, { 0x09B6, 0x09B9 }, - { 0x09DC, 0x09DD }, { 0x09DF, 0x09E1 }, { 0x09F0, 0x09F1 }, - - // Gurmukhi - { 0x0A05, 0x0A0A }, { 0x0A0F, 0x0A10 }, { 0x0A13, 0x0A28 }, - { 0x0A2A, 0x0A30 }, { 0x0A32, 0x0A33 }, { 0x0A35, 0x0A36 }, - { 0x0A38, 0x0A39 }, { 0x0A59, 0x0A5C }, { 0x0A5E, 0x0A5E }, - - // Gujarti - { 0x0A85, 0x0A8B }, { 0x0A8D, 0x0A8D }, { 0x0A8F, 0x0A91 }, - { 0x0A93, 0x0AA8 }, { 0x0AAA, 0x0AB0 }, { 0x0AB2, 0x0AB3 }, - { 0x0AB5, 0x0AB9 }, { 0x0AE0, 0x0AE0 }, - - // Oriya - { 0x0B05, 0x0B0C }, { 0x0B0F, 0x0B10 }, { 0x0B13, 0x0B28 }, - { 0x0B2A, 0x0B30 }, { 0x0B32, 0x0B33 }, { 0x0B36, 0x0B39 }, - { 0x0B5C, 0x0B5D }, { 0x0B5F, 0x0B61 }, - - // Tamil - { 0x0B85, 0x0B8A }, { 0x0B8E, 0x0B90 }, { 0x0B92, 0x0B95 }, - { 0x0B99, 0x0B9A }, { 0x0B9C, 0x0B9C }, { 0x0B9E, 0x0B9F }, - { 0x0BA3, 0x0BA4 }, { 0x0BA8, 0x0BAA }, { 0x0BAE, 0x0BB5 }, - { 0x0BB7, 0x0BB9 }, - - // Telugu - { 0x0C05, 0x0C0C }, { 0x0C0E, 0x0C10 }, { 0x0C12, 0x0C28 }, - { 0x0C2A, 0x0C33 }, { 0x0C35, 0x0C39 }, { 0x0C60, 0x0C61 }, - - // Kannada - { 0x0C85, 0x0C8C }, { 0x0C8E, 0x0C90 }, { 0x0C92, 0x0CA8 }, - { 0x0CAA, 0x0CB3 }, { 0x0CB5, 0x0CB9 }, { 0x0CE0, 0x0CE1 }, - - // Malayam - { 0x0D05, 0x0D0C }, { 0x0D0E, 0x0D10 }, { 0x0D12, 0x0D28 }, - { 0x0D2A, 0x0D39 }, { 0x0D60, 0x0D61 }, - - // Thai - { 0x0E01, 0x0E30 }, { 0x0E32, 0x0E33 }, { 0x0E40, 0x0E46 }, - { 0x0E4F, 0x0E5B }, - - // Lao - { 0x0E81, 0x0E82 }, { 0x0E84, 0x0E84 }, { 0x0E87, 0x0E87 }, - { 0x0E88, 0x0E88 }, { 0x0E8A, 0x0E8A }, { 0x0E8D, 0x0E8D }, - { 0x0E94, 0x0E97 }, { 0x0E99, 0x0E9F }, { 0x0EA1, 0x0EA3 }, - { 0x0EA5, 0x0EA5 }, { 0x0EA7, 0x0EA7 }, { 0x0EAA, 0x0EAA }, - { 0x0EAB, 0x0EAB }, { 0x0EAD, 0x0EB0 }, { 0x0EB2, 0x0EB2 }, - { 0x0EB3, 0x0EB3 }, { 0x0EBD, 0x0EBD }, { 0x0EC0, 0x0EC4 }, - { 0x0EC6, 0x0EC6 }, - - // Georgian - { 0x10A0, 0x10C5 }, { 0x10D0, 0x10F6 }, - - // Hangul - { 0x1100, 0x1159 }, { 0x1161, 0x11A2 }, { 0x11A8, 0x11F9 }, - - // Latin (2) - { 0x1E00, 0x1E9A }, { 0x1EA0, 0x1EF9 }, - - // Greek (2) - { 0x1F00, 0x1F15 }, { 0x1F18, 0x1F1D }, { 0x1F20, 0x1F45 }, - { 0x1F48, 0x1F4D }, { 0x1F50, 0x1F57 }, { 0x1F59, 0x1F59 }, - { 0x1F5B, 0x1F5B }, { 0x1F5D, 0x1F5D }, { 0x1F5F, 0x1F7D }, - { 0x1F80, 0x1FB4 }, { 0x1FB6, 0x1FBC }, { 0x1FC2, 0x1FC4 }, - { 0x1FC6, 0x1FCC }, { 
0x1FD0, 0x1FD3 }, { 0x1FD6, 0x1FDB }, - { 0x1FE0, 0x1FEC }, { 0x1FF2, 0x1FF4 }, { 0x1FF6, 0x1FFC }, - - // Hiragana - { 0x3041, 0x3094 }, { 0x309B, 0x309E }, - - // Katakana - { 0x30A1, 0x30FE }, - - // Bopmofo [sic] - { 0x3105, 0x312C }, - - // CJK Unified Ideographs - { 0x4E00, 0x9FA5 }, { 0xF900, 0xFA2D }, { 0xFB1F, 0xFB36 }, - { 0xFB38, 0xFB3C }, { 0xFB3E, 0xFB3E }, { 0xFB40, 0xFB41 }, - { 0xFB42, 0xFB44 }, { 0xFB46, 0xFBB1 }, { 0xFBD3, 0xFD3F }, - { 0xFD50, 0xFD8F }, { 0xFD92, 0xFDC7 }, { 0xFDF0, 0xFDFB }, - { 0xFE70, 0xFE72 }, { 0xFE74, 0xFE74 }, { 0xFE76, 0xFEFC }, - { 0xFF21, 0xFF3A }, { 0xFF41, 0xFF5A }, { 0xFF66, 0xFFBE }, - { 0xFFC2, 0xFFC7 }, { 0xFFCA, 0xFFCF }, { 0xFFD2, 0xFFD7 }, - { 0xFFDA, 0xFFDC } -}; - // C99 Annex D static const llvm::sys::UnicodeCharRange C99AllowedIDCharRanges[] = { // Latin (1) diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp index 4896640a4eabe..7870b3d30d70e 100644 --- a/clang/lib/Parse/ParseExpr.cpp +++ b/clang/lib/Parse/ParseExpr.cpp @@ -2401,7 +2401,7 @@ Parser::ParseExprAfterUnaryExprOrTypeTrait(const Token &OpTok, /// a parameter. ExprResult Parser::ParseSYCLUniqueStableNameExpression() { assert(Tok.is(tok::kw___builtin_sycl_unique_stable_name) && - "Not __bulitin_sycl_unique_stable_name"); + "Not __builtin_sycl_unique_stable_name"); SourceLocation OpLoc = ConsumeToken(); BalancedDelimiterTracker T(*this, tok::l_paren); diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp index aa2602c8d9256..99ce143d3559d 100644 --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -1125,7 +1125,7 @@ namespace { // unreachable in all instantiations of the template. if (!IsTemplateInstantiation) S.Diag(AS->getBeginLoc(), - diag::warn_fallthrough_attr_unreachable); + diag::warn_unreachable_fallthrough_attr); markFallthroughVisited(AS); ++AnnotatedCnt; break; diff --git a/clang/lib/Sema/DeclSpec.cpp b/clang/lib/Sema/DeclSpec.cpp index 72d9ea6dd3bf3..7fee9545f8b28 100644 --- a/clang/lib/Sema/DeclSpec.cpp +++ b/clang/lib/Sema/DeclSpec.cpp @@ -1300,8 +1300,8 @@ void DeclSpec::Finish(Sema &S, const PrintingPolicy &Policy) { if (!S.getLangOpts().CPlusPlus) S.Diag(TSTLoc, diag::ext_integer_complex); } else if (TypeSpecType != TST_float && TypeSpecType != TST_double && - TypeSpecType != TST_float128) { - // FIXME: _Float16, __fp16? + TypeSpecType != TST_float128 && TypeSpecType != TST_float16) { + // FIXME: __fp16? 
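A minimal sketch of what the relaxed condition above permits (hypothetical snippet, not part of the patch; assumes a target where _Float16 is a supported type):

  _Complex float    fc;   // accepted before and after this change
  _Complex _Float16 hc;   // no longer rejected here with err_invalid_complex_spec
  // _Complex __fp16 remains an open question, per the FIXME above.
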
S.Diag(TSCLoc, diag::err_invalid_complex_spec) << getSpecifierName((TST)TypeSpecType, Policy); TypeSpecComplex = TSC_unspecified; diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index ecf94a1d78070..7332ca06d5d9c 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -3903,14 +3903,22 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) { case X86::BI__builtin_ia32_vcvttss2si64: case X86::BI__builtin_ia32_vcvttss2usi32: case X86::BI__builtin_ia32_vcvttss2usi64: + case X86::BI__builtin_ia32_vcvttsh2si32: + case X86::BI__builtin_ia32_vcvttsh2si64: + case X86::BI__builtin_ia32_vcvttsh2usi32: + case X86::BI__builtin_ia32_vcvttsh2usi64: ArgNum = 1; break; case X86::BI__builtin_ia32_maxpd512: case X86::BI__builtin_ia32_maxps512: case X86::BI__builtin_ia32_minpd512: case X86::BI__builtin_ia32_minps512: + case X86::BI__builtin_ia32_maxph512: + case X86::BI__builtin_ia32_minph512: ArgNum = 2; break; + case X86::BI__builtin_ia32_vcvtph2pd512_mask: + case X86::BI__builtin_ia32_vcvtph2psx512_mask: case X86::BI__builtin_ia32_cvtps2pd512_mask: case X86::BI__builtin_ia32_cvttpd2dq512_mask: case X86::BI__builtin_ia32_cvttpd2qq512_mask: @@ -3920,16 +3928,24 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) { case X86::BI__builtin_ia32_cvttps2qq512_mask: case X86::BI__builtin_ia32_cvttps2udq512_mask: case X86::BI__builtin_ia32_cvttps2uqq512_mask: + case X86::BI__builtin_ia32_vcvttph2w512_mask: + case X86::BI__builtin_ia32_vcvttph2uw512_mask: + case X86::BI__builtin_ia32_vcvttph2dq512_mask: + case X86::BI__builtin_ia32_vcvttph2udq512_mask: + case X86::BI__builtin_ia32_vcvttph2qq512_mask: + case X86::BI__builtin_ia32_vcvttph2uqq512_mask: case X86::BI__builtin_ia32_exp2pd_mask: case X86::BI__builtin_ia32_exp2ps_mask: case X86::BI__builtin_ia32_getexppd512_mask: case X86::BI__builtin_ia32_getexpps512_mask: + case X86::BI__builtin_ia32_getexpph512_mask: case X86::BI__builtin_ia32_rcp28pd_mask: case X86::BI__builtin_ia32_rcp28ps_mask: case X86::BI__builtin_ia32_rsqrt28pd_mask: case X86::BI__builtin_ia32_rsqrt28ps_mask: case X86::BI__builtin_ia32_vcomisd: case X86::BI__builtin_ia32_vcomiss: + case X86::BI__builtin_ia32_vcomish: case X86::BI__builtin_ia32_vcvtph2ps512_mask: ArgNum = 3; break; @@ -3937,21 +3953,30 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) { case X86::BI__builtin_ia32_cmpps512_mask: case X86::BI__builtin_ia32_cmpsd_mask: case X86::BI__builtin_ia32_cmpss_mask: + case X86::BI__builtin_ia32_cmpsh_mask: + case X86::BI__builtin_ia32_vcvtsh2sd_round_mask: + case X86::BI__builtin_ia32_vcvtsh2ss_round_mask: case X86::BI__builtin_ia32_cvtss2sd_round_mask: case X86::BI__builtin_ia32_getexpsd128_round_mask: case X86::BI__builtin_ia32_getexpss128_round_mask: + case X86::BI__builtin_ia32_getexpsh128_round_mask: case X86::BI__builtin_ia32_getmantpd512_mask: case X86::BI__builtin_ia32_getmantps512_mask: + case X86::BI__builtin_ia32_getmantph512_mask: case X86::BI__builtin_ia32_maxsd_round_mask: case X86::BI__builtin_ia32_maxss_round_mask: + case X86::BI__builtin_ia32_maxsh_round_mask: case X86::BI__builtin_ia32_minsd_round_mask: case X86::BI__builtin_ia32_minss_round_mask: + case X86::BI__builtin_ia32_minsh_round_mask: case X86::BI__builtin_ia32_rcp28sd_round_mask: case X86::BI__builtin_ia32_rcp28ss_round_mask: case X86::BI__builtin_ia32_reducepd512_mask: case X86::BI__builtin_ia32_reduceps512_mask: + case X86::BI__builtin_ia32_reduceph512_mask: case 
X86::BI__builtin_ia32_rndscalepd_mask: case X86::BI__builtin_ia32_rndscaleps_mask: + case X86::BI__builtin_ia32_rndscaleph_mask: case X86::BI__builtin_ia32_rsqrt28sd_round_mask: case X86::BI__builtin_ia32_rsqrt28ss_round_mask: ArgNum = 4; @@ -3966,14 +3991,17 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) { case X86::BI__builtin_ia32_fixupimmss_maskz: case X86::BI__builtin_ia32_getmantsd_round_mask: case X86::BI__builtin_ia32_getmantss_round_mask: + case X86::BI__builtin_ia32_getmantsh_round_mask: case X86::BI__builtin_ia32_rangepd512_mask: case X86::BI__builtin_ia32_rangeps512_mask: case X86::BI__builtin_ia32_rangesd128_round_mask: case X86::BI__builtin_ia32_rangess128_round_mask: case X86::BI__builtin_ia32_reducesd_mask: case X86::BI__builtin_ia32_reducess_mask: + case X86::BI__builtin_ia32_reducesh_mask: case X86::BI__builtin_ia32_rndscalesd_round_mask: case X86::BI__builtin_ia32_rndscaless_round_mask: + case X86::BI__builtin_ia32_rndscalesh_round_mask: ArgNum = 5; break; case X86::BI__builtin_ia32_vcvtsd2si64: @@ -3984,11 +4012,20 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) { case X86::BI__builtin_ia32_vcvtss2si64: case X86::BI__builtin_ia32_vcvtss2usi32: case X86::BI__builtin_ia32_vcvtss2usi64: + case X86::BI__builtin_ia32_vcvtsh2si32: + case X86::BI__builtin_ia32_vcvtsh2si64: + case X86::BI__builtin_ia32_vcvtsh2usi32: + case X86::BI__builtin_ia32_vcvtsh2usi64: case X86::BI__builtin_ia32_sqrtpd512: case X86::BI__builtin_ia32_sqrtps512: + case X86::BI__builtin_ia32_sqrtph512: ArgNum = 1; HasRC = true; break; + case X86::BI__builtin_ia32_addph512: + case X86::BI__builtin_ia32_divph512: + case X86::BI__builtin_ia32_mulph512: + case X86::BI__builtin_ia32_subph512: case X86::BI__builtin_ia32_addpd512: case X86::BI__builtin_ia32_addps512: case X86::BI__builtin_ia32_divpd512: @@ -4003,11 +4040,17 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) { case X86::BI__builtin_ia32_cvtusi2sd64: case X86::BI__builtin_ia32_cvtusi2ss32: case X86::BI__builtin_ia32_cvtusi2ss64: + case X86::BI__builtin_ia32_vcvtusi2sh: + case X86::BI__builtin_ia32_vcvtusi642sh: + case X86::BI__builtin_ia32_vcvtsi2sh: + case X86::BI__builtin_ia32_vcvtsi642sh: ArgNum = 2; HasRC = true; break; case X86::BI__builtin_ia32_cvtdq2ps512_mask: case X86::BI__builtin_ia32_cvtudq2ps512_mask: + case X86::BI__builtin_ia32_vcvtpd2ph512_mask: + case X86::BI__builtin_ia32_vcvtps2phx512_mask: case X86::BI__builtin_ia32_cvtpd2ps512_mask: case X86::BI__builtin_ia32_cvtpd2dq512_mask: case X86::BI__builtin_ia32_cvtpd2qq512_mask: @@ -4021,24 +4064,45 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) { case X86::BI__builtin_ia32_cvtqq2ps512_mask: case X86::BI__builtin_ia32_cvtuqq2pd512_mask: case X86::BI__builtin_ia32_cvtuqq2ps512_mask: + case X86::BI__builtin_ia32_vcvtdq2ph512_mask: + case X86::BI__builtin_ia32_vcvtudq2ph512_mask: + case X86::BI__builtin_ia32_vcvtw2ph512_mask: + case X86::BI__builtin_ia32_vcvtuw2ph512_mask: + case X86::BI__builtin_ia32_vcvtph2w512_mask: + case X86::BI__builtin_ia32_vcvtph2uw512_mask: + case X86::BI__builtin_ia32_vcvtph2dq512_mask: + case X86::BI__builtin_ia32_vcvtph2udq512_mask: + case X86::BI__builtin_ia32_vcvtph2qq512_mask: + case X86::BI__builtin_ia32_vcvtph2uqq512_mask: + case X86::BI__builtin_ia32_vcvtqq2ph512_mask: + case X86::BI__builtin_ia32_vcvtuqq2ph512_mask: ArgNum = 3; HasRC = true; break; + case X86::BI__builtin_ia32_addsh_round_mask: case 
X86::BI__builtin_ia32_addss_round_mask: case X86::BI__builtin_ia32_addsd_round_mask: + case X86::BI__builtin_ia32_divsh_round_mask: case X86::BI__builtin_ia32_divss_round_mask: case X86::BI__builtin_ia32_divsd_round_mask: + case X86::BI__builtin_ia32_mulsh_round_mask: case X86::BI__builtin_ia32_mulss_round_mask: case X86::BI__builtin_ia32_mulsd_round_mask: + case X86::BI__builtin_ia32_subsh_round_mask: case X86::BI__builtin_ia32_subss_round_mask: case X86::BI__builtin_ia32_subsd_round_mask: + case X86::BI__builtin_ia32_scalefph512_mask: case X86::BI__builtin_ia32_scalefpd512_mask: case X86::BI__builtin_ia32_scalefps512_mask: case X86::BI__builtin_ia32_scalefsd_round_mask: case X86::BI__builtin_ia32_scalefss_round_mask: + case X86::BI__builtin_ia32_scalefsh_round_mask: case X86::BI__builtin_ia32_cvtsd2ss_round_mask: + case X86::BI__builtin_ia32_vcvtss2sh_round_mask: + case X86::BI__builtin_ia32_vcvtsd2sh_round_mask: case X86::BI__builtin_ia32_sqrtsd_round_mask: case X86::BI__builtin_ia32_sqrtss_round_mask: + case X86::BI__builtin_ia32_sqrtsh_round_mask: case X86::BI__builtin_ia32_vfmaddsd3_mask: case X86::BI__builtin_ia32_vfmaddsd3_maskz: case X86::BI__builtin_ia32_vfmaddsd3_mask3: @@ -4412,6 +4476,9 @@ bool Sema::CheckX86BuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, case X86::BI__builtin_ia32_getmantps256_mask: case X86::BI__builtin_ia32_getmantpd512_mask: case X86::BI__builtin_ia32_getmantps512_mask: + case X86::BI__builtin_ia32_getmantph128_mask: + case X86::BI__builtin_ia32_getmantph256_mask: + case X86::BI__builtin_ia32_getmantph512_mask: case X86::BI__builtin_ia32_vec_ext_v16qi: case X86::BI__builtin_ia32_vec_ext_v16hi: i = 1; l = 0; u = 15; @@ -4430,6 +4497,7 @@ bool Sema::CheckX86BuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, case X86::BI__builtin_ia32_rangeps512_mask: case X86::BI__builtin_ia32_getmantsd_round_mask: case X86::BI__builtin_ia32_getmantss_round_mask: + case X86::BI__builtin_ia32_getmantsh_round_mask: case X86::BI__builtin_ia32_vec_set_v16qi: case X86::BI__builtin_ia32_vec_set_v16hi: i = 2; l = 0; u = 15; @@ -4482,12 +4550,16 @@ bool Sema::CheckX86BuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, case X86::BI__builtin_ia32_rndscalepd_256_mask: case X86::BI__builtin_ia32_rndscaleps_mask: case X86::BI__builtin_ia32_rndscalepd_mask: + case X86::BI__builtin_ia32_rndscaleph_mask: case X86::BI__builtin_ia32_reducepd128_mask: case X86::BI__builtin_ia32_reducepd256_mask: case X86::BI__builtin_ia32_reducepd512_mask: case X86::BI__builtin_ia32_reduceps128_mask: case X86::BI__builtin_ia32_reduceps256_mask: case X86::BI__builtin_ia32_reduceps512_mask: + case X86::BI__builtin_ia32_reduceph128_mask: + case X86::BI__builtin_ia32_reduceph256_mask: + case X86::BI__builtin_ia32_reduceph512_mask: case X86::BI__builtin_ia32_prold512: case X86::BI__builtin_ia32_prolq512: case X86::BI__builtin_ia32_prold128: @@ -4506,8 +4578,12 @@ bool Sema::CheckX86BuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, case X86::BI__builtin_ia32_fpclassps256_mask: case X86::BI__builtin_ia32_fpclassps512_mask: case X86::BI__builtin_ia32_fpclasspd512_mask: + case X86::BI__builtin_ia32_fpclassph128_mask: + case X86::BI__builtin_ia32_fpclassph256_mask: + case X86::BI__builtin_ia32_fpclassph512_mask: case X86::BI__builtin_ia32_fpclasssd_mask: case X86::BI__builtin_ia32_fpclassss_mask: + case X86::BI__builtin_ia32_fpclasssh_mask: case X86::BI__builtin_ia32_pslldqi128_byteshift: case X86::BI__builtin_ia32_pslldqi256_byteshift: case 
X86::BI__builtin_ia32_pslldqi512_byteshift: @@ -4618,6 +4694,8 @@ bool Sema::CheckX86BuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, case X86::BI__builtin_ia32_reducess_mask: case X86::BI__builtin_ia32_rndscalesd_round_mask: case X86::BI__builtin_ia32_rndscaless_round_mask: + case X86::BI__builtin_ia32_rndscalesh_round_mask: + case X86::BI__builtin_ia32_reducesh_mask: i = 4; l = 0; u = 255; break; } diff --git a/clang/lib/Sema/SemaCodeComplete.cpp b/clang/lib/Sema/SemaCodeComplete.cpp index e8e8fb209cc9a..8c551a798b32b 100644 --- a/clang/lib/Sema/SemaCodeComplete.cpp +++ b/clang/lib/Sema/SemaCodeComplete.cpp @@ -4389,7 +4389,8 @@ void Sema::CodeCompleteAttribute(AttributeCommonInfo::Syntax Syntax, auto AddCompletions = [&](const ParsedAttrInfo &A) { if (A.IsTargetSpecific && !A.existsInTarget(Context.getTargetInfo())) return; - // FIXME: filter by langopts (diagLangOpts method requires a ParsedAttr) + if (!A.acceptsLangOpts(getLangOpts())) + return; for (const auto &S : A.Spellings) { if (S.Syntax != Syntax) continue; @@ -4422,33 +4423,59 @@ void Sema::CodeCompleteAttribute(AttributeCommonInfo::Syntax Syntax, Scope = ""; } + auto Add = [&](llvm::StringRef Scope, llvm::StringRef Name, + bool Underscores) { + CodeCompletionBuilder Builder(Results.getAllocator(), + Results.getCodeCompletionTUInfo()); + llvm::SmallString<32> Text; + if (!Scope.empty()) { + Text.append(Scope); + Text.append("::"); + } + if (Underscores) + Text.append("__"); + Text.append(Name); + if (Underscores) + Text.append("__"); + Builder.AddTypedTextChunk(Results.getAllocator().CopyString(Text)); + + if (!A.ArgNames.empty()) { + Builder.AddChunk(CodeCompletionString::CK_LeftParen, "("); + bool First = true; + for (const char *Arg : A.ArgNames) { + if (!First) + Builder.AddChunk(CodeCompletionString::CK_Comma, ", "); + First = false; + Builder.AddPlaceholderChunk(Arg); + } + Builder.AddChunk(CodeCompletionString::CK_RightParen, ")"); + } + + Results.AddResult(Builder.TakeString()); + }; + // Generate the non-underscore-guarded result. // Note this is (a suffix of) the NormalizedFullName, no need to copy. // If an underscore-guarded scope was specified, only the // underscore-guarded attribute name is relevant. if (!InScopeUnderscore) - Results.AddResult(Scope.empty() ? Name.data() : S.NormalizedFullName); + Add(Scope, Name, /*Underscores=*/false); // Generate the underscore-guarded version, for syntaxes that support it. // We skip this if the scope was already spelled and not guarded, or // we must spell it and can't guard it. if (!(InScope && !InScopeUnderscore) && SyntaxSupportsGuards) { llvm::SmallString<32> Guarded; - if (!Scope.empty()) { + if (Scope.empty()) { + Add(Scope, Name, /*Underscores=*/true); + } else { const char *GuardedScope = underscoreAttrScope(Scope); if (!GuardedScope) continue; - Guarded.append(GuardedScope); - Guarded.append("::"); + Add(GuardedScope, Name, /*Underscores=*/true); } - Guarded.append("__"); - Guarded.append(Name); - Guarded.append("__"); - Results.AddResult( - CodeCompletionResult(Results.getAllocator().CopyString(Guarded))); } - // FIXME: include the list of arg names (not currently exposed). // It may be nice to include the Kind so we can look up the docs later. 
} }; diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index a00a72982bac8..4827f6b3bb345 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -16836,10 +16836,7 @@ NamedDecl *Sema::ActOnFriendFunctionDecl(Scope *S, Declarator &D, while (DC->isRecord()) DC = DC->getParent(); - DeclContext *LookupDC = DC; - while (LookupDC->isTransparentContext()) - LookupDC = LookupDC->getParent(); - + DeclContext *LookupDC = DC->getNonTransparentContext(); while (true) { LookupQualifiedName(Previous, LookupDC); diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 68b6c5e384599..9d9b39b3ab996 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -18442,7 +18442,6 @@ static ExprResult rebuildPotentialResultsAsNonOdrUsed(Sema &S, Expr *E, ME->getQualifierLoc(), ME->getTemplateKeywordLoc(), ME->getMemberDecl(), ME->getFoundDecl(), ME->getMemberNameInfo(), CopiedTemplateArgs(ME), ME->getType(), ME->getValueKind(), ME->getObjectKind(), NOUR); - return ExprEmpty(); } case Expr::BinaryOperatorClass: { diff --git a/clang/lib/Sema/SemaExprMember.cpp b/clang/lib/Sema/SemaExprMember.cpp index af2aa49c01039..92b7464cd0bbf 100644 --- a/clang/lib/Sema/SemaExprMember.cpp +++ b/clang/lib/Sema/SemaExprMember.cpp @@ -564,10 +564,7 @@ bool Sema::CheckQualifiedMemberReference(Expr *BaseExpr, return false; // Note that we use the DC of the decl, not the underlying decl. - DeclContext *DC = (*I)->getDeclContext(); - while (DC->isTransparentContext()) - DC = DC->getParent(); - + DeclContext *DC = (*I)->getDeclContext()->getNonTransparentContext(); if (!DC->isRecord()) continue; diff --git a/clang/lib/Sema/SemaExprObjC.cpp b/clang/lib/Sema/SemaExprObjC.cpp index 8a9c933fc93f8..9e46801ea5089 100644 --- a/clang/lib/Sema/SemaExprObjC.cpp +++ b/clang/lib/Sema/SemaExprObjC.cpp @@ -4015,12 +4015,11 @@ static bool CheckObjCBridgeNSCast(Sema &S, QualType castType, Expr *castExpr, if (Parm->isStr("id")) return true; - NamedDecl *Target = nullptr; // Check for an existing type with this name. 
LookupResult R(S, DeclarationName(Parm), SourceLocation(), Sema::LookupOrdinaryName); if (S.LookupName(R, S.TUScope)) { - Target = R.getFoundDecl(); + NamedDecl *Target = R.getFoundDecl(); if (Target && isa(Target)) { ObjCInterfaceDecl *ExprClass = cast(Target); if (const ObjCObjectPointerType *InterfacePointerType = @@ -4056,8 +4055,6 @@ static bool CheckObjCBridgeNSCast(Sema &S, QualType castType, Expr *castExpr, diag::err_objc_cf_bridged_not_interface) << castExpr->getType() << Parm; S.Diag(TDNDecl->getBeginLoc(), diag::note_declared_at); - if (Target) - S.Diag(Target->getBeginLoc(), diag::note_declared_at); } return true; } diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index fe81ddf6bf851..51db4a28ee92c 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -5814,6 +5814,31 @@ StmtResult Sema::ActOnOpenMPExecutableDirective( ErrorFound = true; } } + // OpenMP 5.0 [2.19.7] + // If a list item appears in a reduction, lastprivate or linear + // clause on a combined target construct then it is treated as + // if it also appears in a map clause with a map-type of tofrom + if (getLangOpts().OpenMP >= 50 && Kind != OMPD_target && + isOpenMPTargetExecutionDirective(Kind)) { + SmallVector ImplicitExprs; + for (OMPClause *C : Clauses) { + if (auto *RC = dyn_cast(C)) + for (Expr *E : RC->varlists()) + if (!isa(E->IgnoreParenImpCasts())) + ImplicitExprs.emplace_back(E); + } + if (!ImplicitExprs.empty()) { + ArrayRef Exprs = ImplicitExprs; + CXXScopeSpec MapperIdScopeSpec; + DeclarationNameInfo MapperId; + if (OMPClause *Implicit = ActOnOpenMPMapClause( + OMPC_MAP_MODIFIER_unknown, SourceLocation(), MapperIdScopeSpec, + MapperId, OMPC_MAP_tofrom, + /*IsMapTypeImplicit=*/true, SourceLocation(), SourceLocation(), + Exprs, OMPVarListLocTy(), /*NoDiagnose=*/true)) + ClausesWithImplicit.emplace_back(Implicit); + } + } for (unsigned I = 0, E = DefaultmapKindNum; I < E; ++I) { int ClauseKindCnt = -1; for (ArrayRef ImplicitMap : ImplicitMaps[I]) { @@ -18734,7 +18759,10 @@ class MapBaseChecker final : public StmtVisitor { } bool VisitOMPArraySectionExpr(OMPArraySectionExpr *OASE) { - assert(!NoDiagnose && "Array sections cannot be implicitly mapped."); + // After OMP 5.0 Array section in reduction clause will be implicitly + // mapped + assert(!(SemaRef.getLangOpts().OpenMP < 50 && NoDiagnose) && + "Array sections cannot be implicitly mapped."); Expr *E = OASE->getBase()->IgnoreParenImpCasts(); QualType CurType = OMPArraySectionExpr::getBaseOriginalType(E).getCanonicalType(); @@ -18777,6 +18805,8 @@ class MapBaseChecker final : public StmtVisitor { } else if (AllowUnitySizeArraySection && NotUnity) { // A unity or whole array section is not allowed and that is not // compatible with the properties of the current array section. + if (NoDiagnose) + return false; SemaRef.Diag( ELoc, diag::err_array_section_does_not_specify_contiguous_storage) << OASE->getSourceRange(); @@ -19320,7 +19350,7 @@ static void checkMappableExpressionList( CXXScopeSpec &MapperIdScopeSpec, DeclarationNameInfo MapperId, ArrayRef UnresolvedMappers, OpenMPMapClauseKind MapType = OMPC_MAP_unknown, - bool IsMapTypeImplicit = false) { + bool IsMapTypeImplicit = false, bool NoDiagnose = false) { // We only expect mappable expressions in 'to', 'from', and 'map' clauses. 
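For orientation, a hedged example of the OpenMP 5.0 rule implemented above (illustrative kernel, not from the patch; assumes -fopenmp-version=50): a list item in a reduction clause on a combined target construct is treated as if it also appeared in a map clause with map-type tofrom, so the reduction target needs no explicit map.

  void vsum(int n, const int *a, int *r) {
    #pragma omp target teams distribute parallel for \
            map(to : a[0:n]) reduction(+ : r[0:1])
    for (int i = 0; i < n; ++i)
      r[0] += a[i];
    // Under the 5.0 rule, r[0:1] is treated as if map(tofrom: r[0:1]) were
    // also present; the relaxed assert above is what lets this array section
    // be implicitly mapped.
  }
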
assert((CKind == OMPC_map || CKind == OMPC_to || CKind == OMPC_from) && "Unexpected clause kind with mappable expressions!"); @@ -19399,9 +19429,9 @@ static void checkMappableExpressionList( // Obtain the array or member expression bases if required. Also, fill the // components array with all the components identified in the process. - const Expr *BE = checkMapClauseExpressionBase( - SemaRef, SimpleExpr, CurComponents, CKind, DSAS->getCurrentDirective(), - /*NoDiagnose=*/false); + const Expr *BE = + checkMapClauseExpressionBase(SemaRef, SimpleExpr, CurComponents, CKind, + DSAS->getCurrentDirective(), NoDiagnose); if (!BE) continue; @@ -19447,6 +19477,8 @@ static void checkMappableExpressionList( // OpenMP 4.5 [2.10.5, target update Construct] // threadprivate variables cannot appear in a from clause. if (VD && DSAS->isThreadPrivate(VD)) { + if (NoDiagnose) + continue; DSAStackTy::DSAVarData DVar = DSAS->getTopDSA(VD, /*FromParent=*/false); SemaRef.Diag(ELoc, diag::err_omp_threadprivate_in_clause) << getOpenMPClauseName(CKind); @@ -19507,7 +19539,7 @@ static void checkMappableExpressionList( // OpenMP 4.5 [2.15.5.1, map Clause, Restrictions, p.9] // A list item must have a mappable type. if (!checkTypeMappable(VE->getExprLoc(), VE->getSourceRange(), SemaRef, - DSAS, Type)) + DSAS, Type, /*FullCheck=*/true)) continue; if (CKind == OMPC_map) { @@ -19610,7 +19642,8 @@ OMPClause *Sema::ActOnOpenMPMapClause( CXXScopeSpec &MapperIdScopeSpec, DeclarationNameInfo &MapperId, OpenMPMapClauseKind MapType, bool IsMapTypeImplicit, SourceLocation MapLoc, SourceLocation ColonLoc, ArrayRef VarList, - const OMPVarListLocTy &Locs, ArrayRef UnresolvedMappers) { + const OMPVarListLocTy &Locs, bool NoDiagnose, + ArrayRef UnresolvedMappers) { OpenMPMapModifierKind Modifiers[] = { OMPC_MAP_MODIFIER_unknown, OMPC_MAP_MODIFIER_unknown, OMPC_MAP_MODIFIER_unknown, OMPC_MAP_MODIFIER_unknown}; @@ -19634,7 +19667,7 @@ OMPClause *Sema::ActOnOpenMPMapClause( MappableVarListInfo MVLI(VarList); checkMappableExpressionList(*this, DSAStack, OMPC_map, MVLI, Locs.StartLoc, MapperIdScopeSpec, MapperId, UnresolvedMappers, - MapType, IsMapTypeImplicit); + MapType, IsMapTypeImplicit, NoDiagnose); // We need to produce a map clause even if we don't have variables so that // other diagnostics related with non-existing map clauses are accurate. diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp index ac0de21da0d38..acf258a6e357b 100644 --- a/clang/lib/Sema/SemaStmt.cpp +++ b/clang/lib/Sema/SemaStmt.cpp @@ -3316,7 +3316,7 @@ Sema::ActOnBreakStmt(SourceLocation BreakLoc, Scope *CurScope) { /// being thrown, or being co_returned from a coroutine. This expression /// might be modified by the implementation. /// -/// \param ForceCXX2b Overrides detection of current language mode +/// \param Mode Overrides detection of current language mode /// and uses the rules for C++2b. 
/// /// \returns An aggregate which contains the Candidate and isMoveEligible diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 4fa98877a2657..f9f6a134262d3 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -1933,10 +1933,10 @@ class TreeTransform { OpenMPMapClauseKind MapType, bool IsMapTypeImplicit, SourceLocation MapLoc, SourceLocation ColonLoc, ArrayRef VarList, const OMPVarListLocTy &Locs, ArrayRef UnresolvedMappers) { - return getSema().ActOnOpenMPMapClause(MapTypeModifiers, MapTypeModifiersLoc, - MapperIdScopeSpec, MapperId, MapType, - IsMapTypeImplicit, MapLoc, ColonLoc, - VarList, Locs, UnresolvedMappers); + return getSema().ActOnOpenMPMapClause( + MapTypeModifiers, MapTypeModifiersLoc, MapperIdScopeSpec, MapperId, + MapType, IsMapTypeImplicit, MapLoc, ColonLoc, VarList, Locs, + /*NoDiagnose=*/false, UnresolvedMappers); } /// Build a new OpenMP 'allocate' clause. diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 0fafa0eb6551f..d6f9710ad803d 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -4240,8 +4240,11 @@ ASTReader::ASTReadResult ASTReader::ReadAST(StringRef FileName, PreviousGeneration = incrementGeneration(*ContextObj); unsigned NumModules = ModuleMgr.size(); - auto removeModulesAndReturn = [&](ASTReadResult ReadResult) { - assert(ReadResult && "expected to return error"); + SmallVector Loaded; + if (ASTReadResult ReadResult = + ReadASTCore(FileName, Type, ImportLoc, + /*ImportedBy=*/nullptr, Loaded, 0, 0, ASTFileSignature(), + ClientLoadCapabilities)) { ModuleMgr.removeModules(ModuleMgr.begin() + NumModules, PP.getLangOpts().Modules ? &PP.getHeaderSearchInfo().getModuleMap() @@ -4252,22 +4255,6 @@ ASTReader::ASTReadResult ASTReader::ReadAST(StringRef FileName, GlobalIndex.reset(); ModuleMgr.setGlobalIndex(nullptr); return ReadResult; - }; - - SmallVector Loaded; - switch (ASTReadResult ReadResult = - ReadASTCore(FileName, Type, ImportLoc, - /*ImportedBy=*/nullptr, Loaded, 0, 0, - ASTFileSignature(), ClientLoadCapabilities)) { - case Failure: - case Missing: - case OutOfDate: - case VersionMismatch: - case ConfigurationMismatch: - case HadErrors: - return removeModulesAndReturn(ReadResult); - case Success: - break; } // Here comes stuff that we only do once the entire chain is loaded. @@ -4279,18 +4266,18 @@ ASTReader::ASTReadResult ASTReader::ReadAST(StringRef FileName, // Read the AST block. if (ASTReadResult Result = ReadASTBlock(F, ClientLoadCapabilities)) - return removeModulesAndReturn(Result); + return Failure; // The AST block should always have a definition for the main module. if (F.isModule() && !F.DidReadTopLevelSubmodule) { Error(diag::err_module_file_missing_top_level_submodule, F.FileName); - return removeModulesAndReturn(Failure); + return Failure; } // Read the extension blocks. while (!SkipCursorToBlock(F.Stream, EXTENSION_BLOCK_ID)) { if (ASTReadResult Result = ReadExtensionBlock(F)) - return removeModulesAndReturn(Result); + return Failure; } // Once read, set the ModuleFile bit base offset and update the size in @@ -5605,17 +5592,20 @@ ASTReader::ReadSubmoduleBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { } case SUBMODULE_UMBRELLA_HEADER: { + // FIXME: This doesn't work for framework modules as `Filename` is the + // name as written in the module file and does not include + // `Headers/`, so this path will never exist. 
std::string Filename = std::string(Blob); ResolveImportedPath(F, Filename); if (auto Umbrella = PP.getFileManager().getFile(Filename)) { - if (!CurrentModule->getUmbrellaHeader()) + if (!CurrentModule->getUmbrellaHeader()) { // FIXME: NameAsWritten ModMap.setUmbrellaHeader(CurrentModule, *Umbrella, Blob, ""); - else if (CurrentModule->getUmbrellaHeader().Entry != *Umbrella) { - if ((ClientLoadCapabilities & ARR_OutOfDate) == 0) - Error("mismatched umbrella headers in submodule"); - return OutOfDate; } + // Note that it's too late at this point to return out of date if the + // name from the PCM doesn't match up with the one in the module map, + // but also quite unlikely since we will have already checked the + // modification time and size of the module map file itself. } break; } @@ -5639,16 +5629,13 @@ ASTReader::ReadSubmoduleBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { break; case SUBMODULE_UMBRELLA_DIR: { + // See comments in SUBMODULE_UMBRELLA_HEADER std::string Dirname = std::string(Blob); ResolveImportedPath(F, Dirname); if (auto Umbrella = PP.getFileManager().getDirectory(Dirname)) { - if (!CurrentModule->getUmbrellaDir()) + if (!CurrentModule->getUmbrellaDir()) { // FIXME: NameAsWritten ModMap.setUmbrellaDir(CurrentModule, *Umbrella, Blob, ""); - else if (CurrentModule->getUmbrellaDir().Entry != *Umbrella) { - if ((ClientLoadCapabilities & ARR_OutOfDate) == 0) - Error("mismatched umbrella directories in submodule"); - return OutOfDate; } } break; diff --git a/clang/lib/StaticAnalyzer/Checkers/CheckObjCInstMethSignature.cpp b/clang/lib/StaticAnalyzer/Checkers/CheckObjCInstMethSignature.cpp index 175dfcef0df45..a13de306eac84 100644 --- a/clang/lib/StaticAnalyzer/Checkers/CheckObjCInstMethSignature.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/CheckObjCInstMethSignature.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// This file defines a CheckObjCInstMethSignature, a flow-insenstive check +// This file defines a CheckObjCInstMethSignature, a flow-insensitive check // that determines if an Objective-C class interface incorrectly redefines // the method signature in a subclass. 
// diff --git a/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp index a6470da09c458..7db4066653cbd 100644 --- a/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp @@ -48,6 +48,7 @@ #include "InterCheckerAPI.h" #include "clang/AST/Attr.h" #include "clang/AST/DeclCXX.h" +#include "clang/AST/DeclTemplate.h" #include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/ParentMap.h" @@ -64,12 +65,15 @@ #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerHelpers.h" #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h" #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/StoreRef.h" #include "clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/Compiler.h" @@ -298,6 +302,8 @@ class MallocChecker /// which might free a pointer are annotated. DefaultBool ShouldIncludeOwnershipAnnotatedFunctions; + DefaultBool ShouldRegisterNoOwnershipChangeVisitor; + /// Many checkers are essentially built into this one, so enabling them will /// make MallocChecker perform additional modeling and reporting. enum CheckKind { @@ -722,11 +728,146 @@ class MallocChecker bool isArgZERO_SIZE_PTR(ProgramStateRef State, CheckerContext &C, SVal ArgVal) const; }; +} // end anonymous namespace + +//===----------------------------------------------------------------------===// +// Definition of NoOwnershipChangeVisitor. +//===----------------------------------------------------------------------===// + +namespace { +class NoOwnershipChangeVisitor final : public NoStateChangeFuncVisitor { + SymbolRef Sym; + using OwnerSet = llvm::SmallPtrSet; + + // Collect which entities point to the allocated memory, and could be + // responsible for deallocating it. + class OwnershipBindingsHandler : public StoreManager::BindingsHandler { + SymbolRef Sym; + OwnerSet &Owners; + + public: + OwnershipBindingsHandler(SymbolRef Sym, OwnerSet &Owners) + : Sym(Sym), Owners(Owners) {} + + bool HandleBinding(StoreManager &SMgr, Store Store, const MemRegion *Region, + SVal Val) override { + if (Val.getAsSymbol() == Sym) + Owners.insert(Region); + return true; + } + }; + +protected: + OwnerSet getOwnersAtNode(const ExplodedNode *N) { + OwnerSet Ret; + + ProgramStateRef State = N->getState(); + OwnershipBindingsHandler Handler{Sym, Ret}; + State->getStateManager().getStoreManager().iterBindings(State->getStore(), + Handler); + return Ret; + } + + static const ExplodedNode *getCallExitEnd(const ExplodedNode *N) { + while (N && !N->getLocationAs()) + N = N->getFirstSucc(); + return N; + } + + virtual bool + wasModifiedBeforeCallExit(const ExplodedNode *CurrN, + const ExplodedNode *CallExitN) override { + if (CurrN->getLocationAs()) + return true; + + // Its the state right *after* the call that is interesting. 
Any pointers + // inside the call that pointed to the allocated memory are of little + // consequence if their lifetime ends within the function. + CallExitN = getCallExitEnd(CallExitN); + if (!CallExitN) + return true; + + if (CurrN->getState()->get(Sym) != + CallExitN->getState()->get(Sym)) + return true; + + OwnerSet CurrOwners = getOwnersAtNode(CurrN); + OwnerSet ExitOwners = getOwnersAtNode(CallExitN); + + // Owners in the current set may be purged from the analyzer later on. + // If a variable is dead (is not referenced directly or indirectly after + // some point), it will be removed from the Store before the end of its + // actual lifetime. + // This means that that if the ownership status didn't change, CurrOwners + // must be a superset of, but not necessarily equal to ExitOwners. + return !llvm::set_is_subset(ExitOwners, CurrOwners); + } + + static PathDiagnosticPieceRef emitNote(const ExplodedNode *N) { + PathDiagnosticLocation L = PathDiagnosticLocation::create( + N->getLocation(), + N->getState()->getStateManager().getContext().getSourceManager()); + return std::make_shared( + L, "Returning without deallocating memory or storing the pointer for " + "later deallocation"); + } + + virtual PathDiagnosticPieceRef + maybeEmitNoteForObjCSelf(PathSensitiveBugReport &R, + const ObjCMethodCall &Call, + const ExplodedNode *N) override { + // TODO: Implement. + return nullptr; + } + + virtual PathDiagnosticPieceRef + maybeEmitNoteForCXXThis(PathSensitiveBugReport &R, + const CXXConstructorCall &Call, + const ExplodedNode *N) override { + // TODO: Implement. + return nullptr; + } + + virtual PathDiagnosticPieceRef + maybeEmitNoteForParameters(PathSensitiveBugReport &R, const CallEvent &Call, + const ExplodedNode *N) override { + // TODO: Factor the logic of "what constitutes as an entity being passed + // into a function call" out by reusing the code in + // NoStoreFuncVisitor::maybeEmitNoteForParameters, maybe by incorporating + // the printing technology in UninitializedObject's FieldChainInfo. + ArrayRef Parameters = Call.parameters(); + for (unsigned I = 0; I < Call.getNumArgs() && I < Parameters.size(); ++I) { + SVal V = Call.getArgSVal(I); + if (V.getAsSymbol() == Sym) + return emitNote(N); + } + return nullptr; + } + +public: + NoOwnershipChangeVisitor(SymbolRef Sym) + : NoStateChangeFuncVisitor(bugreporter::TrackingKind::Thorough), + Sym(Sym) {} + + void Profile(llvm::FoldingSetNodeID &ID) const override { + static int Tag = 0; + ID.AddPointer(&Tag); + ID.AddPointer(Sym); + } + + void *getTag() const { + static int Tag = 0; + return static_cast(&Tag); + } +}; + +} // end anonymous namespace //===----------------------------------------------------------------------===// // Definition of MallocBugVisitor. //===----------------------------------------------------------------------===// +namespace { /// The bug visitor which allows us to print extra diagnostics along the /// BugReport path. For example, showing the allocation site of the leaked /// region. 
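As a side note on the heuristic above: the ownership test reduces to a set-inclusion check over the regions that point to the allocated memory. A stripped-down sketch (hypothetical helper and element type, but the same llvm::set_is_subset call the patch uses):

  #include "llvm/ADT/SetOperations.h"
  #include "llvm/ADT/SmallPtrSet.h"

  namespace {
  using OwnerSet = llvm::SmallPtrSet<const void *, 8>;

  // If ownership did not change inside the call, every owner recorded at the
  // call-exit node should already be known at the earlier node (dead regions
  // may have been purged, so the sets need not be equal). An exit-time owner
  // missing from the earlier set therefore signals a possible change.
  bool ownershipPossiblyChanged(const OwnerSet &CurrOwners,
                                const OwnerSet &ExitOwners) {
    return !llvm::set_is_subset(ExitOwners, CurrOwners);
  }
  } // namespace
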
@@ -851,7 +992,6 @@ class MallocBugVisitor final : public BugReporterVisitor { } }; }; - } // end anonymous namespace // A map from the freed symbol to the symbol representing the return value of @@ -2579,6 +2719,8 @@ void MallocChecker::HandleLeak(SymbolRef Sym, ExplodedNode *N, AllocNode->getLocationContext()->getDecl()); R->markInteresting(Sym); R->addVisitor(Sym, true); + if (ShouldRegisterNoOwnershipChangeVisitor) + R->addVisitor(Sym); C.emitReport(std::move(R)); } @@ -3395,6 +3537,9 @@ void ento::registerDynamicMemoryModeling(CheckerManager &mgr) { auto *checker = mgr.registerChecker(); checker->ShouldIncludeOwnershipAnnotatedFunctions = mgr.getAnalyzerOptions().getCheckerBooleanOption(checker, "Optimistic"); + checker->ShouldRegisterNoOwnershipChangeVisitor = + mgr.getAnalyzerOptions().getCheckerBooleanOption( + checker, "AddNoOwnershipChangeNotes"); } bool ento::shouldRegisterDynamicMemoryModeling(const CheckerManager &mgr) { diff --git a/clang/lib/StaticAnalyzer/Checkers/NSErrorChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/NSErrorChecker.cpp index 90c5583d89691..dcca8be55e337 100644 --- a/clang/lib/StaticAnalyzer/Checkers/NSErrorChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/NSErrorChecker.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// This file defines a CheckNSError, a flow-insenstive check +// This file defines a CheckNSError, a flow-insensitive check // that determines if an Objective-C class interface correctly returns // a non-void return type. // diff --git a/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp b/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp index d06a2d4933038..ecd9b649c4f46 100644 --- a/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp +++ b/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp @@ -343,46 +343,140 @@ BugReporterVisitor::getDefaultEndPath(const BugReporterContext &BRC, return P; } +//===----------------------------------------------------------------------===// +// Implementation of NoStateChangeFuncVisitor. +//===----------------------------------------------------------------------===// + +bool NoStateChangeFuncVisitor::isModifiedInFrame(const ExplodedNode *N) { + const LocationContext *Ctx = N->getLocationContext(); + const StackFrameContext *SCtx = Ctx->getStackFrame(); + if (!FramesModifyingCalculated.count(SCtx)) + findModifyingFrames(N); + return FramesModifying.count(SCtx); +} + +void NoStateChangeFuncVisitor::findModifyingFrames( + const ExplodedNode *const CallExitBeginN) { + + assert(CallExitBeginN->getLocationAs()); + const ExplodedNode *LastReturnN = CallExitBeginN; + const StackFrameContext *const OriginalSCtx = + CallExitBeginN->getLocationContext()->getStackFrame(); + + const ExplodedNode *CurrN = CallExitBeginN; + + do { + ProgramStateRef State = CurrN->getState(); + auto CallExitLoc = CurrN->getLocationAs(); + if (CallExitLoc) { + LastReturnN = CurrN; + } + + FramesModifyingCalculated.insert( + CurrN->getLocationContext()->getStackFrame()); + + if (wasModifiedBeforeCallExit(CurrN, LastReturnN)) { + const StackFrameContext *SCtx = CurrN->getStackFrame(); + while (!SCtx->inTopFrame()) { + auto p = FramesModifying.insert(SCtx); + if (!p.second) + break; // Frame and all its parents already inserted. + SCtx = SCtx->getParent()->getStackFrame(); + } + } + + // Stop calculation at the call to the current function. 
+ if (auto CE = CurrN->getLocationAs()) + if (CE->getCalleeContext() == OriginalSCtx) + break; + + CurrN = CurrN->getFirstPred(); + } while (CurrN); +} + +PathDiagnosticPieceRef NoStateChangeFuncVisitor::VisitNode( + const ExplodedNode *N, BugReporterContext &BR, PathSensitiveBugReport &R) { + + const LocationContext *Ctx = N->getLocationContext(); + const StackFrameContext *SCtx = Ctx->getStackFrame(); + ProgramStateRef State = N->getState(); + auto CallExitLoc = N->getLocationAs(); + + // No diagnostic if region was modified inside the frame. + if (!CallExitLoc || isModifiedInFrame(N)) + return nullptr; + + CallEventRef<> Call = + BR.getStateManager().getCallEventManager().getCaller(SCtx, State); + + // Optimistically suppress uninitialized value bugs that result + // from system headers having a chance to initialize the value + // but failing to do so. It's too unlikely a system header's fault. + // It's much more likely a situation in which the function has a failure + // mode that the user decided not to check. If we want to hunt such + // omitted checks, we should provide an explicit function-specific note + // describing the precondition under which the function isn't supposed to + // initialize its out-parameter, and additionally check that such + // precondition can actually be fulfilled on the current path. + if (Call->isInSystemHeader()) { + // We make an exception for system header functions that have no branches. + // Such functions unconditionally fail to initialize the variable. + // If they call other functions that have more paths within them, + // this suppression would still apply when we visit these inner functions. + // One common example of a standard function that doesn't ever initialize + // its out parameter is operator placement new; it's up to the follow-up + // constructor (if any) to initialize the memory. + if (!N->getStackFrame()->getCFG()->isLinear()) { + static int i = 0; + R.markInvalid(&i, nullptr); + } + return nullptr; + } + + if (const auto *MC = dyn_cast(Call)) { + // If we failed to construct a piece for self, we still want to check + // whether the entity of interest is in a parameter. + if (PathDiagnosticPieceRef Piece = maybeEmitNoteForObjCSelf(R, *MC, N)) + return Piece; + } + + if (const auto *CCall = dyn_cast(Call)) { + // Do not generate diagnostics for not modified parameters in + // constructors. + return maybeEmitNoteForCXXThis(R, *CCall, N); + } + + return maybeEmitNoteForParameters(R, *Call, N); +} + //===----------------------------------------------------------------------===// // Implementation of NoStoreFuncVisitor. //===----------------------------------------------------------------------===// namespace { - /// Put a diagnostic on return statement of all inlined functions /// for which the region of interest \p RegionOfInterest was passed into, /// but not written inside, and it has caused an undefined read or a null /// pointer dereference outside. -class NoStoreFuncVisitor final : public BugReporterVisitor { +class NoStoreFuncVisitor final : public NoStateChangeFuncVisitor { const SubRegion *RegionOfInterest; MemRegionManager &MmrMgr; const SourceManager &SM; const PrintingPolicy &PP; - bugreporter::TrackingKind TKind; /// Recursion limit for dereferencing fields when looking for the /// region of interest. /// The limit of two indicates that we will dereference fields only once. static const unsigned DEREFERENCE_LIMIT = 2; - /// Frames writing into \c RegionOfInterest. 
- /// This visitor generates a note only if a function does not write into - /// a region of interest. This information is not immediately available - /// by looking at the node associated with the exit from the function - /// (usually the return statement). To avoid recomputing the same information - /// many times (going up the path for each node and checking whether the - /// region was written into) we instead lazily compute the - /// stack frames along the path which write into the region of interest. - llvm::SmallPtrSet FramesModifyingRegion; - llvm::SmallPtrSet FramesModifyingCalculated; - using RegionVector = SmallVector; public: NoStoreFuncVisitor(const SubRegion *R, bugreporter::TrackingKind TKind) - : RegionOfInterest(R), MmrMgr(R->getMemRegionManager()), + : NoStateChangeFuncVisitor(TKind), RegionOfInterest(R), + MmrMgr(R->getMemRegionManager()), SM(MmrMgr.getContext().getSourceManager()), - PP(MmrMgr.getContext().getPrintingPolicy()), TKind(TKind) {} + PP(MmrMgr.getContext().getPrintingPolicy()) {} void Profile(llvm::FoldingSetNodeID &ID) const override { static int Tag = 0; @@ -395,11 +489,13 @@ class NoStoreFuncVisitor final : public BugReporterVisitor { return static_cast(&Tag); } - PathDiagnosticPieceRef VisitNode(const ExplodedNode *N, - BugReporterContext &BR, - PathSensitiveBugReport &R) override; - private: + /// \return Whether \c RegionOfInterest was modified at \p CurrN compared to + /// the value it holds in \p CallExitBeginN. + virtual bool + wasModifiedBeforeCallExit(const ExplodedNode *CurrN, + const ExplodedNode *CallExitBeginN) override; + /// Attempts to find the region of interest in a given record decl, /// by either following the base classes or fields. /// Dereferences fields up to a given recursion limit. @@ -411,20 +507,21 @@ class NoStoreFuncVisitor final : public BugReporterVisitor { const MemRegion *R, const RegionVector &Vec = {}, int depth = 0); - /// Check and lazily calculate whether the region of interest is - /// modified in the stack frame to which \p N belongs. - /// The calculation is cached in FramesModifyingRegion. - bool isRegionOfInterestModifiedInFrame(const ExplodedNode *N) { - const LocationContext *Ctx = N->getLocationContext(); - const StackFrameContext *SCtx = Ctx->getStackFrame(); - if (!FramesModifyingCalculated.count(SCtx)) - findModifyingFrames(N); - return FramesModifyingRegion.count(SCtx); - } + // Region of interest corresponds to an IVar, exiting a method + // which could have written into that IVar, but did not. + virtual PathDiagnosticPieceRef + maybeEmitNoteForObjCSelf(PathSensitiveBugReport &R, + const ObjCMethodCall &Call, + const ExplodedNode *N) override final; + + virtual PathDiagnosticPieceRef + maybeEmitNoteForCXXThis(PathSensitiveBugReport &R, + const CXXConstructorCall &Call, + const ExplodedNode *N) override final; - /// Write to \c FramesModifyingRegion all stack frames along - /// the path in the current stack frame which modify \c RegionOfInterest. - void findModifyingFrames(const ExplodedNode *N); + virtual PathDiagnosticPieceRef + maybeEmitNoteForParameters(PathSensitiveBugReport &R, const CallEvent &Call, + const ExplodedNode *N) override final; /// Consume the information on the no-store stack frame in order to /// either emit a note or suppress the report enirely. 
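In short, the mechanical part of this visitor (walking back to the call, caching which frames modified the entity, suppressing notes for linear system-header functions) now lives in the shared NoStateChangeFuncVisitor base, and NoStoreFuncVisitor, like the new NoOwnershipChangeVisitor, only answers entity-specific questions. A deliberately simplified, self-contained sketch of that shape (hypothetical Node/NoChangeVisitorBase names, not the analyzer's real API):

  #include <optional>
  #include <string>

  struct Node {                 // stand-in for ExplodedNode
    const Node *Pred = nullptr;
    bool AtCallExit = false;
  };

  class NoChangeVisitorBase {
  public:
    virtual ~NoChangeVisitorBase() = default;

    // Shared driver: emit a note at a call-exit node only if the subclass
    // reports that the tracked entity was never modified inside the callee.
    std::optional<std::string> visit(const Node *CallExitN) {
      if (!CallExitN->AtCallExit)
        return std::nullopt;
      for (const Node *Cur = CallExitN; Cur; Cur = Cur->Pred)
        if (wasModifiedBeforeCallExit(Cur, CallExitN))
          return std::nullopt;    // modified somewhere: stay silent
      return makeNote(CallExitN);
    }

  protected:
    // Subclass hooks, mirroring wasModifiedBeforeCallExit / maybeEmitNote*.
    virtual bool wasModifiedBeforeCallExit(const Node *CurrN,
                                           const Node *CallExitN) = 0;
    virtual std::optional<std::string> makeNote(const Node *N) = 0;
  };
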
@@ -436,22 +533,18 @@ class NoStoreFuncVisitor final : public BugReporterVisitor { const MemRegion *MatchedRegion, StringRef FirstElement, bool FirstIsReferenceType, unsigned IndirectionLevel); - /// Pretty-print region \p MatchedRegion to \p os. - /// \return Whether printing succeeded. - bool prettyPrintRegionName(StringRef FirstElement, bool FirstIsReferenceType, + bool prettyPrintRegionName(const RegionVector &FieldChain, const MemRegion *MatchedRegion, - const RegionVector &FieldChain, - int IndirectionLevel, + StringRef FirstElement, bool FirstIsReferenceType, + unsigned IndirectionLevel, llvm::raw_svector_ostream &os); - /// Print first item in the chain, return new separator. - static StringRef prettyPrintFirstElement(StringRef FirstElement, - bool MoreItemsExpected, - int IndirectionLevel, - llvm::raw_svector_ostream &os); + StringRef prettyPrintFirstElement(StringRef FirstElement, + bool MoreItemsExpected, + int IndirectionLevel, + llvm::raw_svector_ostream &os); }; - -} // end of anonymous namespace +} // namespace /// \return Whether the method declaration \p Parent /// syntactically has a binary operation writing into the ivar \p Ivar. @@ -486,25 +579,6 @@ static bool potentiallyWritesIntoIvar(const Decl *Parent, return false; } -/// Get parameters associated with runtime definition in order -/// to get the correct parameter name. -static ArrayRef getCallParameters(CallEventRef<> Call) { - // Use runtime definition, if available. - RuntimeDefinition RD = Call->getRuntimeDefinition(); - if (const auto *FD = dyn_cast_or_null(RD.getDecl())) - return FD->parameters(); - if (const auto *MD = dyn_cast_or_null(RD.getDecl())) - return MD->parameters(); - - return Call->parameters(); -} - -/// \return whether \p Ty points to a const type, or is a const reference. -static bool isPointerToConst(QualType Ty) { - return !Ty->getPointeeType().isNull() && - Ty->getPointeeType().getCanonicalType().isConstQualified(); -} - /// Attempts to find the region of interest in a given CXX decl, /// by either following the base classes or fields. /// Dereferences fields up to a given recursion limit. @@ -564,68 +638,66 @@ NoStoreFuncVisitor::findRegionOfInterestInRecord( } PathDiagnosticPieceRef -NoStoreFuncVisitor::VisitNode(const ExplodedNode *N, BugReporterContext &BR, - PathSensitiveBugReport &R) { - - const LocationContext *Ctx = N->getLocationContext(); - const StackFrameContext *SCtx = Ctx->getStackFrame(); - ProgramStateRef State = N->getState(); - auto CallExitLoc = N->getLocationAs(); - - // No diagnostic if region was modified inside the frame. - if (!CallExitLoc || isRegionOfInterestModifiedInFrame(N)) - return nullptr; - - CallEventRef<> Call = - BR.getStateManager().getCallEventManager().getCaller(SCtx, State); - - // Region of interest corresponds to an IVar, exiting a method - // which could have written into that IVar, but did not. 
- if (const auto *MC = dyn_cast(Call)) { - if (const auto *IvarR = dyn_cast(RegionOfInterest)) { - const MemRegion *SelfRegion = MC->getReceiverSVal().getAsRegion(); - if (RegionOfInterest->isSubRegionOf(SelfRegion) && - potentiallyWritesIntoIvar(Call->getRuntimeDefinition().getDecl(), - IvarR->getDecl())) - return maybeEmitNote(R, *Call, N, {}, SelfRegion, "self", - /*FirstIsReferenceType=*/false, 1); - } +NoStoreFuncVisitor::maybeEmitNoteForObjCSelf(PathSensitiveBugReport &R, + const ObjCMethodCall &Call, + const ExplodedNode *N) { + if (const auto *IvarR = dyn_cast(RegionOfInterest)) { + const MemRegion *SelfRegion = Call.getReceiverSVal().getAsRegion(); + if (RegionOfInterest->isSubRegionOf(SelfRegion) && + potentiallyWritesIntoIvar(Call.getRuntimeDefinition().getDecl(), + IvarR->getDecl())) + return maybeEmitNote(R, Call, N, {}, SelfRegion, "self", + /*FirstIsReferenceType=*/false, 1); } + return nullptr; +} - if (const auto *CCall = dyn_cast(Call)) { - const MemRegion *ThisR = CCall->getCXXThisVal().getAsRegion(); - if (RegionOfInterest->isSubRegionOf(ThisR) && - !CCall->getDecl()->isImplicit()) - return maybeEmitNote(R, *Call, N, {}, ThisR, "this", - /*FirstIsReferenceType=*/false, 1); +PathDiagnosticPieceRef +NoStoreFuncVisitor::maybeEmitNoteForCXXThis(PathSensitiveBugReport &R, + const CXXConstructorCall &Call, + const ExplodedNode *N) { + const MemRegion *ThisR = Call.getCXXThisVal().getAsRegion(); + if (RegionOfInterest->isSubRegionOf(ThisR) && !Call.getDecl()->isImplicit()) + return maybeEmitNote(R, Call, N, {}, ThisR, "this", + /*FirstIsReferenceType=*/false, 1); + + // Do not generate diagnostics for not modified parameters in + // constructors. + return nullptr; +} - // Do not generate diagnostics for not modified parameters in - // constructors. - return nullptr; - } +/// \return whether \p Ty points to a const type, or is a const reference. 
+static bool isPointerToConst(QualType Ty) { + return !Ty->getPointeeType().isNull() && + Ty->getPointeeType().getCanonicalType().isConstQualified(); +} - ArrayRef parameters = getCallParameters(Call); - for (unsigned I = 0; I < Call->getNumArgs() && I < parameters.size(); ++I) { - const ParmVarDecl *PVD = parameters[I]; - SVal V = Call->getArgSVal(I); +PathDiagnosticPieceRef NoStoreFuncVisitor::maybeEmitNoteForParameters( + PathSensitiveBugReport &R, const CallEvent &Call, const ExplodedNode *N) { + ArrayRef Parameters = Call.parameters(); + for (unsigned I = 0; I < Call.getNumArgs() && I < Parameters.size(); ++I) { + const ParmVarDecl *PVD = Parameters[I]; + SVal V = Call.getArgSVal(I); bool ParamIsReferenceType = PVD->getType()->isReferenceType(); std::string ParamName = PVD->getNameAsString(); - int IndirectionLevel = 1; + unsigned IndirectionLevel = 1; QualType T = PVD->getType(); while (const MemRegion *MR = V.getAsRegion()) { if (RegionOfInterest->isSubRegionOf(MR) && !isPointerToConst(T)) - return maybeEmitNote(R, *Call, N, {}, MR, ParamName, + return maybeEmitNote(R, Call, N, {}, MR, ParamName, ParamIsReferenceType, IndirectionLevel); QualType PT = T->getPointeeType(); if (PT.isNull() || PT->isVoidType()) break; + ProgramStateRef State = N->getState(); + if (const RecordDecl *RD = PT->getAsRecordDecl()) if (Optional P = findRegionOfInterestInRecord(RD, State, MR)) - return maybeEmitNote(R, *Call, N, *P, RegionOfInterest, ParamName, + return maybeEmitNote(R, Call, N, *P, RegionOfInterest, ParamName, ParamIsReferenceType, IndirectionLevel); V = State->getSVal(MR, PT); @@ -637,40 +709,11 @@ NoStoreFuncVisitor::VisitNode(const ExplodedNode *N, BugReporterContext &BR, return nullptr; } -void NoStoreFuncVisitor::findModifyingFrames(const ExplodedNode *N) { - assert(N->getLocationAs()); - ProgramStateRef LastReturnState = N->getState(); - SVal ValueAtReturn = LastReturnState->getSVal(RegionOfInterest); - const LocationContext *Ctx = N->getLocationContext(); - const StackFrameContext *OriginalSCtx = Ctx->getStackFrame(); - - do { - ProgramStateRef State = N->getState(); - auto CallExitLoc = N->getLocationAs(); - if (CallExitLoc) { - LastReturnState = State; - ValueAtReturn = LastReturnState->getSVal(RegionOfInterest); - } - - FramesModifyingCalculated.insert(N->getLocationContext()->getStackFrame()); - - if (wasRegionOfInterestModifiedAt(RegionOfInterest, N, ValueAtReturn)) { - const StackFrameContext *SCtx = N->getStackFrame(); - while (!SCtx->inTopFrame()) { - auto p = FramesModifyingRegion.insert(SCtx); - if (!p.second) - break; // Frame and all its parents already inserted. - SCtx = SCtx->getParent()->getStackFrame(); - } - } - - // Stop calculation at the call to the current function. 
- if (auto CE = N->getLocationAs()) - if (CE->getCalleeContext() == OriginalSCtx) - break; - - N = N->getFirstPred(); - } while (N); +bool NoStoreFuncVisitor::wasModifiedBeforeCallExit( + const ExplodedNode *CurrN, const ExplodedNode *CallExitBeginN) { + return ::wasRegionOfInterestModifiedAt( + RegionOfInterest, CurrN, + CallExitBeginN->getState()->getSVal(RegionOfInterest)); } static llvm::StringLiteral WillBeUsedForACondition = @@ -681,27 +724,6 @@ PathDiagnosticPieceRef NoStoreFuncVisitor::maybeEmitNote( const RegionVector &FieldChain, const MemRegion *MatchedRegion, StringRef FirstElement, bool FirstIsReferenceType, unsigned IndirectionLevel) { - // Optimistically suppress uninitialized value bugs that result - // from system headers having a chance to initialize the value - // but failing to do so. It's too unlikely a system header's fault. - // It's much more likely a situation in which the function has a failure - // mode that the user decided not to check. If we want to hunt such - // omitted checks, we should provide an explicit function-specific note - // describing the precondition under which the function isn't supposed to - // initialize its out-parameter, and additionally check that such - // precondition can actually be fulfilled on the current path. - if (Call.isInSystemHeader()) { - // We make an exception for system header functions that have no branches. - // Such functions unconditionally fail to initialize the variable. - // If they call other functions that have more paths within them, - // this suppression would still apply when we visit these inner functions. - // One common example of a standard function that doesn't ever initialize - // its out parameter is operator placement new; it's up to the follow-up - // constructor (if any) to initialize the memory. - if (!N->getStackFrame()->getCFG()->isLinear()) - R.markInvalid(getTag(), nullptr); - return nullptr; - } PathDiagnosticLocation L = PathDiagnosticLocation::create(N->getLocation(), SM); @@ -717,8 +739,8 @@ PathDiagnosticPieceRef NoStoreFuncVisitor::maybeEmitNote( os << "Returning without writing to '"; // Do not generate the note if failed to pretty-print. - if (!prettyPrintRegionName(FirstElement, FirstIsReferenceType, MatchedRegion, - FieldChain, IndirectionLevel, os)) + if (!prettyPrintRegionName(FieldChain, MatchedRegion, FirstElement, + FirstIsReferenceType, IndirectionLevel, os)) return nullptr; os << "'"; @@ -727,11 +749,11 @@ PathDiagnosticPieceRef NoStoreFuncVisitor::maybeEmitNote( return std::make_shared(L, os.str()); } -bool NoStoreFuncVisitor::prettyPrintRegionName(StringRef FirstElement, - bool FirstIsReferenceType, +bool NoStoreFuncVisitor::prettyPrintRegionName(const RegionVector &FieldChain, const MemRegion *MatchedRegion, - const RegionVector &FieldChain, - int IndirectionLevel, + StringRef FirstElement, + bool FirstIsReferenceType, + unsigned IndirectionLevel, llvm::raw_svector_ostream &os) { if (FirstIsReferenceType) @@ -1153,7 +1175,7 @@ class StoreSiteFinder final : public TrackingBugReporterVisitor { public: /// \param V We're searching for the store where \c R received this value. /// \param R The region we're tracking. - /// \param TKind May limit the amount of notes added to the bug report. + /// \param Options Tracking behavior options. /// \param OriginSFC Only adds notes when the last store happened in a /// different stackframe to this one. Disregarded if the tracking kind /// is thorough. 
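With the refactor, maybeEmitNoteForParameters walks Call.parameters() directly, skips pointer-to-const parameters via the relocated isPointerToConst() helper, and attaches the "Returning without writing to ..." note when a callee exits without storing through an out-parameter. The snippet below is a hedged, reproducer-style illustration of the kind of user code this visitor annotates; it is an assumed example, not taken from the patch's test suite, and the exact note wording may differ.

```cpp
// 'out' is a non-const pointer parameter, but on the '!ok' path the callee
// returns without ever storing through it, so the visitor can attach a note
// along the lines of "Returning without writing to '*out'" at that return.
void maybeInit(int *out, bool ok) {
  if (ok)
    *out = 42;
}

// A pointer-to-const parameter is rejected by isPointerToConst(): the callee
// could not have initialized it anyway, so no note is attached for 'cfg'.
int readOnly(const int *cfg) { return cfg ? *cfg : 0; }

int caller(bool ok) {
  int v;             // 'v' starts uninitialized
  maybeInit(&v, ok); // on the 'ok == false' path nothing is written
  (void)readOnly(&v);
  return v;          // use of a possibly uninitialized value; the no-store
                     // note explains which call failed to initialize it
}
```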
diff --git a/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp b/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp index 3ee12c0bdf651..c90046ffb4131 100644 --- a/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp +++ b/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp @@ -1289,15 +1289,32 @@ var findNum = function() { return out; }; +var classListAdd = function(el, theClass) { + if(!el.className.baseVal) + el.className += " " + theClass; + else + el.className.baseVal += " " + theClass; +}; + +var classListRemove = function(el, theClass) { + var className = (!el.className.baseVal) ? + el.className : el.className.baseVal; + className = className.replace(" " + theClass, ""); + if(!el.className.baseVal) + el.className = className; + else + el.className.baseVal = className; +}; + var scrollTo = function(el) { querySelectorAllArray(".selected").forEach(function(s) { - s.classList.remove("selected"); + classListRemove(s, "selected"); }); - el.classList.add("selected"); + classListAdd(el, "selected"); window.scrollBy(0, el.getBoundingClientRect().top - (window.innerHeight / 2)); highlightArrowsForSelectedEvent(); -} +}; var move = function(num, up, numItems) { if (num == 1 && up || num == numItems - 1 && !up) { @@ -1332,9 +1349,11 @@ window.addEventListener("keydown", function (event) { if (event.defaultPrevented) { return; } - if (event.key == "j") { + // key 'j' + if (event.keyCode == 74) { navigateTo(/*up=*/false); - } else if (event.key == "k") { + // key 'k' + } else if (event.keyCode == 75) { navigateTo(/*up=*/true); } else { return; @@ -1350,8 +1369,11 @@ StringRef HTMLDiagnostics::generateArrowDrawingJavascript() {