diff --git a/clang-tools-extra/clang-tidy/misc/ConstCorrectnessCheck.cpp b/clang-tools-extra/clang-tidy/misc/ConstCorrectnessCheck.cpp
index 83111a1c752b09..2ce1dc14c3b0da 100644
--- a/clang-tools-extra/clang-tidy/misc/ConstCorrectnessCheck.cpp
+++ b/clang-tools-extra/clang-tidy/misc/ConstCorrectnessCheck.cpp
@@ -12,8 +12,6 @@
 #include "clang/ASTMatchers/ASTMatchFinder.h"
 #include "clang/ASTMatchers/ASTMatchers.h"
 
-#include
-
 using namespace clang::ast_matchers;
 
 namespace clang {
@@ -132,6 +130,12 @@ void ConstCorrectnessCheck::check(const MatchFinder::MatchResult &Result) {
     VC = VariableCategory::Reference;
   if (Variable->getType()->isPointerType())
     VC = VariableCategory::Pointer;
+  if (Variable->getType()->isArrayType()) {
+    if (const auto *ArrayT = dyn_cast<ArrayType>(Variable->getType())) {
+      if (ArrayT->getElementType()->isPointerType())
+        VC = VariableCategory::Pointer;
+    }
+  }
 
   // Each variable can only be in one category: Value, Pointer, Reference.
   // Analysis can be controlled for every category.
diff --git a/clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-pointer-as-values.cpp b/clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-pointer-as-values.cpp
index 8a267eb04a1516..9a150e887234d9 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-pointer-as-values.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-pointer-as-values.cpp
@@ -10,4 +10,65 @@ void potential_const_pointer() {
   double *p_local0 = &np_local0[1];
   // CHECK-MESSAGES: [[@LINE-1]]:3: warning: variable 'p_local0' of type 'double *' can be declared 'const'
   // CHECK-FIXES: double *const p_local0
+
+  using doublePtr = double*;
+  using doubleArray = double[15];
+  doubleArray np_local1;
+  doublePtr p_local1 = &np_local1[0];
+  // CHECK-MESSAGES: [[@LINE-1]]:3: warning: variable 'p_local1' of type 'doublePtr' (aka 'double *') can be declared 'const'
+  // CHECK-FIXES: doublePtr const p_local1
+}
+
+void range_for() {
+  int np_local0[2] = {1, 2};
+  int *p_local0[2] = {&np_local0[0], &np_local0[1]};
+  // CHECK-MESSAGES: [[@LINE-1]]:3: warning: variable 'p_local0' of type 'int *[2]' can be declared 'const'
+  // CHECK-FIXES: int *const p_local0[2]
+  for (const int *p_local1 : p_local0) {
+  // CHECK-MESSAGES: [[@LINE-1]]:8: warning: variable 'p_local1' of type 'const int *' can be declared 'const'
+  // CHECK-FIXES: for (const int *const p_local1 : p_local0)
+  }
+
+  int *p_local2[2] = {nullptr, nullptr};
+  // CHECK-MESSAGES: [[@LINE-1]]:3: warning: variable 'p_local2' of type 'int *[2]' can be declared 'const'
+  // CHECK-FIXES: int *const p_local2[2]
+  for (const auto *con_ptr : p_local2) {
+  }
+
+}
+
+template <typename T>
+struct SmallVectorBase {
+  T data[4];
+  void push_back(const T &el) {}
+  int size() const { return 4; }
+  T *begin() { return data; }
+  const T *begin() const { return data; }
+  T *end() { return data + 4; }
+  const T *end() const { return data + 4; }
+};
+
+template <typename T>
+struct SmallVector : SmallVectorBase<T> {};
+
+template <typename T>
+void EmitProtocolMethodList(T &&Methods) {
+  // Note: If the template is uninstantiated, the analysis does not figure
+  // out that p_local0 could be const. Not sure why; it probably bails
+  // because some expressions are type-dependent.
+  SmallVector<const int *> p_local0;
+  // CHECK-MESSAGES: [[@LINE-1]]:3: warning: variable 'p_local0' of type 'SmallVector<const int *>' can be declared 'const'
+  // CHECK-FIXES: SmallVector<const int *> const p_local0
+  SmallVector<const int *> np_local0;
+  for (const auto *I : Methods) {
+    if (I == nullptr)
+      np_local0.push_back(I);
+  }
+  p_local0.size();
+}
+void instantiate() {
+  int *p_local0[4] = {nullptr, nullptr, nullptr, nullptr};
+  // CHECK-MESSAGES: [[@LINE-1]]:3: warning: variable 'p_local0' of type 'int *[4]' can be declared 'const'
+  // CHECK-FIXES: int *const p_local0[4]
+  EmitProtocolMethodList(p_local0);
+}
diff --git a/clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-values.cpp b/clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-values.cpp
index 8acb0bd6b42c42..f469bfd055c932 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-values.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-values.cpp
@@ -526,18 +526,13 @@ void range_for() {
   // CHECK-FIXES: int const p_local1[2]
   for (const int &const_ref : p_local1) {
   }
+}
 
-  int *p_local2[2] = {&np_local0[0], &np_local0[1]};
-  // CHECK-MESSAGES: [[@LINE-1]]:3: warning: variable 'p_local2' of type 'int *[2]' can be declared 'const'
-  // CHECK-FIXES: int *const p_local2[2]
-  for (const int *con_ptr : p_local2) {
-  }
-
-  int *p_local3[2] = {nullptr, nullptr};
-  // CHECK-MESSAGES: [[@LINE-1]]:3: warning: variable 'p_local3' of type 'int *[2]' can be declared 'const'
-  // CHECK-FIXES: int *const p_local3[2]
-  for (const auto *con_ptr : p_local3) {
-  }
+void arrays_of_pointers_are_ignored() {
+  int *np_local0[2] = {nullptr, nullptr};
+
+  using intPtr = int*;
+  intPtr np_local1[2] = {nullptr, nullptr};
 }
 
 inline void *operator new(decltype(sizeof(void *)), void *p) { return p; }
@@ -908,41 +903,6 @@ void vlas() {
   sizeof(int[++N]);
 }
 
-template <typename T>
-struct SmallVectorBase {
-  T data[4];
-  void push_back(const T &el) {}
-  int size() const { return 4; }
-  T *begin() { return data; }
-  const T *begin() const { return data; }
-  T *end() { return data + 4; }
-  const T *end() const { return data + 4; }
-};
-
-template <typename T>
-struct SmallVector : SmallVectorBase<T> {};
-
-template <typename T>
-void EmitProtocolMethodList(T &&Methods) {
-  // Note: If the template is uninstantiated, the analysis does not figure
-  // out that p_local0 could be const. Not sure why; it probably bails
-  // because some expressions are type-dependent.
-  SmallVector<const int *> p_local0;
-  // CHECK-MESSAGES: [[@LINE-1]]:3: warning: variable 'p_local0' of type 'SmallVector<const int *>' can be declared 'const'
-  // CHECK-FIXES: SmallVector<const int *> const p_local0
-  SmallVector<const int *> np_local0;
-  for (const auto *I : Methods) {
-    if (I == nullptr)
-      np_local0.push_back(I);
-  }
-  p_local0.size();
-}
-void instantiate() {
-  int *p_local0[4] = {nullptr, nullptr, nullptr, nullptr};
-  // CHECK-MESSAGES: [[@LINE-1]]:3: warning: variable 'p_local0' of type 'int *[4]' can be declared 'const'
-  // CHECK-FIXES: int *const p_local0[4]
-  EmitProtocolMethodList(p_local0);
-}
 
 struct base {
   int member;
 };
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CoreEngine.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CoreEngine.h
index 220aee759a935c..a595d517cd2768 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CoreEngine.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CoreEngine.h
@@ -214,8 +214,14 @@ struct NodeBuilderContext {
   const CFGBlock *Block;
   const LocationContext *LC;
 
+  NodeBuilderContext(const CoreEngine &E, const CFGBlock *B,
+                     const LocationContext *L)
+      : Eng(E), Block(B), LC(L) {
+    assert(B);
+  }
+
   NodeBuilderContext(const CoreEngine &E, const CFGBlock *B, ExplodedNode *N)
-      : Eng(E), Block(B), LC(N->getLocationContext()) { assert(B); }
+      : NodeBuilderContext(E, B, N->getLocationContext()) {}
 
   /// Return the CFGBlock associated with this builder.
   const CFGBlock *getBlock() const { return Block; }
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h
index a905f9097750d0..848e43d15fbff4 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h
@@ -732,6 +732,7 @@ class ExprEngine {
   /// A multi-dimensional array is also a contiguous memory location in
   /// row-major order, so for arr[0][0] Idx is 0 and for arr[2][2] Idx is 8.
   SVal computeObjectUnderConstruction(const Expr *E, ProgramStateRef State,
+                                      const NodeBuilderContext *BldrCtx,
                                       const LocationContext *LCtx,
                                       const ConstructionContext *CC,
                                       EvalCallOptions &CallOpts,
@@ -748,13 +749,13 @@
 
   /// A convenient wrapper around computeObjectUnderConstruction
   /// and updateObjectsUnderConstruction.
-  std::pair<ProgramStateRef, SVal>
-  handleConstructionContext(const Expr *E, ProgramStateRef State,
-                            const LocationContext *LCtx,
-                            const ConstructionContext *CC,
-                            EvalCallOptions &CallOpts, unsigned Idx = 0) {
+  std::pair<ProgramStateRef, SVal> handleConstructionContext(
+      const Expr *E, ProgramStateRef State, const NodeBuilderContext *BldrCtx,
+      const LocationContext *LCtx, const ConstructionContext *CC,
+      EvalCallOptions &CallOpts, unsigned Idx = 0) {
-    SVal V = computeObjectUnderConstruction(E, State, LCtx, CC, CallOpts, Idx);
+    SVal V = computeObjectUnderConstruction(E, State, BldrCtx, LCtx, CC,
+                                            CallOpts, Idx);
 
     State = updateObjectsUnderConstruction(V, E, State, LCtx, CC, CallOpts);
 
     return std::make_pair(State, V);
diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp
index 6f6bad1052a15a..63027413f37aa1 100644
--- a/clang/lib/CodeGen/CGDebugInfo.cpp
+++ b/clang/lib/CodeGen/CGDebugInfo.cpp
@@ -370,6 +370,7 @@ CGDebugInfo::computeChecksum(FileID FID, SmallString<64> &Checksum) const {
     llvm::toHex(llvm::SHA256::hash(Data), /*LowerCase=*/true, Checksum);
     return llvm::DIFile::CSK_SHA256;
   }
+  llvm_unreachable("Unhandled DebugSrcHashKind enum");
 }
 
 Optional<StringRef> CGDebugInfo::getSource(const SourceManager &SM,
diff --git a/clang/lib/StaticAnalyzer/Core/CallEvent.cpp b/clang/lib/StaticAnalyzer/Core/CallEvent.cpp
index 57591960a14017..a8b49adfb4c9a6 100644
--- a/clang/lib/StaticAnalyzer/Core/CallEvent.cpp
+++ b/clang/lib/StaticAnalyzer/Core/CallEvent.cpp
@@ -485,9 +485,9 @@ CallEvent::getReturnValueUnderConstruction() const {
 
   EvalCallOptions CallOpts;
   ExprEngine &Engine = getState()->getStateManager().getOwningEngine();
-  SVal RetVal =
-      Engine.computeObjectUnderConstruction(getOriginExpr(), getState(),
-                                            getLocationContext(), CC, CallOpts);
+  SVal RetVal = Engine.computeObjectUnderConstruction(
+      getOriginExpr(), getState(), &Engine.getBuilderContext(),
+      getLocationContext(), CC, CallOpts);
 
   return RetVal;
 }
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp
index ae878ecbcc34a7..476afc598ac6cd 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp
@@ -111,9 +111,15 @@ SVal ExprEngine::makeElementRegion(ProgramStateRef State, SVal LValue,
   return LValue;
 }
 
+// When the prvalue is returned from the function (the kind is one of
+// SimpleReturnedValueKind, CXX17ElidedCopyReturnedValueKind), its
+// materialization happens in the context of the caller.
+// We pass BldrCtx explicitly, as currBldrCtx always refers to the callee's
+// context.
 SVal ExprEngine::computeObjectUnderConstruction(
-    const Expr *E, ProgramStateRef State, const LocationContext *LCtx,
-    const ConstructionContext *CC, EvalCallOptions &CallOpts, unsigned Idx) {
+    const Expr *E, ProgramStateRef State, const NodeBuilderContext *BldrCtx,
+    const LocationContext *LCtx, const ConstructionContext *CC,
+    EvalCallOptions &CallOpts, unsigned Idx) {
+
   SValBuilder &SVB = getSValBuilder();
   MemRegionManager &MRMgr = SVB.getRegionManager();
   ASTContext &ACtx = SVB.getContext();
@@ -210,8 +216,11 @@ SVal ExprEngine::computeObjectUnderConstruction(
           CallerLCtx = CallerLCtx->getParent();
           assert(!isa<BlockInvocationContext>(CallerLCtx));
         }
+
+        NodeBuilderContext CallerBldrCtx(getCoreEngine(),
+                                         SFC->getCallSiteBlock(), CallerLCtx);
         return computeObjectUnderConstruction(
-            cast<Expr>(SFC->getCallSite()), State, CallerLCtx,
+            cast<Expr>(SFC->getCallSite()), State, &CallerBldrCtx, CallerLCtx,
            RTC->getConstructionContext(), CallOpts);
       } else {
        // We are on the top frame of the analysis.
We do not know where is the @@ -251,7 +260,7 @@ SVal ExprEngine::computeObjectUnderConstruction( EvalCallOptions PreElideCallOpts = CallOpts; SVal V = computeObjectUnderConstruction( - TCC->getConstructorAfterElision(), State, LCtx, + TCC->getConstructorAfterElision(), State, BldrCtx, LCtx, TCC->getConstructionContextAfterElision(), CallOpts); // FIXME: This definition of "copy elision has not failed" is unreliable. @@ -319,7 +328,7 @@ SVal ExprEngine::computeObjectUnderConstruction( CallEventManager &CEMgr = getStateManager().getCallEventManager(); auto getArgLoc = [&](CallEventRef<> Caller) -> Optional { const LocationContext *FutureSFC = - Caller->getCalleeStackFrame(currBldrCtx->blockCount()); + Caller->getCalleeStackFrame(BldrCtx->blockCount()); // Return early if we are unable to reliably foresee // the future stack frame. if (!FutureSFC) @@ -338,7 +347,7 @@ SVal ExprEngine::computeObjectUnderConstruction( // because this-argument is implemented as a normal argument in // operator call expressions but not in operator declarations. const TypedValueRegion *TVR = Caller->getParameterLocation( - *Caller->getAdjustedParameterIndex(Idx), currBldrCtx->blockCount()); + *Caller->getAdjustedParameterIndex(Idx), BldrCtx->blockCount()); if (!TVR) return None; @@ -643,8 +652,8 @@ void ExprEngine::handleConstructor(const Expr *E, } // The target region is found from construction context. - std::tie(State, Target) = - handleConstructionContext(CE, State, LCtx, CC, CallOpts, Idx); + std::tie(State, Target) = handleConstructionContext( + CE, State, currBldrCtx, LCtx, CC, CallOpts, Idx); break; } case CXXConstructExpr::CK_VirtualBase: { diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp index dbfded29c1ae4f..48b5db1eb4a521 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp @@ -774,9 +774,9 @@ ProgramStateRef ExprEngine::bindReturnValue(const CallEvent &Call, SVal Target; assert(RTC->getStmt() == Call.getOriginExpr()); EvalCallOptions CallOpts; // FIXME: We won't really need those. - std::tie(State, Target) = - handleConstructionContext(Call.getOriginExpr(), State, LCtx, - RTC->getConstructionContext(), CallOpts); + std::tie(State, Target) = handleConstructionContext( + Call.getOriginExpr(), State, currBldrCtx, LCtx, + RTC->getConstructionContext(), CallOpts); const MemRegion *TargetR = Target.getAsRegion(); assert(TargetR); // Invalidate the region so that it didn't look uninitialized. 
If this is diff --git a/clang/test/Analysis/copy-elision.cpp b/clang/test/Analysis/copy-elision.cpp index dee1a5fc86e7fa..991f325c05853d 100644 --- a/clang/test/Analysis/copy-elision.cpp +++ b/clang/test/Analysis/copy-elision.cpp @@ -20,6 +20,7 @@ #endif void clang_analyzer_eval(bool); +void clang_analyzer_dump(int); namespace variable_functional_cast_crash { @@ -418,3 +419,31 @@ void test_copy_elision() { } } // namespace address_vector_tests + +namespace arg_directly_from_return_in_loop { + +struct Result { + int value; +}; + +Result create() { + return Result{10}; +} + +int accessValue(Result r) { + return r.value; +} + +void test() { + for (int i = 0; i < 3; ++i) { + int v = accessValue(create()); + if (i == 0) { + clang_analyzer_dump(v); // expected-warning {{10 S32b}} + } else { + clang_analyzer_dump(v); // expected-warning {{10 S32b}} + // was {{reg_${{[0-9]+}} }} for C++11 + } + } +} + +} // namespace arg_directly_from_return_in_loop diff --git a/clang/test/C/drs/dr3xx.c b/clang/test/C/drs/dr3xx.c index 795bc590f5351b..0f06118ca6e572 100644 --- a/clang/test/C/drs/dr3xx.c +++ b/clang/test/C/drs/dr3xx.c @@ -1,8 +1,8 @@ /* RUN: %clang_cc1 -std=c89 -fsyntax-only -Wvla -verify=expected,c89only -pedantic -Wno-c11-extensions %s - RUN: %clang_cc1 -std=c99 -fsyntax-only -Wvla -verify -pedantic -Wno-c11-extensions %s - RUN: %clang_cc1 -std=c11 -fsyntax-only -Wvla -verify -pedantic %s - RUN: %clang_cc1 -std=c17 -fsyntax-only -Wvla -verify -pedantic %s - RUN: %clang_cc1 -std=c2x -fsyntax-only -Wvla -verify -pedantic %s + RUN: %clang_cc1 -std=c99 -fsyntax-only -Wvla -verify=expected,c99andup -pedantic -Wno-c11-extensions %s + RUN: %clang_cc1 -std=c11 -fsyntax-only -Wvla -verify=expected,c99andup -pedantic %s + RUN: %clang_cc1 -std=c17 -fsyntax-only -Wvla -verify=expected,c99andup -pedantic %s + RUN: %clang_cc1 -std=c2x -fsyntax-only -Wvla -verify=expected,c99andup -pedantic %s */ /* The following are DRs which do not require tests to demonstrate @@ -108,10 +108,91 @@ _Static_assert(sizeof(dr315.a + dr315.b) == sizeof(unsigned long long), ""); /* */ _Static_assert(sizeof(dr315.c + dr315.d) == sizeof(int), ""); +#if __STDC_VERSION__ < 202000L /* WG14 DR316: yes * Unprototyped function types */ -#if __STDC_VERSION__ < 202000L void dr316_1(a) int a; {} /* expected-warning {{a function definition without a prototype is deprecated in all versions of C and is not supported in C2x}} */ void (*dr316_1_ptr)(int, int, int) = dr316_1; + +/* WG14 DR317: yes + * Function definitions with empty parentheses + * + * Despite the function with empty parens being a definition, this does not + * provide a prototype for the function. However, calling the function with + * arguments is undefined behavior, so it is defensible for us to warn the user + * about it. They key point to this DR is that we give the "without a + * prototype" warnings to demonstrate we don't give this function a prototype. 
+ */ +void dr317_1() {} /* expected-warning {{a function declaration without a prototype is deprecated in all versions of C}} */ +void dr317_2(void) { + if (0) + dr317_1(1); /* expected-warning {{too many arguments in call to 'dr317_1'}} + expected-warning {{passing arguments to 'dr317_1' without a prototype is deprecated in all versions of C and is not supported in C2x}} + */ +} #endif /* __STDC_VERSION__ < 202000L */ + +/* WG14 DR320: yes + * Scope of variably modified type + */ +int dr320_v; +typedef int dr320_t[dr320_v]; /* c89only-warning {{variable length arrays are a C99 feature}} + expected-error {{variable length array declaration not allowed at file scope}} + c99andup-warning {{variable length array used}} + */ +void dr320(int okay[dr320_v]) { /* c89only-warning {{variable length arrays are a C99 feature}} + c99andup-warning {{variable length array used}} + */ + typedef int type[dr320_v]; /* c89only-warning {{variable length arrays are a C99 feature}} + c99andup-warning {{variable length array used}} + */ + extern type bad; /* expected-error {{variable length array declaration cannot have 'extern' linkage}} */ + + /* C99 6.7.5.2p2, second sentence. */ + static type fine; /* expected-error {{variable length array declaration cannot have 'static' storage duration}} */ +} + +/* WG14 DR321: yes + * Wide character code values for members of the basic character set + */ +#define DR321 (\ + ' ' == L' ' && '\t' == L'\t' && '\v' == L'\v' && '\r' == L'\r' && \ + '\n' == L'\n' && \ + 'a' == L'a' && 'b' == L'b' && 'c' == L'c' && 'd' == L'd' && 'e' == L'e' && \ + 'f' == L'f' && 'g' == L'g' && 'h' == L'h' && 'i' == L'i' && 'j' == L'j' && \ + 'k' == L'k' && 'l' == L'l' && 'm' == L'm' && 'n' == L'n' && 'o' == L'o' && \ + 'p' == L'p' && 'q' == L'q' && 'r' == L'r' && 's' == L's' && 't' == L't' && \ + 'u' == L'u' && 'v' == L'v' && 'w' == L'w' && 'x' == L'x' && 'y' == L'y' && \ + 'z' == L'z' && \ + 'A' == L'A' && 'B' == L'B' && 'C' == L'C' && 'D' == L'D' && 'E' == L'E' && \ + 'F' == L'F' && 'G' == L'G' && 'H' == L'H' && 'I' == L'I' && 'J' == L'J' && \ + 'K' == L'K' && 'L' == L'L' && 'M' == L'M' && 'N' == L'N' && 'O' == L'O' && \ + 'P' == L'P' && 'Q' == L'Q' && 'R' == L'R' && 'S' == L'S' && 'T' == L'T' && \ + 'U' == L'U' && 'V' == L'V' && 'W' == L'W' && 'X' == L'X' && 'Y' == L'Y' && \ + 'Z' == L'Z' && \ + '0' == L'0' && '1' == L'1' && '2' == L'2' && '3' == L'3' && '4' == L'4' && \ + '5' == L'5' && '6' == L'6' && '7' == L'7' && '8' == L'8' && \ + '9' == L'9' && \ + '_' == L'_' && '{' == L'{' && '}' == L'}' && '[' == L'[' && ']' == L']' && \ + '#' == L'#' && '(' == L'(' && ')' == L')' && '<' == L'<' && '>' == L'>' && \ + '%' == L'%' && ':' == L':' && ';' == L';' && '.' == L'.' && '?' == L'?' && \ + '*' == L'*' && '+' == L'+' && '-' == L'-' && '/' == L'/' && '^' == L'^' && \ + '&' == L'&' && '|' == L'|' && '~' == L'~' && '!' == L'!' && '=' == L'=' && \ + ',' == L',' && '\\' == L'\\' && '"' == L'"' && '\'' == L'\'' \ + ) +#if __STDC_MB_MIGHT_NEQ_WC__ +#ifndef __FreeBSD__ // PR22208, FreeBSD expects us to give a bad (but conforming) answer here. 
+_Static_assert(!DR321, "__STDC_MB_MIGHT_NEQ_WC__ but all basic source characters have same representation"); +#endif +#else +_Static_assert(DR321, "!__STDC_MB_MIGHT_NEQ_WC__ but some character differs"); +#endif + +/* WG14 DR328: yes + * String literals in compound literal initialization + */ +const char *dr328_v = (const char *){"this is a string literal"}; /* c89only-warning {{compound literals are a C99-specific feature}} */ +void dr328(void) { + const char *val = (const char *){"also a string literal"}; /* c89only-warning {{compound literals are a C99-specific feature}} */ +} diff --git a/clang/www/c_dr_status.html b/clang/www/c_dr_status.html index 4ef2937e7fe0f1..52966d705e765b 100644 --- a/clang/www/c_dr_status.html +++ b/clang/www/c_dr_status.html @@ -1855,7 +1855,7 @@
C defect report implementation status
 317 NAD Function definitions with empty parentheses
- Unknown
+ Yes
 318
@@ -1873,13 +1873,13 @@ C defect report implementation status
 320 C99 Scope of variably modified type
- Unknown
+ Yes
 321 C99 Wide character code values for members of the basic character set
- Unknown
+ Yes
 322
@@ -1921,7 +1921,7 @@ C defect report implementation status
328 C99 String literals in compound literal initialization - Unknown + Yes 329 diff --git a/flang/include/flang/Lower/CallInterface.h b/flang/include/flang/Lower/CallInterface.h index 0a8bad06779944..c7615da73039e1 100644 --- a/flang/include/flang/Lower/CallInterface.h +++ b/flang/include/flang/Lower/CallInterface.h @@ -161,6 +161,8 @@ class CallInterface { bool mayBeReadByCall() const; /// Is the argument INTENT(OUT) bool isIntentOut() const; + /// Does the argument have the CONTIGUOUS attribute or have explicit shape ? + bool mustBeMadeContiguous() const; /// How entity is passed by. PassEntityBy passBy; /// What is the entity (SymbolRef for callee/ActualArgument* for caller) diff --git a/flang/lib/Lower/CallInterface.cpp b/flang/lib/Lower/CallInterface.cpp index b55e2ed4b804d4..ddf8fe9bd2ccd0 100644 --- a/flang/lib/Lower/CallInterface.cpp +++ b/flang/lib/Lower/CallInterface.cpp @@ -1061,6 +1061,27 @@ bool Fortran::lower::CallInterface::PassedEntity::isIntentOut() const { return true; return characteristics->GetIntent() == Fortran::common::Intent::Out; } +template +bool Fortran::lower::CallInterface::PassedEntity::mustBeMadeContiguous() + const { + if (!characteristics) + return true; + const auto *dummy = + std::get_if( + &characteristics->u); + if (!dummy) + return false; + const auto &shapeAttrs = dummy->type.attrs(); + using ShapeAttrs = Fortran::evaluate::characteristics::TypeAndShape::Attr; + if (shapeAttrs.test(ShapeAttrs::AssumedRank) || + shapeAttrs.test(ShapeAttrs::AssumedShape)) + return dummy->attrs.test( + Fortran::evaluate::characteristics::DummyDataObject::Attr::Contiguous); + if (shapeAttrs.test(ShapeAttrs::DeferredShape)) + return false; + // Explicit shape arrays are contiguous. + return dummy->type.Rank() > 0; +} template void Fortran::lower::CallInterface::determineInterface( diff --git a/flang/lib/Lower/ConvertExpr.cpp b/flang/lib/Lower/ConvertExpr.cpp index 676dfa05833f8d..cebb1a2acd4cf5 100644 --- a/flang/lib/Lower/ConvertExpr.cpp +++ b/flang/lib/Lower/ConvertExpr.cpp @@ -3071,7 +3071,11 @@ class ScalarExprLowering { /// the creation of the temp if the actual is a variable and \p byValue is /// true. It handles the cases where the actual may be absent, and all of the /// copying has to be conditional at runtime. - ExtValue prepareActualToBaseAddressLike( + /// If the actual argument may be dynamically absent, return an additional + /// boolean mlir::Value that if true means that the actual argument is + /// present. + std::pair> + prepareActualToBaseAddressLike( const Fortran::lower::SomeExpr &expr, const Fortran::lower::CallerInterface::PassedEntity &arg, CopyOutPairs ©OutPairs, bool byValue) { @@ -3092,21 +3096,23 @@ class ScalarExprLowering { (byValue || (isArray && !Fortran::evaluate::IsSimplyContiguous( expr, converter.getFoldingContext()))); const bool needsCopy = isStaticConstantByValue || variableNeedsCopy; - auto argAddr = [&]() -> ExtValue { + auto [argAddr, isPresent] = + [&]() -> std::pair> { if (!actualArgIsVariable && !needsCopy) // Actual argument is not a variable. Make sure a variable address is // not passed. 
- return genTempExtAddr(expr); + return {genTempExtAddr(expr), llvm::None}; ExtValue baseAddr; if (arg.isOptional() && Fortran::evaluate::MayBePassedAsAbsentOptional( expr, converter.getFoldingContext())) { auto [actualArgBind, isPresent] = prepareActualThatMayBeAbsent(expr); const ExtValue &actualArg = actualArgBind; if (!needsCopy) - return actualArg; + return {actualArg, isPresent}; if (isArray) - return genCopyIn(actualArg, arg, copyOutPairs, isPresent, byValue); + return {genCopyIn(actualArg, arg, copyOutPairs, isPresent, byValue), + isPresent}; // Scalars, create a temp, and use it conditionally at runtime if // the argument is present. ExtValue temp = @@ -3127,25 +3133,26 @@ class ScalarExprLowering { builder.create(loc, absent); }) .getResults()[0]; - return fir::substBase(temp, selectAddr); + return {fir::substBase(temp, selectAddr), isPresent}; } // Actual cannot be absent, the actual argument can safely be // copied-in/copied-out without any care if needed. if (isArray) { ExtValue box = genBoxArg(expr); if (needsCopy) - return genCopyIn(box, arg, copyOutPairs, - /*restrictCopyAtRuntime=*/llvm::None, byValue); + return {genCopyIn(box, arg, copyOutPairs, + /*restrictCopyAtRuntime=*/llvm::None, byValue), + llvm::None}; // Contiguous: just use the box we created above! // This gets "unboxed" below, if needed. - return box; + return {box, llvm::None}; } // Actual argument is a non-optional, non-pointer, non-allocatable // scalar. ExtValue actualArg = genExtAddr(expr); if (needsCopy) - return createInMemoryScalarCopy(builder, loc, actualArg); - return actualArg; + return {createInMemoryScalarCopy(builder, loc, actualArg), llvm::None}; + return {actualArg, llvm::None}; }(); // Scalar and contiguous expressions may be lowered to a fir.box, // either to account for potential polymorphism, or because lowering @@ -3154,7 +3161,7 @@ class ScalarExprLowering { // is passed, not one of the dynamic type), and the expr is known to // be simply contiguous, so it is safe to unbox it and pass the // address without making a copy. - return readIfBoxValue(argAddr); + return {readIfBoxValue(argAddr), isPresent}; } /// Lower a non-elemental procedure reference. @@ -3264,7 +3271,8 @@ class ScalarExprLowering { const bool byValue = arg.passBy == PassBy::BaseAddressValueAttribute || arg.passBy == PassBy::CharBoxValueAttribute; ExtValue argAddr = - prepareActualToBaseAddressLike(*expr, arg, copyOutPairs, byValue); + prepareActualToBaseAddressLike(*expr, arg, copyOutPairs, byValue) + .first; if (arg.passBy == PassBy::BaseAddress || arg.passBy == PassBy::BaseAddressValueAttribute) { caller.placeInput(arg, fir::getBase(argAddr)); @@ -3294,13 +3302,49 @@ class ScalarExprLowering { caller.placeInput(arg, boxChar); } } else if (arg.passBy == PassBy::Box) { - // Before lowering to an address, handle the allocatable/pointer actual - // argument to optional fir.box dummy. It is legal to pass - // unallocated/disassociated entity to an optional. In this case, an - // absent fir.box must be created instead of a fir.box with a null value - // (Fortran 2018 15.5.2.12 point 1). 
- if (arg.isOptional() && Fortran::evaluate::IsAllocatableOrPointerObject( - *expr, converter.getFoldingContext())) { + if (arg.mustBeMadeContiguous() && + !Fortran::evaluate::IsSimplyContiguous( + *expr, converter.getFoldingContext())) { + // If the expression is a PDT, or a polymorphic entity, or an assumed + // rank, it cannot currently be safely handled by + // prepareActualToBaseAddressLike that is intended to prepare + // arguments that can be passed as simple base address. + if (auto dynamicType = expr->GetType()) + if (dynamicType->IsPolymorphic()) + TODO(loc, "passing a polymorphic entity to an OPTIONAL " + "CONTIGUOUS argument"); + if (fir::isRecordWithTypeParameters( + fir::unwrapSequenceType(fir::unwrapPassByRefType(argTy)))) + TODO(loc, "passing to an OPTIONAL CONTIGUOUS derived type argument " + "with length parameters"); + if (Fortran::evaluate::IsAssumedRank(*expr)) + TODO(loc, "passing an assumed rank entity to an OPTIONAL " + "CONTIGUOUS argument"); + // Assumed shape VALUE are currently TODO in the call interface + // lowering. + const bool byValue = false; + auto [argAddr, isPresentValue] = + prepareActualToBaseAddressLike(*expr, arg, copyOutPairs, byValue); + mlir::Value box = builder.createBox(loc, argAddr); + if (isPresentValue) { + mlir::Value convertedBox = builder.createConvert(loc, argTy, box); + auto absent = builder.create(loc, argTy); + caller.placeInput(arg, + builder.create( + loc, *isPresentValue, convertedBox, absent)); + } else { + caller.placeInput(arg, builder.createBox(loc, argAddr)); + } + + } else if (arg.isOptional() && + Fortran::evaluate::IsAllocatableOrPointerObject( + *expr, converter.getFoldingContext())) { + // Before lowering to an address, handle the allocatable/pointer + // actual argument to optional fir.box dummy. It is legal to pass + // unallocated/disassociated entity to an optional. In this case, an + // absent fir.box must be created instead of a fir.box with a null + // value (Fortran 2018 15.5.2.12 point 1). + // // Note that passing an absent allocatable to a non-allocatable // optional dummy argument is illegal (15.5.2.12 point 3 (8)). So // nothing has to be done to generate an absent argument in this case, diff --git a/flang/test/Lower/dummy-argument-assumed-shape-optional.f90 b/flang/test/Lower/dummy-argument-assumed-shape-optional.f90 new file mode 100644 index 00000000000000..94d0fac4be87b3 --- /dev/null +++ b/flang/test/Lower/dummy-argument-assumed-shape-optional.f90 @@ -0,0 +1,377 @@ +! RUN: bbc -emit-fir %s -o - | FileCheck %s +module tests +interface + subroutine takes_contiguous(a) + real, contiguous :: a(:) + end subroutine + subroutine takes_contiguous_optional(a) + real, contiguous, optional :: a(:) + end subroutine +end interface + +contains + +! ----------------------------------------------------------------------------- +! Test passing assumed shapes to contiguous assumed shapes +! ----------------------------------------------------------------------------- +! Base case. + +subroutine test_assumed_shape_to_contiguous(x) + real :: x(:) + call takes_contiguous(x) +end subroutine +! CHECK-LABEL: func.func @_QMtestsPtest_assumed_shape_to_contiguous( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.box> {fir.bindc_name = "x"}) { +! CHECK: %[[VAL_1:.*]] = fir.convert %[[VAL_0]] : (!fir.box>) -> !fir.box +! CHECK: %[[VAL_2:.*]] = fir.call @_FortranAIsContiguous(%[[VAL_1]]) : (!fir.box) -> i1 +! CHECK: %[[VAL_3:.*]] = fir.if %[[VAL_2]] -> (!fir.heap>) { +! CHECK: %[[VAL_4:.*]] = fir.box_addr %[[VAL_0]] : (!fir.box>) -> !fir.heap> +! 
CHECK: fir.result %[[VAL_4]] : !fir.heap> +! CHECK: } else { +! CHECK: %[[VAL_7:.*]] = fir.allocmem !fir.array +! CHECK: fir.do_loop {{.*}} { + ! ... copy +! CHECK: } +! CHECK: fir.result %[[VAL_7]] : !fir.heap> +! CHECK: } +! CHECK: %[[VAL_20:.*]] = arith.constant 0 : index +! CHECK: %[[VAL_21:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_20]] : (!fir.box>, index) -> (index, index, index) +! CHECK: %[[VAL_22:.*]] = arith.constant false +! CHECK: %[[VAL_23:.*]] = arith.cmpi eq, %[[VAL_2]], %[[VAL_22]] : i1 +! CHECK: %[[VAL_24:.*]] = fir.shape %[[VAL_21]]#1 : (index) -> !fir.shape<1> +! CHECK: %[[VAL_25:.*]] = fir.embox %[[VAL_3]](%[[VAL_24]]) : (!fir.heap>, !fir.shape<1>) -> !fir.box> +! CHECK: fir.call @_QPtakes_contiguous(%[[VAL_25]]) : (!fir.box>) -> () +! CHECK: fir.if %[[VAL_23]] { +! CHECK: fir.do_loop {{.*}} { + ! ... copy +! CHECK: } +! CHECK: fir.freemem %[[VAL_3]] : !fir.heap> +! CHECK: } +! CHECK: return +! CHECK:} + +subroutine test_assumed_shape_contiguous_to_contiguous(x) + real, contiguous :: x(:) + call takes_contiguous(x) +end subroutine +! CHECK-LABEL: func.func @_QMtestsPtest_assumed_shape_contiguous_to_contiguous( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.box> {fir.bindc_name = "x", fir.contiguous}) { +! CHECK: %[[VAL_1:.*]] = fir.box_addr %[[VAL_0]] : (!fir.box>) -> !fir.ref> +! CHECK: %[[VAL_2:.*]] = arith.constant 0 : index +! CHECK: %[[VAL_3:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_2]] : (!fir.box>, index) -> (index, index, index) +! CHECK: %[[VAL_4:.*]] = arith.constant 1 : index +! CHECK: %[[VAL_5:.*]] = fir.shape_shift %[[VAL_4]], %[[VAL_3]]#1 : (index, index) -> !fir.shapeshift<1> +! CHECK: %[[VAL_6:.*]] = fir.embox %[[VAL_1]](%[[VAL_5]]) : (!fir.ref>, !fir.shapeshift<1>) -> !fir.box> +! CHECK: fir.call @_QPtakes_contiguous(%[[VAL_6]]) : (!fir.box>) -> () +! CHECK-NEXT: return + +subroutine test_assumed_shape_opt_to_contiguous(x) + real, optional :: x(:) + call takes_contiguous(x) +end subroutine +! CHECK-LABEL: func.func @_QMtestsPtest_assumed_shape_opt_to_contiguous( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.box> {fir.bindc_name = "x", fir.optional}) { +! CHECK: %[[VAL_1:.*]] = fir.convert %[[VAL_0]] : (!fir.box>) -> !fir.box +! CHECK: %[[VAL_2:.*]] = fir.call @_FortranAIsContiguous(%[[VAL_1]]) : (!fir.box) -> i1 +! CHECK: %[[VAL_3:.*]] = fir.if %[[VAL_2]] -> (!fir.heap>) { +! CHECK: %[[VAL_4:.*]] = fir.box_addr %[[VAL_0]] : (!fir.box>) -> !fir.heap> +! CHECK: fir.result %[[VAL_4]] : !fir.heap> +! CHECK: } else { +! CHECK: %[[VAL_7:.*]] = fir.allocmem !fir.array +! CHECK: fir.do_loop {{.*}} { + ! ... copy +! CHECK: } +! CHECK: fir.result %[[VAL_7]] : !fir.heap> +! CHECK: } +! CHECK: %[[VAL_20:.*]] = arith.constant 0 : index +! CHECK: %[[VAL_21:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_20]] : (!fir.box>, index) -> (index, index, index) +! CHECK: %[[VAL_22:.*]] = arith.constant false +! CHECK: %[[VAL_23:.*]] = arith.cmpi eq, %[[VAL_2]], %[[VAL_22]] : i1 +! CHECK: %[[VAL_24:.*]] = fir.shape %[[VAL_21]]#1 : (index) -> !fir.shape<1> +! CHECK: %[[VAL_25:.*]] = fir.embox %[[VAL_3]](%[[VAL_24]]) : (!fir.heap>, !fir.shape<1>) -> !fir.box> +! CHECK: fir.call @_QPtakes_contiguous(%[[VAL_25]]) : (!fir.box>) -> () +! CHECK: fir.if %[[VAL_23]] { +! CHECK: fir.do_loop {{.*}} { + ! ... copy +! CHECK: } +! CHECK: fir.freemem %[[VAL_3]] : !fir.heap> +! CHECK: } +! CHECK: return +! CHECK:} + +subroutine test_assumed_shape_contiguous_opt_to_contiguous(x) + real, optional, contiguous :: x(:) + call takes_contiguous(x) +end subroutine +! 
CHECK-LABEL: func.func @_QMtestsPtest_assumed_shape_contiguous_opt_to_contiguous( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.box> {fir.bindc_name = "x", fir.contiguous, fir.optional}) { +! CHECK: fir.call @_QPtakes_contiguous(%[[VAL_0]]) : (!fir.box>) -> () +! CHECK-NEXT: return + + +! ----------------------------------------------------------------------------- +! Test passing assumed shapes to contiguous optional assumed shapes +! ----------------------------------------------------------------------------- +! The copy-in/out must take into account the actual argument presence (which may +! not be known until runtime). + +subroutine test_assumed_shape_to_contiguous_opt(x) + real :: x(:) + call takes_contiguous_optional(x) +end subroutine +! CHECK-LABEL: func.func @_QMtestsPtest_assumed_shape_to_contiguous_opt( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.box> {fir.bindc_name = "x"}) { +! CHECK: %[[VAL_1:.*]] = fir.convert %[[VAL_0]] : (!fir.box>) -> !fir.box +! CHECK: %[[VAL_2:.*]] = fir.call @_FortranAIsContiguous(%[[VAL_1]]) : (!fir.box) -> i1 +! CHECK: %[[VAL_3:.*]] = fir.if %[[VAL_2]] -> (!fir.heap>) { +! CHECK: %[[VAL_4:.*]] = fir.box_addr %[[VAL_0]] : (!fir.box>) -> !fir.heap> +! CHECK: fir.result %[[VAL_4]] : !fir.heap> +! CHECK: } else { +! CHECK: %[[VAL_7:.*]] = fir.allocmem !fir.array +! CHECK: fir.do_loop {{.*}} { + ! ... copy +! CHECK: } +! CHECK: fir.result %[[VAL_7]] : !fir.heap> +! CHECK: } +! CHECK: %[[VAL_20:.*]] = arith.constant 0 : index +! CHECK: %[[VAL_21:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_20]] : (!fir.box>, index) -> (index, index, index) +! CHECK: %[[VAL_22:.*]] = arith.constant false +! CHECK: %[[VAL_23:.*]] = arith.cmpi eq, %[[VAL_2]], %[[VAL_22]] : i1 +! CHECK: %[[VAL_24:.*]] = fir.shape %[[VAL_21]]#1 : (index) -> !fir.shape<1> +! CHECK: %[[VAL_25:.*]] = fir.embox %[[VAL_3]](%[[VAL_24]]) : (!fir.heap>, !fir.shape<1>) -> !fir.box> +! CHECK: fir.call @_QPtakes_contiguous_optional(%[[VAL_25]]) : (!fir.box>) -> () +! CHECK: fir.if %[[VAL_23]] { +! CHECK: fir.do_loop {{.*}} { + ! ... copy +! CHECK: } +! CHECK: fir.freemem %[[VAL_3]] : !fir.heap> +! CHECK: } +! CHECK: return +! CHECK:} + +subroutine test_assumed_shape_contiguous_to_contiguous_opt(x) + real, contiguous :: x(:) + call takes_contiguous_optional(x) +end subroutine +! CHECK-LABEL: func.func @_QMtestsPtest_assumed_shape_contiguous_to_contiguous_opt( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.box> {fir.bindc_name = "x", fir.contiguous}) { +! CHECK: %[[VAL_1:.*]] = fir.box_addr %[[VAL_0]] : (!fir.box>) -> !fir.ref> +! CHECK: %[[VAL_2:.*]] = arith.constant 0 : index +! CHECK: %[[VAL_3:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_2]] : (!fir.box>, index) -> (index, index, index) +! CHECK: %[[VAL_4:.*]] = arith.constant 1 : index +! CHECK: %[[VAL_5:.*]] = fir.shape_shift %[[VAL_4]], %[[VAL_3]]#1 : (index, index) -> !fir.shapeshift<1> +! CHECK: %[[VAL_6:.*]] = fir.embox %[[VAL_1]](%[[VAL_5]]) : (!fir.ref>, !fir.shapeshift<1>) -> !fir.box> +! CHECK: fir.call @_QPtakes_contiguous_optional(%[[VAL_6]]) : (!fir.box>) -> () +! CHECK-NEXT: return + +subroutine test_assumed_shape_opt_to_contiguous_opt(x) + real, optional :: x(:) + call takes_contiguous_optional(x) +end subroutine +! CHECK-LABEL: func.func @_QMtestsPtest_assumed_shape_opt_to_contiguous_opt( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.box> {fir.bindc_name = "x", fir.optional}) { +! CHECK: %[[VAL_1:.*]] = fir.is_present %[[VAL_0]] : (!fir.box>) -> i1 +! CHECK: %[[VAL_2:.*]] = fir.zero_bits !fir.ref> +! CHECK: %[[VAL_3:.*]] = arith.constant 0 : index +! 
CHECK: %[[VAL_4:.*]] = fir.shape %[[VAL_3]] : (index) -> !fir.shape<1> +! CHECK: %[[VAL_5:.*]] = fir.embox %[[VAL_2]](%[[VAL_4]]) : (!fir.ref>, !fir.shape<1>) -> !fir.box> +! CHECK: %[[VAL_6:.*]] = arith.select %[[VAL_1]], %[[VAL_0]], %[[VAL_5]] : !fir.box> +! CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_6]] : (!fir.box>) -> !fir.box +! CHECK: %[[VAL_8:.*]] = fir.call @_FortranAIsContiguous(%[[VAL_7]]) : (!fir.box) -> i1 +! CHECK: %[[VAL_9:.*]] = fir.if %[[VAL_1]] -> (!fir.heap>) { +! CHECK: %[[VAL_10:.*]] = fir.if %[[VAL_8]] -> (!fir.heap>) { +! CHECK: %[[VAL_11:.*]] = fir.box_addr %[[VAL_6]] : (!fir.box>) -> !fir.heap> +! CHECK: fir.result %[[VAL_11]] : !fir.heap> +! CHECK: } else { +! CHECK: %[[VAL_14:.*]] = fir.allocmem !fir.array +! CHECK: fir.do_loop {{.*}} { + ! copy ... +! CHECK: } +! CHECK: fir.result %[[VAL_14]] : !fir.heap> +! CHECK: } +! CHECK: fir.result %[[VAL_10]] : !fir.heap> +! CHECK: } else { +! CHECK: %[[VAL_28:.*]] = fir.zero_bits !fir.heap> +! CHECK: fir.result %[[VAL_28]] : !fir.heap> +! CHECK: } +! CHECK: %[[VAL_29:.*]] = arith.constant 0 : index +! CHECK: %[[VAL_30:.*]]:3 = fir.box_dims %[[VAL_6]], %[[VAL_29]] : (!fir.box>, index) -> (index, index, index) +! CHECK: %[[VAL_31:.*]] = arith.constant false +! CHECK: %[[VAL_32:.*]] = arith.cmpi eq, %[[VAL_8]], %[[VAL_31]] : i1 +! CHECK: %[[VAL_33:.*]] = arith.andi %[[VAL_1]], %[[VAL_32]] : i1 +! CHECK: %[[VAL_34:.*]] = fir.shape %[[VAL_30]]#1 : (index) -> !fir.shape<1> +! CHECK: %[[VAL_35:.*]] = fir.embox %[[VAL_9]](%[[VAL_34]]) : (!fir.heap>, !fir.shape<1>) -> !fir.box> +! CHECK: %[[VAL_37:.*]] = fir.absent !fir.box> +! CHECK: %[[VAL_38:.*]] = arith.select %[[VAL_1]], %[[VAL_35]], %[[VAL_37]] : !fir.box> +! CHECK: fir.call @_QPtakes_contiguous_optional(%[[VAL_38]]) : (!fir.box>) -> () +! CHECK: fir.if %[[VAL_33]] { +! CHECK: %[[VAL_47:.*]] = fir.do_loop {{.*}} { + ! copy ... +! CHECK: } +! CHECK: fir.freemem %[[VAL_9]] : !fir.heap> +! CHECK: } +! CHECK: return +! CHECK:} + +subroutine test_assumed_shape_contiguous_opt_to_contiguous_opt(x) + real, contiguous, optional :: x(:) + call takes_contiguous_optional(x) +end subroutine +! CHECK-LABEL: func.func @_QMtestsPtest_assumed_shape_contiguous_opt_to_contiguous_opt( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.box> {fir.bindc_name = "x", fir.contiguous, fir.optional}) { +! CHECK: fir.call @_QPtakes_contiguous_optional(%[[VAL_0]]) : (!fir.box>) -> () +! CHECK-NEXT: return + +! ----------------------------------------------------------------------------- +! Test passing pointers to contiguous optional assumed shapes +! ----------------------------------------------------------------------------- +! This case is interesting because pointers may be non contiguous, and also because +! a pointer passed to an optional assumed shape dummy is present if and only if the +! pointer is associated (regardless of the pointer optionality). + +subroutine test_pointer_to_contiguous_opt(x) + real, pointer :: x(:) + call takes_contiguous_optional(x) +end subroutine +! CHECK-LABEL: func.func @_QMtestsPtest_pointer_to_contiguous_opt( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>>> {fir.bindc_name = "x"}) { +! CHECK: %[[VAL_1:.*]] = fir.load %[[VAL_0]] : !fir.ref>>> +! CHECK: %[[VAL_2:.*]] = fir.box_addr %[[VAL_1]] : (!fir.box>>) -> !fir.ptr> +! CHECK: %[[VAL_3:.*]] = fir.convert %[[VAL_2]] : (!fir.ptr>) -> i64 +! CHECK: %[[VAL_4:.*]] = arith.constant 0 : i64 +! CHECK: %[[VAL_5:.*]] = arith.cmpi ne, %[[VAL_3]], %[[VAL_4]] : i64 +! CHECK: %[[VAL_6:.*]] = fir.load %[[VAL_0]] : !fir.ref>>> +! 
CHECK: %[[VAL_7:.*]] = arith.constant 0 : index +! CHECK: %[[VAL_8:.*]]:3 = fir.box_dims %[[VAL_6]], %[[VAL_7]] : (!fir.box>>, index) -> (index, index, index) +! CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_6]] : (!fir.box>>) -> !fir.box +! CHECK: %[[VAL_10:.*]] = fir.call @_FortranAIsContiguous(%[[VAL_9]]) : (!fir.box) -> i1 +! CHECK: %[[VAL_11:.*]] = fir.if %[[VAL_5]] -> (!fir.heap>) { +! CHECK: %[[VAL_12:.*]] = fir.if %[[VAL_10]] -> (!fir.heap>) { +! CHECK: %[[VAL_13:.*]] = fir.box_addr %[[VAL_6]] : (!fir.box>>) -> !fir.heap> +! CHECK: fir.result %[[VAL_13]] : !fir.heap> +! CHECK: } else { +! CHECK: %[[VAL_16:.*]] = fir.allocmem !fir.array +! CHECK: fir.do_loop {{.*}} { + ! copy +! CHECK: } +! CHECK: fir.result %[[VAL_16]] : !fir.heap> +! CHECK: } +! CHECK: fir.result %[[VAL_12]] : !fir.heap> +! CHECK: } else { +! CHECK: %[[VAL_31:.*]] = fir.zero_bits !fir.heap> +! CHECK: fir.result %[[VAL_31]] : !fir.heap> +! CHECK: } +! CHECK: %[[VAL_32:.*]] = arith.constant 0 : index +! CHECK: %[[VAL_33:.*]]:3 = fir.box_dims %[[VAL_6]], %[[VAL_32]] : (!fir.box>>, index) -> (index, index, index) +! CHECK: %[[VAL_34:.*]] = arith.constant false +! CHECK: %[[VAL_35:.*]] = arith.cmpi eq, %[[VAL_10]], %[[VAL_34]] : i1 +! CHECK: %[[VAL_36:.*]] = arith.andi %[[VAL_5]], %[[VAL_35]] : i1 +! CHECK: %[[VAL_37:.*]] = fir.shape_shift %[[VAL_8]]#0, %[[VAL_33]]#1 : (index, index) -> !fir.shapeshift<1> +! CHECK: %[[VAL_38:.*]] = fir.embox %[[VAL_11]](%[[VAL_37]]) : (!fir.heap>, !fir.shapeshift<1>) -> !fir.box> +! CHECK: %[[VAL_40:.*]] = fir.absent !fir.box> +! CHECK: %[[VAL_41:.*]] = arith.select %[[VAL_5]], %[[VAL_38]], %[[VAL_40]] : !fir.box> +! CHECK: fir.call @_QPtakes_contiguous_optional(%[[VAL_41]]) : (!fir.box>) -> () +! CHECK: fir.if %[[VAL_36]] { +! CHECK: fir.do_loop {{.*}} { + ! copy +! CHECK: } +! CHECK: fir.freemem %[[VAL_11]] : !fir.heap> +! CHECK: } +! CHECK: return +! CHECK:} + +subroutine test_pointer_contiguous_to_contiguous_opt(x) + real, pointer, contiguous :: x(:) + call takes_contiguous_optional(x) +end subroutine +! CHECK-LABEL: func.func @_QMtestsPtest_pointer_contiguous_to_contiguous_opt( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>>> {fir.bindc_name = "x", fir.contiguous}) { +! CHECK: %[[VAL_1:.*]] = fir.load %[[VAL_0]] : !fir.ref>>> +! CHECK: %[[VAL_2:.*]] = fir.box_addr %[[VAL_1]] : (!fir.box>>) -> !fir.ptr> +! CHECK: %[[VAL_3:.*]] = fir.convert %[[VAL_2]] : (!fir.ptr>) -> i64 +! CHECK: %[[VAL_4:.*]] = arith.constant 0 : i64 +! CHECK: %[[VAL_5:.*]] = arith.cmpi ne, %[[VAL_3]], %[[VAL_4]] : i64 +! CHECK: %[[VAL_6:.*]] = fir.absent !fir.box> +! CHECK: %[[VAL_7:.*]] = fir.load %[[VAL_0]] : !fir.ref>>> +! CHECK: %[[VAL_8:.*]] = arith.constant 0 : index +! CHECK: %[[VAL_9:.*]]:3 = fir.box_dims %[[VAL_7]], %[[VAL_8]] : (!fir.box>>, index) -> (index, index, index) +! CHECK: %[[VAL_10:.*]] = fir.box_addr %[[VAL_7]] : (!fir.box>>) -> !fir.ptr> +! CHECK: %[[VAL_11:.*]] = fir.shape_shift %[[VAL_9]]#0, %[[VAL_9]]#1 : (index, index) -> !fir.shapeshift<1> +! CHECK: %[[VAL_12:.*]] = fir.embox %[[VAL_10]](%[[VAL_11]]) : (!fir.ptr>, !fir.shapeshift<1>) -> !fir.box> +! CHECK: %[[VAL_13:.*]] = arith.select %[[VAL_5]], %[[VAL_12]], %[[VAL_6]] : !fir.box> +! CHECK: fir.call @_QPtakes_contiguous_optional(%[[VAL_13]]) : (!fir.box>) -> () +! CHECK-NEXT: return + +subroutine test_pointer_opt_to_contiguous_opt(x) + real, pointer, optional :: x(:) + call takes_contiguous_optional(x) +end subroutine +! CHECK-LABEL: func.func @_QMtestsPtest_pointer_opt_to_contiguous_opt( +! 
CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>>> {fir.bindc_name = "x", fir.optional}) { +! CHECK: %[[VAL_1:.*]] = fir.load %[[VAL_0]] : !fir.ref>>> +! CHECK: %[[VAL_2:.*]] = fir.box_addr %[[VAL_1]] : (!fir.box>>) -> !fir.ptr> +! CHECK: %[[VAL_3:.*]] = fir.convert %[[VAL_2]] : (!fir.ptr>) -> i64 +! CHECK: %[[VAL_4:.*]] = arith.constant 0 : i64 +! CHECK: %[[VAL_5:.*]] = arith.cmpi ne, %[[VAL_3]], %[[VAL_4]] : i64 +! CHECK: %[[VAL_6:.*]] = fir.load %[[VAL_0]] : !fir.ref>>> +! CHECK: %[[VAL_7:.*]] = arith.constant 0 : index +! CHECK: %[[VAL_8:.*]]:3 = fir.box_dims %[[VAL_6]], %[[VAL_7]] : (!fir.box>>, index) -> (index, index, index) +! CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_6]] : (!fir.box>>) -> !fir.box +! CHECK: %[[VAL_10:.*]] = fir.call @_FortranAIsContiguous(%[[VAL_9]]) : (!fir.box) -> i1 +! CHECK: %[[VAL_11:.*]] = fir.if %[[VAL_5]] -> (!fir.heap>) { +! CHECK: %[[VAL_12:.*]] = fir.if %[[VAL_10]] -> (!fir.heap>) { +! CHECK: %[[VAL_13:.*]] = fir.box_addr %[[VAL_6]] : (!fir.box>>) -> !fir.heap> +! CHECK: fir.result %[[VAL_13]] : !fir.heap> +! CHECK: } else { +! CHECK: %[[VAL_16:.*]] = fir.allocmem !fir.array +! CHECK: fir.do_loop {{.*}} { + ! copy +! CHECK: } +! CHECK: fir.result %[[VAL_16]] : !fir.heap> +! CHECK: } +! CHECK: fir.result %[[VAL_12]] : !fir.heap> +! CHECK: } else { +! CHECK: %[[VAL_31:.*]] = fir.zero_bits !fir.heap> +! CHECK: fir.result %[[VAL_31]] : !fir.heap> +! CHECK: } +! CHECK: %[[VAL_32:.*]] = arith.constant 0 : index +! CHECK: %[[VAL_33:.*]]:3 = fir.box_dims %[[VAL_6]], %[[VAL_32]] : (!fir.box>>, index) -> (index, index, index) +! CHECK: %[[VAL_34:.*]] = arith.constant false +! CHECK: %[[VAL_35:.*]] = arith.cmpi eq, %[[VAL_10]], %[[VAL_34]] : i1 +! CHECK: %[[VAL_36:.*]] = arith.andi %[[VAL_5]], %[[VAL_35]] : i1 +! CHECK: %[[VAL_37:.*]] = fir.shape_shift %[[VAL_8]]#0, %[[VAL_33]]#1 : (index, index) -> !fir.shapeshift<1> +! CHECK: %[[VAL_38:.*]] = fir.embox %[[VAL_11]](%[[VAL_37]]) : (!fir.heap>, !fir.shapeshift<1>) -> !fir.box> +! CHECK: %[[VAL_40:.*]] = fir.absent !fir.box> +! CHECK: %[[VAL_41:.*]] = arith.select %[[VAL_5]], %[[VAL_38]], %[[VAL_40]] : !fir.box> +! CHECK: fir.call @_QPtakes_contiguous_optional(%[[VAL_41]]) : (!fir.box>) -> () +! CHECK: fir.if %[[VAL_36]] { +! CHECK: fir.do_loop {{.*}} { + ! copy +! CHECK: } +! CHECK: fir.freemem %[[VAL_11]] : !fir.heap> +! CHECK: } +! CHECK: return +! CHECK:} + +subroutine test_pointer_contiguous_opt_to_contiguous_opt(x) + real, pointer, contiguous, optional :: x(:) + call takes_contiguous_optional(x) +end subroutine +! CHECK-LABEL: func.func @_QMtestsPtest_pointer_contiguous_opt_to_contiguous_opt( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>>> {fir.bindc_name = "x", fir.contiguous, fir.optional}) { +! CHECK: %[[VAL_1:.*]] = fir.load %[[VAL_0]] : !fir.ref>>> +! CHECK: %[[VAL_2:.*]] = fir.box_addr %[[VAL_1]] : (!fir.box>>) -> !fir.ptr> +! CHECK: %[[VAL_3:.*]] = fir.convert %[[VAL_2]] : (!fir.ptr>) -> i64 +! CHECK: %[[VAL_4:.*]] = arith.constant 0 : i64 +! CHECK: %[[VAL_5:.*]] = arith.cmpi ne, %[[VAL_3]], %[[VAL_4]] : i64 +! CHECK: %[[VAL_6:.*]] = fir.absent !fir.box> +! CHECK: %[[VAL_7:.*]] = fir.load %[[VAL_0]] : !fir.ref>>> +! CHECK: %[[VAL_8:.*]] = arith.constant 0 : index +! CHECK: %[[VAL_9:.*]]:3 = fir.box_dims %[[VAL_7]], %[[VAL_8]] : (!fir.box>>, index) -> (index, index, index) +! CHECK: %[[VAL_10:.*]] = fir.box_addr %[[VAL_7]] : (!fir.box>>) -> !fir.ptr> +! CHECK: %[[VAL_11:.*]] = fir.shape_shift %[[VAL_9]]#0, %[[VAL_9]]#1 : (index, index) -> !fir.shapeshift<1> +! 
CHECK: %[[VAL_12:.*]] = fir.embox %[[VAL_10]](%[[VAL_11]]) : (!fir.ptr>, !fir.shapeshift<1>) -> !fir.box> +! CHECK: %[[VAL_13:.*]] = arith.select %[[VAL_5]], %[[VAL_12]], %[[VAL_6]] : !fir.box> +! CHECK-NEXT: fir.call @_QPtakes_contiguous_optional(%[[VAL_13]]) : (!fir.box>) -> () +! CHECK: return +end module diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt index 4740c761d1114b..1b11d888bdc4f8 100644 --- a/libc/src/string/CMakeLists.txt +++ b/libc/src/string/CMakeLists.txt @@ -7,7 +7,7 @@ add_header_library( DEPENDS libc.src.__support.CPP.bitset .memory_utils.memcpy_implementation - .memory_utils.memset_implementation + .memory_utils.bzero_implementation ) add_entrypoint_object( @@ -65,7 +65,7 @@ add_entrypoint_object( HDRS stpncpy.h DEPENDS - .memory_utils.memset_implementation + .memory_utils.bzero_implementation ) add_entrypoint_object( diff --git a/libc/src/string/bzero.cpp b/libc/src/string/bzero.cpp index c57c922f6eff6f..b04cca834f9867 100644 --- a/libc/src/string/bzero.cpp +++ b/libc/src/string/bzero.cpp @@ -8,12 +8,12 @@ #include "src/string/bzero.h" #include "src/__support/common.h" -#include "src/string/memory_utils/memset_implementations.h" +#include "src/string/memory_utils/bzero_implementations.h" namespace __llvm_libc { LLVM_LIBC_FUNCTION(void, bzero, (void *ptr, size_t count)) { - inline_memset(reinterpret_cast(ptr), 0, count); + inline_bzero(reinterpret_cast(ptr), count); } } // namespace __llvm_libc diff --git a/libc/src/string/memory_utils/CMakeLists.txt b/libc/src/string/memory_utils/CMakeLists.txt index 6cd45ddc42ace6..d735fcfe54174a 100644 --- a/libc/src/string/memory_utils/CMakeLists.txt +++ b/libc/src/string/memory_utils/CMakeLists.txt @@ -5,6 +5,7 @@ add_header_library( utils.h elements.h bcmp_implementations.h + bzero_implementations.h memcmp_implementations.h memcpy_implementations.h memset_implementations.h @@ -35,3 +36,11 @@ add_header_library( DEPS .memory_utils ) + +add_header_library( + bzero_implementation + HDRS + bzero_implementations.h + DEPS + .memset_implementation +) diff --git a/libc/src/string/memory_utils/address.h b/libc/src/string/memory_utils/address.h deleted file mode 100644 index caa71be5b1da94..00000000000000 --- a/libc/src/string/memory_utils/address.h +++ /dev/null @@ -1,133 +0,0 @@ -//===-- Strongly typed address with alignment and access semantics --------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_COMMON_H -#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_COMMON_H - -#include "src/__support/CPP/type_traits.h" // cpp::ConditionalType -#include "src/string/memory_utils/utils.h" // is_power2 -#include // size_t -#include // uint8_t, uint16_t, uint32_t, uint64_t - -namespace __llvm_libc { - -// Utility to enable static_assert(false) in templates. -template static void DeferredStaticAssert(const char *msg) { - static_assert(flag, "compilation error"); -} - -// A non-coercible type to represent raw data. -enum class ubyte : unsigned char { ZERO = 0 }; - -// Address attribute specifying whether the underlying load / store operations -// are temporal or non-temporal. 
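The header being removed here ("Strongly typed address with alignment and access semantics") encodes alignment, read/write permission, and temporality in the pointer type, so that compile-time offsets can propagate provable alignment to the code generator. A rough standalone sketch of that core idea only (an editor's illustration with hypothetical names, not code from this patch):

#include <cstddef>

// Carry the provable alignment of a pointer in its type.
template <size_t Alignment> struct AlignedAddr {
  char *ptr;

  // The optimizer may select aligned loads/stores through this pointer.
  char *get() const {
    return static_cast<char *>(__builtin_assume_aligned(ptr, Alignment));
  }

  // Advancing by a multiple of the alignment preserves the guarantee at
  // compile time, mirroring the offset helpers defined below.
  AlignedAddr advance_by_multiple(ptrdiff_t bytes) const {
    return {ptr + bytes};
  }
};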
-enum class Temporality { TEMPORAL, NON_TEMPORAL }; - -// Address attribute specifying whether the underlying load / store operations -// are aligned or unaligned. -enum class Aligned { NO, YES }; - -// Address attribute to discriminate between readable and writable addresses. -enum class Permission { Read, Write }; - -// Address is semantically equivalent to a pointer but also conveys compile time -// information that helps with instructions selection (aligned/unaligned, -// temporal/non-temporal). -template struct Address { - static_assert(is_power2(Alignment)); - static constexpr size_t ALIGNMENT = Alignment; - static constexpr Permission PERMISSION = P; - static constexpr Temporality TEMPORALITY = TS; - static constexpr bool IS_READ = P == Permission::Read; - static constexpr bool IS_WRITE = P == Permission::Write; - using PointeeType = cpp::conditional_t; - using VoidType = cpp::conditional_t; - - Address(VoidType *ptr) : ptr_(reinterpret_cast(ptr)) {} - - PointeeType *ptr() const { - return reinterpret_cast( - __builtin_assume_aligned(ptr_, ALIGNMENT)); - } - - PointeeType *const ptr_; - - template auto offset(size_t byte_offset) const { - static constexpr size_t NewAlignment = commonAlign(); - return Address(ptr_ + byte_offset); - } - -private: - static constexpr size_t gcd(size_t A, size_t B) { - return B == 0 ? A : gcd(B, A % B); - } - - template static constexpr size_t commonAlign() { - constexpr size_t GCD = gcd(ByteOffset, ALIGNMENT); - if constexpr (is_power2(GCD)) - return GCD; - else - return 1; - } -}; - -template struct IsAddressType : public cpp::false_type {}; -template -struct IsAddressType> : public cpp::true_type {}; - -// Reinterpret the address as a pointer to T. -// This is not UB since the underlying pointer always refers to a `char` in a -// buffer of raw data. -template static T *as(AddrT addr) { - static_assert(IsAddressType::value); - return reinterpret_cast(addr.ptr()); -} - -// Offsets the address by a compile time amount, this allows propagating -// alignment whenever possible. -template -static auto offsetAddr(AddrT addr) { - static_assert(IsAddressType::value); - return addr.template offset(ByteOffset); -} - -// Offsets the address by a runtime amount but assuming that the resulting -// address will be Alignment aligned. -template -static auto offsetAddrAssumeAligned(AddrT addr, size_t byte_offset) { - static_assert(IsAddressType::value); - return Address(addr.ptr_ + - byte_offset); -} - -// Offsets the address by a runtime amount that is assumed to be a multiple of -// ByteOffset. This allows to propagate the address alignment whenever possible. -template -static auto offsetAddrMultiplesOf(AddrT addr, ptrdiff_t byte_offset) { - static_assert(IsAddressType::value); - return addr.template offset(byte_offset); -} - -// User friendly aliases for common address types. -template -using SrcAddr = Address; -template -using DstAddr = Address; -template -using NtSrcAddr = - Address; -template -using NtDstAddr = - Address; - -} // namespace __llvm_libc - -#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_COMMON_H diff --git a/libc/src/string/memory_utils/algorithm.h b/libc/src/string/memory_utils/algorithm.h deleted file mode 100644 index 6355ffe04562f7..00000000000000 --- a/libc/src/string/memory_utils/algorithm.h +++ /dev/null @@ -1,463 +0,0 @@ -//===-- Algorithms to compose sized memory operations ---------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Higher order primitives that build upon the SizedOpT facility. -// They constitute the basic blocks for composing memory functions. -// This file defines the following operations: -// - Skip -// - Tail -// - HeadTail -// - Loop -// - Align -// -// See each class for documentation. -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ALGORITHM_H -#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ALGORITHM_H - -#include "src/string/memory_utils/address.h" // Address -#include "src/string/memory_utils/utils.h" // offset_to_next_aligned - -#include // ptrdiff_t - -namespace __llvm_libc { - -// We are not yet allowed to use asserts in low level memory operations as -// assert itself could depend on them. -// We define this empty macro so we can enable them as soon as possible and keep -// track of invariants. -#define LIBC_ASSERT(COND) - -// An operation that allows to skip the specified amount of bytes. -template struct Skip { - template struct Then { - template - static inline void set(DstAddrT dst, ubyte value) { - static_assert(NextT::IS_FIXED_SIZE); - NextT::set(offsetAddr(dst), value); - } - - template - static inline uint64_t isDifferent(SrcAddrT1 src1, SrcAddrT2 src2) { - static_assert(NextT::IS_FIXED_SIZE); - return NextT::isDifferent(offsetAddr(src1), - offsetAddr(src2)); - } - - template - static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2) { - static_assert(NextT::IS_FIXED_SIZE); - return NextT::threeWayCmp(offsetAddr(src1), - offsetAddr(src2)); - } - - template - static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2, - size_t runtime_size) { - static_assert(NextT::IS_RUNTIME_SIZE); - return NextT::threeWayCmp(offsetAddr(src1), - offsetAddr(src2), runtime_size - Bytes); - } - }; -}; - -// Compute the address of a tail operation. -// Because of the runtime size, we loose the alignment information. -template -static auto tailAddr(AddrT addr, size_t runtime_size) { - static_assert(IsAddressType::value); - return offsetAddrAssumeAligned<1>(addr, runtime_size - Size); -} - -// Perform the operation on the last 'Size' bytes of the buffer. -// -// e.g. with -// [1234567812345678123] -// [__XXXXXXXXXXXXXX___] -// [________XXXXXXXX___] -// -// Precondition: `runtime_size >= Size`. 
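Concretely, the Tail strategy documented above re-anchors one fixed-size operation at the end of the buffer, so a fixed-size head operation (or a loop) plus one tail operation covers any size without a scalar remainder loop. A minimal sketch of the same idea outside the template machinery (editor's illustration assuming an 8-byte block; not code from the deleted header):

#include <cstddef>
#include <cstring>

// Set the last 8 bytes of dst. Paired with a fixed 8-byte store at the
// front (the HeadTail pattern below), this handles any 8 <= n <= 16,
// because the two stores are allowed to overlap in the middle.
void set_tail8(unsigned char *dst, unsigned char value, size_t n) {
  // Precondition, as stated above: n >= 8.
  unsigned char block[8];
  std::memset(block, value, sizeof block);
  std::memcpy(dst + n - 8, block, sizeof block);
}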
-template struct Tail { - static_assert(SizedOpT::IS_FIXED_SIZE); - static constexpr bool IS_RUNTIME_SIZE = true; - static constexpr size_t SIZE = SizedOpT::SIZE; - - template - static inline void copy(DstAddrT dst, SrcAddrT src, size_t runtime_size) { - SizedOpT::copy(tailAddr(dst, runtime_size), - tailAddr(src, runtime_size)); - } - - template - static inline void move(DstAddrT dst, SrcAddrT src, size_t runtime_size) { - SizedOpT::move(tailAddr(dst, runtime_size), - tailAddr(src, runtime_size)); - } - - template - static inline void set(DstAddrT dst, ubyte value, size_t runtime_size) { - SizedOpT::set(tailAddr(dst, runtime_size), value); - } - - template - static inline uint64_t isDifferent(SrcAddrT1 src1, SrcAddrT2 src2, - size_t runtime_size) { - return SizedOpT::isDifferent(tailAddr(src1, runtime_size), - tailAddr(src2, runtime_size)); - } - - template - static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2, - size_t runtime_size) { - return SizedOpT::threeWayCmp(tailAddr(src1, runtime_size), - tailAddr(src2, runtime_size)); - } -}; - -// Perform the operation on the first and the last `SizedOpT::Size` bytes of the -// buffer. This is useful for overlapping operations. -// -// e.g. with -// [1234567812345678123] -// [__XXXXXXXXXXXXXX___] -// [__XXXXXXXX_________] -// [________XXXXXXXX___] -// -// Precondition: `runtime_size >= Size && runtime_size <= 2 x Size`. -template struct HeadTail { - static_assert(SizedOpT::IS_FIXED_SIZE); - static constexpr bool IS_RUNTIME_SIZE = true; - - template - static inline void copy(DstAddrT dst, SrcAddrT src, size_t runtime_size) { - LIBC_ASSERT(runtime_size >= SizedOpT::SIZE); - SizedOpT::copy(dst, src); - Tail::copy(dst, src, runtime_size); - } - - template - static inline void move(DstAddrT dst, SrcAddrT src, size_t runtime_size) { - LIBC_ASSERT(runtime_size >= SizedOpT::SIZE); - static constexpr size_t BLOCK_SIZE = SizedOpT::SIZE; - // The load and store operations can be performed in any order as long as - // they are not interleaved. More investigations are needed to determine the - // best order. - auto head = SizedOpT::load(src); - auto tail = SizedOpT::load(tailAddr(src, runtime_size)); - SizedOpT::store(tailAddr(dst, runtime_size), tail); - SizedOpT::store(dst, head); - } - - template - static inline void set(DstAddrT dst, ubyte value, size_t runtime_size) { - LIBC_ASSERT(runtime_size >= SizedOpT::SIZE); - SizedOpT::set(dst, value); - Tail::set(dst, value, runtime_size); - } - - template - static inline uint64_t isDifferent(SrcAddrT1 src1, SrcAddrT2 src2, - size_t runtime_size) { - LIBC_ASSERT(runtime_size >= SizedOpT::SIZE); - // Two strategies can be applied here: - // 1. Compute head and tail and compose them with a bitwise or operation. - // 2. Stop early if head is different. - // We chose the later because HeadTail operations are typically performed - // with sizes ranging from 4 to 256 bytes. The cost of the loads is then - // significantly larger than the cost of the branch. - if (const uint64_t res = SizedOpT::isDifferent(src1, src2)) - return res; - return Tail::isDifferent(src1, src2, runtime_size); - } - - template - static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2, - size_t runtime_size) { - LIBC_ASSERT(runtime_size >= SizedOpT::SIZE && - runtime_size <= 2 * SizedOpT::SIZE); - if (const int32_t res = SizedOpT::threeWayCmp(src1, src2)) - return res; - return Tail::threeWayCmp(src1, src2, runtime_size); - } -}; - -// Simple loop ending with a Tail operation. -// -// e.g. 
with -// [12345678123456781234567812345678] -// [__XXXXXXXXXXXXXXXXXXXXXXXXXXXX___] -// [__XXXXXXXX_______________________] -// [__________XXXXXXXX_______________] -// [__________________XXXXXXXX_______] -// [______________________XXXXXXXX___] -// -// Precondition: -// - runtime_size >= Size -template struct Loop { - static_assert(SizedOpT::IS_FIXED_SIZE); - static constexpr bool IS_RUNTIME_SIZE = true; - static constexpr size_t BLOCK_SIZE = SizedOpT::SIZE; - - template - static inline void copy(DstAddrT dst, SrcAddrT src, size_t runtime_size) { - size_t offset = 0; - do { - SizedOpT::copy(offsetAddrMultiplesOf(dst, offset), - offsetAddrMultiplesOf(src, offset)); - offset += BLOCK_SIZE; - } while (offset < runtime_size - BLOCK_SIZE); - Tail::copy(dst, src, runtime_size); - } - - // Move forward suitable when dst < src. We load the tail bytes before - // handling the loop. - // - // e.g. Moving two bytes - // [ | | | | |] - // [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___] - // [_________________________LLLLLLLL___] - // [___LLLLLLLL_________________________] - // [_SSSSSSSS___________________________] - // [___________LLLLLLLL_________________] - // [_________SSSSSSSS___________________] - // [___________________LLLLLLLL_________] - // [_________________SSSSSSSS___________] - // [_______________________SSSSSSSS_____] - template - static inline void move(DstAddrT dst, SrcAddrT src, size_t runtime_size) { - const auto tail_value = - SizedOpT::load(tailAddr(src, runtime_size)); - size_t offset = 0; - do { - SizedOpT::move(offsetAddrMultiplesOf(dst, offset), - offsetAddrMultiplesOf(src, offset)); - offset += BLOCK_SIZE; - } while (offset < runtime_size - BLOCK_SIZE); - SizedOpT::store(tailAddr(dst, runtime_size), tail_value); - } - - // Move backward suitable when dst > src. We load the head bytes before - // handling the loop. - // - // e.g. 
Moving two bytes - // [ | | | | |] - // [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___] - // [___LLLLLLLL_________________________] - // [_________________________LLLLLLLL___] - // [___________________________SSSSSSSS_] - // [_________________LLLLLLLL___________] - // [___________________SSSSSSSS_________] - // [_________LLLLLLLL___________________] - // [___________SSSSSSSS_________________] - // [_____SSSSSSSS_______________________] - template - static inline void move_backward(DstAddrT dst, SrcAddrT src, - size_t runtime_size) { - const auto head_value = SizedOpT::load(src); - ptrdiff_t offset = runtime_size - BLOCK_SIZE; - do { - SizedOpT::move(offsetAddrMultiplesOf(dst, offset), - offsetAddrMultiplesOf(src, offset)); - offset -= BLOCK_SIZE; - } while (offset >= 0); - SizedOpT::store(dst, head_value); - } - - template - static inline void set(DstAddrT dst, ubyte value, size_t runtime_size) { - size_t offset = 0; - do { - SizedOpT::set(offsetAddrMultiplesOf(dst, offset), value); - offset += BLOCK_SIZE; - } while (offset < runtime_size - BLOCK_SIZE); - Tail::set(dst, value, runtime_size); - } - - template - static inline uint64_t isDifferent(SrcAddrT1 src1, SrcAddrT2 src2, - size_t runtime_size) { - size_t offset = 0; - do { - if (uint64_t res = SizedOpT::isDifferent( - offsetAddrMultiplesOf(src1, offset), - offsetAddrMultiplesOf(src2, offset))) - return res; - offset += BLOCK_SIZE; - } while (offset < runtime_size - BLOCK_SIZE); - return Tail::isDifferent(src1, src2, runtime_size); - } - - template - static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2, - size_t runtime_size) { - size_t offset = 0; - do { - if (int32_t res = SizedOpT::threeWayCmp( - offsetAddrMultiplesOf(src1, offset), - offsetAddrMultiplesOf(src2, offset))) - return res; - offset += BLOCK_SIZE; - } while (offset < runtime_size - BLOCK_SIZE); - return Tail::threeWayCmp(src1, src2, runtime_size); - } -}; - -// Aligns using a statically-sized operation, then calls the subsequent NextT -// operation. -// -// e.g. A 16-byte Destination Aligned 32-byte Loop Copy can be written as: -// Align<_16, Arg::Dst>::Then>::copy(dst, src, runtime_size); -enum class Arg { _1, _2, Dst = _1, Src = _2, Lhs = _1, Rhs = _2 }; -template struct Align { - static_assert(SizedOpT::IS_FIXED_SIZE); - - template struct Then { - static_assert(NextT::IS_RUNTIME_SIZE); - - template - static inline void copy(DstAddrT dst, SrcAddrT src, size_t runtime_size) { - SizedOpT::copy(dst, src); - auto aligned = align(dst, src, runtime_size); - NextT::copy(aligned.arg1, aligned.arg2, aligned.size); - } - - // Move forward suitable when dst < src. The alignment is performed with - // an HeadTail operation of size ∈ [Alignment, 2 x Alignment]. - // - // e.g. Moving two bytes and making sure src is then aligned. - // [ | | | | ] - // [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_] - // [____LLLLLLLL_____________________] - // [___________LLLLLLLL______________] - // [_SSSSSSSS________________________] - // [________SSSSSSSS_________________] - // - // e.g. Moving two bytes and making sure dst is then aligned. - // [ | | | | ] - // [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_] - // [____LLLLLLLL_____________________] - // [______LLLLLLLL___________________] - // [_SSSSSSSS________________________] - // [___SSSSSSSS______________________] - template - static inline void move(DstAddrT dst, SrcAddrT src, size_t runtime_size) { - auto aligned_after_begin = align(dst, src, runtime_size); - // We move pointers forward by Size so we can perform HeadTail. 
- auto aligned = aligned_after_begin.stepForward(); - HeadTail::move(dst, src, runtime_size - aligned.size); - NextT::move(aligned.arg1, aligned.arg2, aligned.size); - } - - // Move backward suitable when dst > src. The alignment is performed with - // an HeadTail operation of size ∈ [Alignment, 2 x Alignment]. - // - // e.g. Moving two bytes backward and making sure src is then aligned. - // [ | | | | ] - // [____XXXXXXXXXXXXXXXXXXXXXXXX_____] - // [ _________________LLLLLLLL_______] - // [ ___________________LLLLLLLL_____] - // [____________________SSSSSSSS_____] - // [______________________SSSSSSSS___] - // - // e.g. Moving two bytes and making sure dst is then aligned. - // [ | | | | ] - // [____XXXXXXXXXXXXXXXXXXXXXXXX_____] - // [ _______________LLLLLLLL_________] - // [ ___________________LLLLLLLL_____] - // [__________________SSSSSSSS_______] - // [______________________SSSSSSSS___] - template - static inline void move_backward(DstAddrT dst, SrcAddrT src, - size_t runtime_size) { - const auto dst_end = offsetAddrAssumeAligned<1>(dst, runtime_size); - const auto src_end = offsetAddrAssumeAligned<1>(src, runtime_size); - auto aligned_after_end = align(dst_end, src_end, 0); - // We move pointers back by 2 x Size so we can perform HeadTail. - auto aligned = aligned_after_end.stepBack().stepBack(); - HeadTail::move(aligned.arg1, aligned.arg2, aligned.size); - NextT::move_backward(dst, src, runtime_size - aligned.size); - } - - template - static inline void set(DstAddrT dst, ubyte value, size_t runtime_size) { - SizedOpT::set(dst, value); - DstAddrT _(nullptr); - auto aligned = align(dst, _, runtime_size); - NextT::set(aligned.arg1, value, aligned.size); - } - - template - static inline uint64_t isDifferent(SrcAddrT1 src1, SrcAddrT2 src2, - size_t runtime_size) { - if (const uint64_t res = SizedOpT::isDifferent(src1, src2)) - return res; - auto aligned = align(src1, src2, runtime_size); - return NextT::isDifferent(aligned.arg1, aligned.arg2, aligned.size); - } - - template - static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2, - size_t runtime_size) { - if (const int32_t res = SizedOpT::threeWayCmp(src1, src2)) - return res; - auto aligned = align(src1, src2, runtime_size); - return NextT::threeWayCmp(aligned.arg1, aligned.arg2, aligned.size); - } - }; - -private: - static constexpr size_t ALIGN_OP_SIZE = SizedOpT::SIZE; - static_assert(ALIGN_OP_SIZE > 1); - - template struct Aligned { - Arg1AddrT arg1; - Arg2AddrT arg2; - size_t size; - - Aligned stepForward() const { - return Aligned{offsetAddrMultiplesOf(arg1, ALIGN_OP_SIZE), - offsetAddrMultiplesOf(arg2, ALIGN_OP_SIZE), - size - ALIGN_OP_SIZE}; - } - - Aligned stepBack() const { - return Aligned{offsetAddrMultiplesOf(arg1, -ALIGN_OP_SIZE), - offsetAddrMultiplesOf(arg2, -ALIGN_OP_SIZE), - size + ALIGN_OP_SIZE}; - } - }; - - template - static auto makeAligned(Arg1AddrT arg1, Arg2AddrT arg2, size_t size) { - return Aligned{arg1, arg2, size}; - } - - template - static auto align(Arg1AddrT arg1, Arg2AddrT arg2, size_t runtime_size) { - static_assert(IsAddressType::value); - static_assert(IsAddressType::value); - if constexpr (AlignOn == Arg::_1) { - auto offset = offset_to_next_aligned(arg1.ptr_); - return makeAligned(offsetAddrAssumeAligned(arg1, offset), - offsetAddrAssumeAligned<1>(arg2, offset), - runtime_size - offset); - } else if constexpr (AlignOn == Arg::_2) { - auto offset = offset_to_next_aligned(arg2.ptr_); - return makeAligned(offsetAddrAssumeAligned<1>(arg1, offset), - offsetAddrAssumeAligned(arg2, offset), - 
runtime_size - offset); - } else { - DeferredStaticAssert("AlignOn must be either Arg::_1 or Arg::_2"); - } - } -}; - -} // namespace __llvm_libc - -#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ALGORITHM_H diff --git a/libc/src/string/memory_utils/backend_aarch64.h b/libc/src/string/memory_utils/backend_aarch64.h deleted file mode 100644 index 8077a098ff9c08..00000000000000 --- a/libc/src/string/memory_utils/backend_aarch64.h +++ /dev/null @@ -1,71 +0,0 @@ -//===-- Elementary operations for aarch64 ---------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_AARCH64_H -#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_AARCH64_H - -#if !defined(LLVM_LIBC_ARCH_AARCH64) -#include "src/string/memory_utils/backend_scalar.h" - -#ifdef __ARM_NEON -#include -#endif - -namespace __llvm_libc { - -struct Aarch64Backend : public Scalar64BitBackend { - static constexpr bool IS_BACKEND_TYPE = true; - - template , bool> = true> - static inline T load(const T *src) { - return Scalar64BitBackend::template load(src); - } -}; - -// Implementation of the SizedOp abstraction for the set operation. -struct Zva64 { - static constexpr size_t SIZE = 64; - - template - static inline void set(DstAddrT dst, ubyte value) { -#if __SIZEOF_POINTER__ == 4 - asm("dc zva, %w[dst]" : : [dst] "r"(dst) : "memory"); -#else - asm("dc zva, %[dst]" : : [dst] "r"(dst) : "memory"); -#endif - } -}; - -inline static bool hasZva() { - uint64_t zva_val; - asm("mrs %[zva_val], dczid_el0" : [zva_val] "=r"(zva_val)); - // DC ZVA is permitted if DZP, bit [4] is zero. - // BS, bits [3:0] is log2 of the block size in words. - // So the next line checks whether the instruction is permitted and block size - // is 16 words (i.e. 64 bytes). - return (zva_val & 0b11111) == 0b00100; -} - -namespace aarch64 { -using _1 = SizedOp; -using _2 = SizedOp; -using _3 = SizedOp; -using _4 = SizedOp; -using _8 = SizedOp; -using _16 = SizedOp; -using _32 = SizedOp; -using _64 = SizedOp; -using _128 = SizedOp; -} // namespace aarch64 - -} // namespace __llvm_libc - -#endif // LLVM_LIBC_ARCH_AARCH64 - -#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_AARCH64_H diff --git a/libc/src/string/memory_utils/backend_scalar.h b/libc/src/string/memory_utils/backend_scalar.h deleted file mode 100644 index dba36b159baa6e..00000000000000 --- a/libc/src/string/memory_utils/backend_scalar.h +++ /dev/null @@ -1,104 +0,0 @@ -//===-- Elementary operations for native scalar types ---------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_SCALAR_H -#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_SCALAR_H - -#include "src/__support/CPP/type_traits.h" // ConditionalType, enable_if_t -#include "src/__support/endian.h" - -namespace __llvm_libc { - -struct Scalar64BitBackend { - static constexpr bool IS_BACKEND_TYPE = true; - - template - static constexpr bool IsScalarType = - cpp::is_same_v || cpp::is_same_v || - cpp::is_same_v || cpp::is_same_v; - - template - static inline T load(const T *src) { - static_assert(IsScalarType); - static_assert(TS == Temporality::TEMPORAL, - "Scalar load does not support non-temporal access"); - return *src; - } - - template - static inline void store(T *dst, T value) { - static_assert(IsScalarType); - static_assert(TS == Temporality::TEMPORAL, - "Scalar store does not support non-temporal access"); - *dst = value; - } - - template static inline T splat(ubyte value) { - static_assert(IsScalarType); - return (T(~0ULL) / T(0xFF)) * T(value); - } - - template static inline uint64_t notEquals(T v1, T v2) { - static_assert(IsScalarType); - return v1 ^ v2; - } - - template static inline int32_t threeWayCmp(T v1, T v2) { - DeferredStaticAssert("not implemented"); - } - - // Returns the type to use to consume Size bytes. - template - using getNextType = cpp::conditional_t< - Size >= 8, uint64_t, - cpp::conditional_t= 4, uint32_t, - cpp::conditional_t= 2, uint16_t, uint8_t>>>; -}; - -template <> -int32_t inline Scalar64BitBackend::threeWayCmp(uint8_t a, uint8_t b) { - const int16_t la = Endian::to_big_endian(a); - const int16_t lb = Endian::to_big_endian(b); - return la - lb; -} -template <> -int32_t inline Scalar64BitBackend::threeWayCmp(uint16_t a, - uint16_t b) { - const int32_t la = Endian::to_big_endian(a); - const int32_t lb = Endian::to_big_endian(b); - return la - lb; -} -template <> -int32_t inline Scalar64BitBackend::threeWayCmp(uint32_t a, - uint32_t b) { - const uint32_t la = Endian::to_big_endian(a); - const uint32_t lb = Endian::to_big_endian(b); - return la > lb ? 1 : la < lb ? -1 : 0; -} -template <> -int32_t inline Scalar64BitBackend::threeWayCmp(uint64_t a, - uint64_t b) { - const uint64_t la = Endian::to_big_endian(a); - const uint64_t lb = Endian::to_big_endian(b); - return la > lb ? 1 : la < lb ? -1 : 0; -} - -namespace scalar { -using _1 = SizedOp; -using _2 = SizedOp; -using _3 = SizedOp; -using _4 = SizedOp; -using _8 = SizedOp; -using _16 = SizedOp; -using _32 = SizedOp; -using _64 = SizedOp; -using _128 = SizedOp; -} // namespace scalar - -} // namespace __llvm_libc - -#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_SCALAR_H diff --git a/libc/src/string/memory_utils/backend_x86.h b/libc/src/string/memory_utils/backend_x86.h deleted file mode 100644 index cfdfcdf90131c3..00000000000000 --- a/libc/src/string/memory_utils/backend_x86.h +++ /dev/null @@ -1,219 +0,0 @@ -//===-- Elementary operations for x86 -------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_X86_H -#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_X86_H - -#if defined(LLVM_LIBC_ARCH_X86) -#include "src/__support/CPP/type_traits.h" // ConditionalType, enable_if_t -#include "src/string/memory_utils/backend_scalar.h" - -#ifdef __SSE2__ -#include -#endif // __SSE2__ - -#if defined(__SSE2__) -#define HAS_M128 true -#else -#define HAS_M128 false -#endif - -#if defined(__AVX2__) -#define HAS_M256 true -#else -#define HAS_M256 false -#endif - -#if defined(__AVX512F__) and defined(__AVX512BW__) -#define HAS_M512 true -#else -#define HAS_M512 false -#endif - -namespace __llvm_libc { -struct X86Backend : public Scalar64BitBackend { - static constexpr bool IS_BACKEND_TYPE = true; - - // Scalar types use base class implementations. - template , bool> = true> - static inline T load(const T *src) { - return Scalar64BitBackend::template load(src); - } - - // Scalar types use base class implementations. - template , bool> = true> - static inline void store(T *dst, T value) { - Scalar64BitBackend::template store(dst, value); - } - - // Scalar types use base class implementations. - template , bool> = true> - static inline uint64_t notEquals(T v1, T v2) { - return Scalar64BitBackend::template notEquals(v1, v2); - } - - // Scalar types use base class implementations. - template , bool> = true> - static inline T splat(ubyte value) { - return Scalar64BitBackend::template splat(value); - } - - // Scalar types use base class implementations. - template , bool> = true> - static inline int32_t threeWayCmp(T v1, T v2) { - return Scalar64BitBackend::template threeWayCmp(v1, v2); - } - - // X86 types are specialized below. - template , bool> = true> - static inline T load(const T *src); - - // X86 types are specialized below. - template , bool> = true> - static inline void store(T *dst, T value); - - // X86 types are specialized below. - template , bool> = true> - static inline T splat(ubyte value); - - // X86 types are specialized below. - template , bool> = true> - static inline uint64_t notEquals(T v1, T v2); - - template , bool> = true> - static inline int32_t threeWayCmp(T v1, T v2) { - return char_diff(reinterpret_cast(&v1), - reinterpret_cast(&v2), notEquals(v1, v2)); - } - - // Returns the type to use to consume Size bytes. - template - using getNextType = cpp::conditional_t< - (HAS_M512 && Size >= 64), __m512i, - cpp::conditional_t< - (HAS_M256 && Size >= 32), __m256i, - cpp::conditional_t<(HAS_M128 && Size >= 16), __m128i, - Scalar64BitBackend::getNextType>>>; - -private: - static inline int32_t char_diff(const char *a, const char *b, uint64_t mask) { - const size_t diff_index = mask == 0 ? 
0 : __builtin_ctzll(mask); - const int16_t ca = (unsigned char)a[diff_index]; - const int16_t cb = (unsigned char)b[diff_index]; - return ca - cb; - } -}; - -static inline void repmovsb(void *dst, const void *src, size_t runtime_size) { - asm volatile("rep movsb" - : "+D"(dst), "+S"(src), "+c"(runtime_size) - : - : "memory"); -} - -#define SPECIALIZE_LOAD(T, OS, AS, INTRISIC) \ - template <> inline T X86Backend::load(const T *src) { \ - return INTRISIC(const_cast(src)); \ - } -#define SPECIALIZE_STORE(T, OS, AS, INTRISIC) \ - template <> inline void X86Backend::store(T * dst, T value) { \ - INTRISIC(dst, value); \ - } - -#if HAS_M128 -SPECIALIZE_LOAD(__m128i, Temporality::TEMPORAL, Aligned::YES, _mm_load_si128) -SPECIALIZE_LOAD(__m128i, Temporality::TEMPORAL, Aligned::NO, _mm_loadu_si128) -SPECIALIZE_LOAD(__m128i, Temporality::NON_TEMPORAL, Aligned::YES, - _mm_stream_load_si128) -// X86 non-temporal load needs aligned access -SPECIALIZE_STORE(__m128i, Temporality::TEMPORAL, Aligned::YES, _mm_store_si128) -SPECIALIZE_STORE(__m128i, Temporality::TEMPORAL, Aligned::NO, _mm_storeu_si128) -SPECIALIZE_STORE(__m128i, Temporality::NON_TEMPORAL, Aligned::YES, - _mm_stream_si128) -// X86 non-temporal store needs aligned access -template <> inline __m128i X86Backend::splat<__m128i>(ubyte value) { - return _mm_set1_epi8(__builtin_bit_cast(char, value)); -} -template <> -inline uint64_t X86Backend::notEquals<__m128i>(__m128i a, __m128i b) { - using T = char __attribute__((__vector_size__(16))); - return _mm_movemask_epi8(T(a) != T(b)); -} -#endif // HAS_M128 - -#if HAS_M256 -SPECIALIZE_LOAD(__m256i, Temporality::TEMPORAL, Aligned::YES, _mm256_load_si256) -SPECIALIZE_LOAD(__m256i, Temporality::TEMPORAL, Aligned::NO, _mm256_loadu_si256) -SPECIALIZE_LOAD(__m256i, Temporality::NON_TEMPORAL, Aligned::YES, - _mm256_stream_load_si256) -// X86 non-temporal load needs aligned access -SPECIALIZE_STORE(__m256i, Temporality::TEMPORAL, Aligned::YES, - _mm256_store_si256) -SPECIALIZE_STORE(__m256i, Temporality::TEMPORAL, Aligned::NO, - _mm256_storeu_si256) -SPECIALIZE_STORE(__m256i, Temporality::NON_TEMPORAL, Aligned::YES, - _mm256_stream_si256) -// X86 non-temporal store needs aligned access -template <> inline __m256i X86Backend::splat<__m256i>(ubyte value) { - return _mm256_set1_epi8(__builtin_bit_cast(char, value)); -} -template <> -inline uint64_t X86Backend::notEquals<__m256i>(__m256i a, __m256i b) { - using T = char __attribute__((__vector_size__(32))); - return _mm256_movemask_epi8(T(a) != T(b)); -} -#endif // HAS_M256 - -#if HAS_M512 -SPECIALIZE_LOAD(__m512i, Temporality::TEMPORAL, Aligned::YES, _mm512_load_si512) -SPECIALIZE_LOAD(__m512i, Temporality::TEMPORAL, Aligned::NO, _mm512_loadu_si512) -SPECIALIZE_LOAD(__m512i, Temporality::NON_TEMPORAL, Aligned::YES, - _mm512_stream_load_si512) -// X86 non-temporal load needs aligned access -SPECIALIZE_STORE(__m512i, Temporality::TEMPORAL, Aligned::YES, - _mm512_store_si512) -SPECIALIZE_STORE(__m512i, Temporality::TEMPORAL, Aligned::NO, - _mm512_storeu_si512) -SPECIALIZE_STORE(__m512i, Temporality::NON_TEMPORAL, Aligned::YES, - _mm512_stream_si512) -// X86 non-temporal store needs aligned access -template <> inline __m512i X86Backend::splat<__m512i>(ubyte value) { - return _mm512_broadcastb_epi8(_mm_set1_epi8(__builtin_bit_cast(char, value))); -} -template <> -inline uint64_t X86Backend::notEquals<__m512i>(__m512i a, __m512i b) { - return _mm512_cmpneq_epi8_mask(a, b); -} -#endif // HAS_M512 - -namespace x86 { -using _1 = SizedOp; -using _2 = SizedOp; -using _3 = 
SizedOp<X86Backend, 3>;
-using _4 = SizedOp<X86Backend, 4>;
-using _8 = SizedOp<X86Backend, 8>;
-using _16 = SizedOp<X86Backend, 16>;
-using _32 = SizedOp<X86Backend, 32>;
-using _64 = SizedOp<X86Backend, 64>;
-using _128 = SizedOp<X86Backend, 128>;
-} // namespace x86
-
-} // namespace __llvm_libc
-
-#endif // defined(LLVM_LIBC_ARCH_X86)
-
-#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_X86_H
diff --git a/libc/src/string/memory_utils/backends.h b/libc/src/string/memory_utils/backends.h
deleted file mode 100644
index 6d241fa5eb2898..00000000000000
--- a/libc/src/string/memory_utils/backends.h
+++ /dev/null
@@ -1,60 +0,0 @@
-//===-- Elementary operations to compose memory primitives ----------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the concept of a Backend.
-// It constitutes the lowest level of the framework and is akin to instruction
-// selection. It defines how to implement aligned/unaligned,
-// temporal/non-temporal native loads and stores for a particular architecture
-// as well as efficient ways to fill and compare types.
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKENDS_H
-#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKENDS_H
-
-#include "src/string/memory_utils/address.h"  // Temporality, Aligned
-#include "src/string/memory_utils/sized_op.h" // SizedOp
-#include <stddef.h> // size_t
-#include <stdint.h> // uint##_t
-
-namespace __llvm_libc {
-
-// Backends must implement the following interface.
-struct NoBackend {
-  static constexpr bool IS_BACKEND_TYPE = true;
-
-  // Loads a T from `src` honoring Temporality and Alignment.
-  template <typename T, Temporality TS, Aligned AS> static T load(const T *src);
-
-  // Stores a T to `dst` honoring Temporality and Alignment.
-  template <typename T, Temporality TS, Aligned AS>
-  static void store(T *dst, T value);
-
-  // Returns a T filled with `value` bytes.
-  template <typename T> static T splat(ubyte value);
-
-  // Returns zero iff v1 == v2.
-  template <typename T> static uint64_t notEquals(T v1, T v2);
-
-  // Returns zero iff v1 == v2, a negative number if v1 < v2 and a positive
-  // number otherwise.
-  template <typename T> static int32_t threeWayCmp(T v1, T v2);
-
-  // Returns the type to use to consume Size bytes.
-  // If no type handles Size bytes at once, `getNextType` is `void`.
-  template <size_t Size> using getNextType = void;
-};
-
-} // namespace __llvm_libc
-
-// We inline all backend implementations here to simplify the build system.
-// Each file needs to be guarded with the appropriate LLVM_LIBC_ARCH_XXX ifdef.
-#include "src/string/memory_utils/backend_aarch64.h"
-#include "src/string/memory_utils/backend_scalar.h"
-#include "src/string/memory_utils/backend_x86.h"
-
-#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKENDS_H
diff --git a/libc/src/string/memory_utils/bzero_implementations.h b/libc/src/string/memory_utils/bzero_implementations.h
new file mode 100644
index 00000000000000..168fdd7e531d25
--- /dev/null
+++ b/libc/src/string/memory_utils/bzero_implementations.h
@@ -0,0 +1,24 @@
+//===-- Implementation of bzero -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BZERO_IMPLEMENTATIONS_H
+#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BZERO_IMPLEMENTATIONS_H
+
+#include "src/string/memory_utils/memset_implementations.h"
+
+#include <stddef.h> // size_t
+
+namespace __llvm_libc {
+
+inline static void inline_bzero(char *dst, size_t count) {
+  inline_memset(dst, 0, count);
+}
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BZERO_IMPLEMENTATIONS_H
diff --git a/libc/src/string/memory_utils/sized_op.h b/libc/src/string/memory_utils/sized_op.h
deleted file mode 100644
index 2bca50d6c56d1f..00000000000000
--- a/libc/src/string/memory_utils/sized_op.h
+++ /dev/null
@@ -1,180 +0,0 @@
-//===-- Sized Operations --------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the SizedOp struct that serves as the middle end of the
-// framework. It implements sized memory operations by breaking them down into
-// simpler types whose availability is described in the Backend. It also
-// provides a way to load and store sized chunks of memory (necessary for the
-// move operation). SizedOp structs are the building blocks of higher order
-// algorithms like HeadTail, Align or Loop.
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_SIZED_OP_H
-#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_SIZED_OP_H
-
-#include <stddef.h> // size_t
-
-#ifndef LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE
-#define LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE                                    \
-  __has_builtin(__builtin_memcpy_inline)
-#endif // LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE
-
-#ifndef LLVM_LIBC_USE_BUILTIN_MEMSET_INLINE
-#define LLVM_LIBC_USE_BUILTIN_MEMSET_INLINE                                    \
-  __has_builtin(__builtin_memset_inline)
-#endif // LLVM_LIBC_USE_BUILTIN_MEMSET_INLINE
-
-namespace __llvm_libc {
-
-template <typename Backend, size_t Size> struct SizedOp {
-  static constexpr size_t SIZE = Size;
-  // Instantiations of SizedOp are fixed size operations,
-  // i.e. operations that are composable by the types in algorithm.h.
-  static constexpr bool IS_FIXED_SIZE = true;
-
-private:
-  static_assert(Backend::IS_BACKEND_TYPE);
-  static_assert(SIZE > 0);
-  using type = typename Backend::template getNextType<Size>;
-  static constexpr size_t TYPE_SIZE = sizeof(type);
-  static_assert(SIZE >= TYPE_SIZE);
-  static constexpr size_t NEXT_SIZE = Size - TYPE_SIZE;
-  using NextBlock = SizedOp<Backend, NEXT_SIZE>;
-
-  // Returns whether we can use an aligned operation.
-  // This is possible because the address type carries known compile-time
-  // alignment information.
-  template <typename AddrT, typename T> static constexpr Aligned isAligned() {
-    static_assert(IsAddressType<AddrT>::value);
-    return AddrT::ALIGNMENT > 1 && AddrT::ALIGNMENT >= sizeof(T) ? Aligned::YES
-                                                                 : Aligned::NO;
-  }
-
-  // Loads a value of the current `type` from `src`.
-  // This function is responsible for extracting Temporality and Alignment from
-  // the Address type.
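(The `nativeLoad` helper the comment above describes follows right after this note.) It is worth making the decomposition above concrete first: the `type`/`NEXT_SIZE`/`NextBlock` recursion greedily consumes the largest native type the backend offers and recurses on the remainder. A hedged standalone sketch of that policy for the scalar backend (our names, plain power-of-two sizes only):

#include <stddef.h> // size_t
#include <stdio.h>  // printf

// Greedy decomposition as in the scalar backend's getNextType: take the
// largest of {8, 4, 2, 1} that fits, then recurse on what remains.
static constexpr size_t next_chunk(size_t size) {
  return size >= 8 ? 8 : size >= 4 ? 4 : size >= 2 ? 2 : 1;
}

int main() {
  for (size_t size = 15; size > 0; size -= next_chunk(size))
    printf("%zu ", next_chunk(size)); // prints "8 4 2 1" for Size == 15
  return 0;
}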
-  template <typename SrcAddrT> static inline auto nativeLoad(SrcAddrT src) {
-    static_assert(IsAddressType<SrcAddrT>::value && SrcAddrT::IS_READ);
-    constexpr auto AS = isAligned<SrcAddrT, type>();
-    constexpr auto TS = SrcAddrT::TEMPORALITY;
-    return Backend::template load<type, TS, AS>(as<type>(src));
-  }
-
-  // Stores a value of the current `type` to `dst`.
-  // This function is responsible for extracting Temporality and Alignment from
-  // the Address type.
-  template <typename DstAddrT>
-  static inline void nativeStore(type value, DstAddrT dst) {
-    static_assert(IsAddressType<DstAddrT>::value && DstAddrT::IS_WRITE);
-    constexpr auto AS = isAligned<DstAddrT, type>();
-    constexpr auto TS = DstAddrT::TEMPORALITY;
-    return Backend::template store<type, TS, AS>(as<type>(dst), value);
-  }
-
-  // A well aligned POD structure to store Size bytes.
-  // This is used to implement the move operations.
-  struct Value {
-    alignas(alignof(type)) ubyte payload[Size];
-  };
-
-public:
-  template <typename DstAddrT, typename SrcAddrT>
-  static inline void copy(DstAddrT dst, SrcAddrT src) {
-    static_assert(IsAddressType<DstAddrT>::value && DstAddrT::IS_WRITE);
-    static_assert(IsAddressType<SrcAddrT>::value && SrcAddrT::IS_READ);
-    if constexpr (LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE &&
-                  DstAddrT::TEMPORALITY == Temporality::TEMPORAL &&
-                  SrcAddrT::TEMPORALITY == Temporality::TEMPORAL) {
-      // Delegate the optimized copy to the compiler.
-      __builtin_memcpy_inline(dst.ptr(), src.ptr(), Size);
-      return;
-    }
-    nativeStore(nativeLoad(src), dst);
-    if constexpr (NEXT_SIZE > 0)
-      NextBlock::copy(offsetAddr<TYPE_SIZE>(dst), offsetAddr<TYPE_SIZE>(src));
-  }
-
-  template <typename DstAddrT, typename SrcAddrT>
-  static inline void move(DstAddrT dst, SrcAddrT src) {
-    const auto payload = nativeLoad(src);
-    if constexpr (NEXT_SIZE > 0)
-      NextBlock::move(offsetAddr<TYPE_SIZE>(dst), offsetAddr<TYPE_SIZE>(src));
-    nativeStore(payload, dst);
-  }
-
-  template <typename DstAddrT>
-  static inline void set(DstAddrT dst, ubyte value) {
-    if constexpr (LLVM_LIBC_USE_BUILTIN_MEMSET_INLINE &&
-                  DstAddrT::TEMPORALITY == Temporality::TEMPORAL) {
-      // Delegate the optimized set to the compiler.
-      __builtin_memset_inline(dst.ptr(), static_cast<unsigned char>(value),
-                              Size);
-      return;
-    }
-    nativeStore(Backend::template splat<type>(value), dst);
-    if constexpr (NEXT_SIZE > 0)
-      NextBlock::set(offsetAddr<TYPE_SIZE>(dst), value);
-  }
-
-  template <typename SrcAddrT1, typename SrcAddrT2>
-  static inline uint64_t isDifferent(SrcAddrT1 src1, SrcAddrT2 src2) {
-    const uint64_t current =
-        Backend::template notEquals<type>(nativeLoad(src1), nativeLoad(src2));
-    if constexpr (NEXT_SIZE > 0) {
-      // In the case where we cannot handle Size with a single operation (e.g.
-      // Size == 3) we can either return early if `current` is non-zero or
-      // aggregate all the operations through the bitwise or operator.
-      // We chose the latter to reduce branching.
-      return current | (NextBlock::isDifferent(offsetAddr<TYPE_SIZE>(src1),
                                                offsetAddr<TYPE_SIZE>(src2)));
-    } else {
-      return current;
-    }
-  }
-
-  template <typename SrcAddrT1, typename SrcAddrT2>
-  static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2) {
-    const auto a = nativeLoad(src1);
-    const auto b = nativeLoad(src2);
-    // If we cannot handle Size as a single operation we have two choices:
-    // - Either use Backend's threeWayCmp directly and return it if it is
-    //   non-zero.
-    //
-    //   if (int32_t res = Backend::template threeWayCmp<type>(a, b))
-    //     return res;
-    //
-    // - Or use Backend's notEquals first and use threeWayCmp only if
-    //   different; the assumption here is that notEquals is faster than
-    //   threeWayCmp and that we can save cycles when the Size needs to be
-    //   decomposed into many sizes (e.g. Size == 7 => 4 + 2 + 1)
-    //
-    //   if (Backend::template notEquals<type>(a, b))
-    //     return Backend::template threeWayCmp<type>(a, b);
-    //
-    // We chose the former to reduce code bloat and branching.
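(The chosen comparison follows right after this note.) To make the trade-off concrete, here is a hedged standalone sketch of the two strategies over single bytes (our names; the real scalar backend also byte-swaps to big-endian first so that chunk comparisons match memcmp semantics, which is elided here):

#include <stdint.h>

// Strategy 1 (the one kept below): three-way compare each chunk directly.
static int32_t three_way_direct(uint8_t a, uint8_t b) {
  return a > b ? 1 : a < b ? -1 : 0;
}

// Strategy 2 (rejected): cheap inequality test first, full compare only on
// mismatch; it saves cycles when Size decomposes into many chunks.
static int32_t three_way_lazy(uint8_t a, uint8_t b) {
  if (a ^ b) // notEquals: non-zero iff different
    return a > b ? 1 : -1;
  return 0;
}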
-    if (int32_t res = Backend::template threeWayCmp<type>(a, b))
-      return res;
-    if constexpr (NEXT_SIZE > 0)
-      return NextBlock::threeWayCmp(offsetAddr<TYPE_SIZE>(src1),
-                                    offsetAddr<TYPE_SIZE>(src2));
-    return 0;
-  }
-
-  template <typename SrcAddrT> static Value load(SrcAddrT src) {
-    Value output;
-    copy(DstAddr<alignof(type)>(output.payload), src);
-    return output;
-  }
-
-  template <typename DstAddrT> static void store(DstAddrT dst, Value value) {
-    copy(dst, SrcAddr<alignof(type)>(value.payload));
-  }
-};
-
-} // namespace __llvm_libc
-
-#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_SIZED_OP_H
diff --git a/libc/src/string/stpncpy.cpp b/libc/src/string/stpncpy.cpp
index 25e916251bad89..cc4d89d8e2bbcf 100644
--- a/libc/src/string/stpncpy.cpp
+++ b/libc/src/string/stpncpy.cpp
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "src/string/stpncpy.h"
-#include "src/string/memory_utils/memset_implementations.h"
+#include "src/string/memory_utils/bzero_implementations.h"
 
 #include "src/__support/common.h"
 
@@ -22,7 +22,7 @@ LLVM_LIBC_FUNCTION(char *, stpncpy,
     dest[i] = src[i];
   // When n > strlen(src), n - strlen(src) '\0' bytes are appended.
   if (n > i)
-    inline_memset(dest + i, 0, n - i);
+    inline_bzero(dest + i, n - i);
   return dest + i;
 }
diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h
index 708475e4e97f58..b1b434dbf17235 100644
--- a/libc/src/string/string_utils.h
+++ b/libc/src/string/string_utils.h
@@ -12,7 +12,7 @@
 #include "src/__support/CPP/bitset.h"
 #include "src/__support/common.h"
 #include "src/string/memory_utils/memcpy_implementations.h"
-#include "src/string/memory_utils/memset_implementations.h"
+#include "src/string/memory_utils/bzero_implementations.h"
 #include <stddef.h> // size_t
 
 namespace __llvm_libc {
@@ -94,7 +94,7 @@ static inline size_t strlcpy(char *__restrict dst, const char *__restrict src,
     return len;
   size_t n = len < size - 1 ? len : size - 1;
   inline_memcpy(dst, src, n);
-  inline_memset(dst + n, 0, size - n);
+  inline_bzero(dst + n, size - n);
   return len;
 }
diff --git a/libc/test/src/string/memory_utils/CMakeLists.txt b/libc/test/src/string/memory_utils/CMakeLists.txt
index 4d8e45d8cdce55..8f926273de5d57 100644
--- a/libc/test/src/string/memory_utils/CMakeLists.txt
+++ b/libc/test/src/string/memory_utils/CMakeLists.txt
@@ -3,8 +3,6 @@ add_libc_unittest(
   SUITE
     libc_string_unittests
   SRCS
-    address_test.cpp
-    backend_test.cpp
     elements_test.cpp
     memory_access_test.cpp
     utils_test.cpp
@@ -17,19 +15,3 @@ add_libc_unittest(
     libc.src.__support.CPP.array
     libc.src.__support.CPP.span
   )
-
-if(NOT LLVM_LIBC_FULL_BUILD)
-# Disabling this unittest in fullbuild mode as #include <sstream> is pulling an
-# incomplete pthread implementation from llvm-libc.
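(The guarded `add_libc_unittest` block for algorithm_test continues below.) One note on the new `inline_bzero` helper this hunk switches to: it is a zero-cost wrapper over `inline_memset(dst, 0, count)` that documents intent at call sites. A hedged standalone sketch of the patched `strlcpy` pattern, written with the plain libc equivalents (our names):

#include <stddef.h> // size_t
#include <string.h> // strlen, memcpy, memset

// Standalone strlcpy-style sketch mirroring the patched call sites: copy what
// fits, then zero-fill the rest of the destination (terminator included).
static size_t strlcpy_sketch(char *dst, const char *src, size_t size) {
  const size_t len = strlen(src);
  if (size == 0)
    return len;
  const size_t n = len < size - 1 ? len : size - 1;
  memcpy(dst, src, n);
  memset(dst + n, 0, size - n); // the patch spells this inline_bzero(dst + n, size - n)
  return len;
}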
-add_libc_unittest( - algorithm_test - SUITE - libc_string_unittests - SRCS - algorithm_test.cpp - DEPENDS - libc.src.string.memory_utils.memory_utils - libc.src.__support.CPP.array - libc.src.__support.CPP.span -) -endif() diff --git a/libc/test/src/string/memory_utils/address_test.cpp b/libc/test/src/string/memory_utils/address_test.cpp deleted file mode 100644 index fe9361ba573e53..00000000000000 --- a/libc/test/src/string/memory_utils/address_test.cpp +++ /dev/null @@ -1,80 +0,0 @@ -#include "utils/UnitTest/Test.h" -#include - -namespace __llvm_libc { - -TEST(LlvmLibcAddress, AliasAreAddresses) { - ASSERT_TRUE(IsAddressType>::value); - ASSERT_TRUE(IsAddressType>::value); - ASSERT_TRUE(IsAddressType>::value); - ASSERT_TRUE(IsAddressType>::value); -} - -TEST(LlvmLibcAddress, AliasHaveRightPermissions) { - ASSERT_TRUE(SrcAddr<1>::IS_READ); - ASSERT_TRUE(NtSrcAddr<1>::IS_READ); - ASSERT_TRUE(DstAddr<1>::IS_WRITE); - ASSERT_TRUE(NtDstAddr<1>::IS_WRITE); -} - -TEST(LlvmLibcAddress, AliasHaveRightSemantic) { - ASSERT_EQ(SrcAddr<1>::TEMPORALITY, Temporality::TEMPORAL); - ASSERT_EQ(DstAddr<1>::TEMPORALITY, Temporality::TEMPORAL); - ASSERT_EQ(NtSrcAddr<1>::TEMPORALITY, Temporality::NON_TEMPORAL); - ASSERT_EQ(NtDstAddr<1>::TEMPORALITY, Temporality::NON_TEMPORAL); -} - -TEST(LlvmLibcAddress, AliasHaveRightAlignment) { - ASSERT_EQ(SrcAddr<1>::ALIGNMENT, size_t(1)); - ASSERT_EQ(SrcAddr<4>::ALIGNMENT, size_t(4)); -} - -TEST(LlvmLibcAddress, NarrowAlignment) { - // Address 8-byte aligned, offset by 8. - ASSERT_EQ(offsetAddr<8>(SrcAddr<8>(nullptr)).ALIGNMENT, size_t(8)); - // Address 16-byte aligned, offset by 4. - ASSERT_EQ(offsetAddr<4>(SrcAddr<16>(nullptr)).ALIGNMENT, size_t(4)); - // Address 4-byte aligned, offset by 16. - ASSERT_EQ(offsetAddr<16>(SrcAddr<4>(nullptr)).ALIGNMENT, size_t(4)); - // Address 4-byte aligned, offset by 1. - ASSERT_EQ(offsetAddr<1>(SrcAddr<4>(nullptr)).ALIGNMENT, size_t(1)); - // Address 4-byte aligned, offset by 2. - ASSERT_EQ(offsetAddr<2>(SrcAddr<4>(nullptr)).ALIGNMENT, size_t(2)); - // Address 4-byte aligned, offset by 6. - ASSERT_EQ(offsetAddr<6>(SrcAddr<4>(nullptr)).ALIGNMENT, size_t(2)); - // Address 4-byte aligned, offset by 10. - ASSERT_EQ(offsetAddr<10>(SrcAddr<4>(nullptr)).ALIGNMENT, size_t(2)); - // Address 8-byte aligned, offset by 6. 
- ASSERT_EQ(offsetAddr<6>(SrcAddr<8>(nullptr)).ALIGNMENT, size_t(2)); -} - -TEST(LlvmLibcAddress, OffsetAddr) { - ubyte a; - SrcAddr<1> addr(&a); - ASSERT_EQ((const void *)offsetAddr<4>(addr).ptr(), (const void *)(&a + 4)); - ASSERT_EQ((const void *)offsetAddr<32>(addr).ptr(), (const void *)(&a + 32)); -} - -TEST(LlvmLibcAddress, AssumeAligned) { - SrcAddr<16> addr(nullptr); - ASSERT_EQ(offsetAddrAssumeAligned<8>(addr, 0).ALIGNMENT, size_t(8)); - ASSERT_EQ(offsetAddrAssumeAligned<1>(addr, 0).ALIGNMENT, size_t(1)); - ASSERT_EQ(offsetAddrMultiplesOf<4>(addr, 0).ALIGNMENT, size_t(4)); - ASSERT_EQ(offsetAddrMultiplesOf<32>(addr, 0).ALIGNMENT, size_t(16)); -} - -TEST(LlvmLibcAddress, offsetAddrAssumeAligned) { - ubyte a; - SrcAddr<1> addr(&a); - ASSERT_EQ((const void *)offsetAddrAssumeAligned<1>(addr, 17).ptr(), - (const void *)(&a + 17)); -} - -TEST(LlvmLibcAddress, offsetAddrMultiplesOf) { - ubyte a; - SrcAddr<1> addr(&a); - ASSERT_EQ((const void *)offsetAddrMultiplesOf<4>(addr, 16).ptr(), - (const void *)(&a + 16)); -} - -} // namespace __llvm_libc diff --git a/libc/test/src/string/memory_utils/algorithm_test.cpp b/libc/test/src/string/memory_utils/algorithm_test.cpp deleted file mode 100644 index d973fbcd5c19a9..00000000000000 --- a/libc/test/src/string/memory_utils/algorithm_test.cpp +++ /dev/null @@ -1,566 +0,0 @@ -#define LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE 0 -#define LLVM_LIBC_USE_BUILTIN_MEMSET_INLINE 0 - -#include "utils/UnitTest/Test.h" -#include -#include -#include - -#include - -namespace __llvm_libc { - -struct alignas(64) Buffer : cpp::array { - bool contains(const char *ptr) const { - return ptr >= data() && ptr < (data() + size()); - } - size_t getOffset(const char *ptr) const { return ptr - data(); } - void fill(char c) { - for (auto itr = begin(); itr != end(); ++itr) - *itr = c; - } -}; - -static Buffer buffer1; -static Buffer buffer2; -static std::ostringstream LOG; - -struct TestBackend { - static constexpr bool IS_BACKEND_TYPE = true; - - template static void log(const char *Action, const char *ptr) { - LOG << Action << "<" << sizeof(T) << "> "; - if (buffer1.contains(ptr)) - LOG << "a[" << buffer1.getOffset(ptr) << "]"; - else if (buffer2.contains(ptr)) - LOG << "b[" << buffer2.getOffset(ptr) << "]"; - LOG << "\n"; - } - - template - static T load(const T *src) { - log((AS == Aligned::YES ? "LdA" : "LdU"), - reinterpret_cast(src)); - return Scalar64BitBackend::load(src); - } - - template - static void store(T *dst, T value) { - log((AS == Aligned::YES ? 
"StA" : "StU"), - reinterpret_cast(dst)); - Scalar64BitBackend::store(dst, value); - } - - template static inline T splat(ubyte value) { - LOG << "Splat<" << sizeof(T) << "> " << (unsigned)value << '\n'; - return Scalar64BitBackend::splat(value); - } - - template static inline uint64_t notEquals(T v1, T v2) { - LOG << "Neq<" << sizeof(T) << ">\n"; - return Scalar64BitBackend::notEquals(v1, v2); - } - - template static inline int32_t threeWayCmp(T v1, T v2) { - LOG << "Diff<" << sizeof(T) << ">\n"; - return Scalar64BitBackend::threeWayCmp(v1, v2); - } - - template - using getNextType = Scalar64BitBackend::getNextType; -}; - -struct LlvmLibcAlgorithm : public testing::Test { - void SetUp() override { - LOG = std::ostringstream(); - LOG << '\n'; - } - - void fillEqual() { - buffer1.fill('a'); - buffer2.fill('a'); - } - - void fillDifferent() { - buffer1.fill('a'); - buffer2.fill('b'); - } - - const char *getTrace() { - trace_ = LOG.str(); - return trace_.c_str(); - } - - const char *stripComments(const char *expected) { - expected_.clear(); - std::stringstream ss(expected); - std::string line; - while (std::getline(ss, line, '\n')) { - const auto pos = line.find('#'); - if (pos == std::string::npos) { - expected_ += line; - } else { - auto log = line.substr(0, pos); - while (!log.empty() && std::isspace(log.back())) - log.pop_back(); - expected_ += log; - } - expected_ += '\n'; - } - return expected_.c_str(); - } - - template SrcAddr buf1(size_t offset = 0) const { - return buffer1.data() + offset; - } - template SrcAddr buf2(size_t offset = 0) const { - return buffer2.data() + offset; - } - template DstAddr dst(size_t offset = 0) const { - return buffer1.data() + offset; - } - template SrcAddr src(size_t offset = 0) const { - return buffer2.data() + offset; - } - -private: - std::string trace_; - std::string expected_; -}; - -using _8 = SizedOp; - -/////////////////////////////////////////////////////////////////////////////// -//// Testing fixed fized forward operations -/////////////////////////////////////////////////////////////////////////////// - -/////////////////////////////////////////////////////////////////////////////// -// Copy - -TEST_F(LlvmLibcAlgorithm, copy_1) { - SizedOp::copy(dst(), src()); - EXPECT_STREQ(getTrace(), stripComments(R"( -LdU<1> b[0] -StU<1> a[0] -)")); -} - -TEST_F(LlvmLibcAlgorithm, copy_15) { - SizedOp::copy(dst(), src()); - EXPECT_STREQ(getTrace(), stripComments(R"( -LdU<8> b[0] -StU<8> a[0] -LdU<4> b[8] -StU<4> a[8] -LdU<2> b[12] -StU<2> a[12] -LdU<1> b[14] -StU<1> a[14] -)")); -} - -TEST_F(LlvmLibcAlgorithm, copy_16) { - SizedOp::copy(dst(), src()); - EXPECT_STREQ(getTrace(), stripComments(R"( -LdU<8> b[0] -StU<8> a[0] -LdU<8> b[8] -StU<8> a[8] -)")); -} - -/////////////////////////////////////////////////////////////////////////////// -// Move - -TEST_F(LlvmLibcAlgorithm, move_1) { - SizedOp::move(dst(), src()); - EXPECT_STREQ(getTrace(), stripComments(R"( -LdU<1> b[0] -StU<1> a[0] -)")); -} - -TEST_F(LlvmLibcAlgorithm, move_15) { - SizedOp::move(dst(), src()); - EXPECT_STREQ(getTrace(), stripComments(R"( -LdU<8> b[0] -LdU<4> b[8] -LdU<2> b[12] -LdU<1> b[14] -StU<1> a[14] -StU<2> a[12] -StU<4> a[8] -StU<8> a[0] -)")); -} - -TEST_F(LlvmLibcAlgorithm, move_16) { - SizedOp::move(dst(), src()); - EXPECT_STREQ(getTrace(), stripComments(R"( -LdU<8> b[0] -LdU<8> b[8] -StU<8> a[8] -StU<8> a[0] -)")); -} - -/////////////////////////////////////////////////////////////////////////////// -// set - -TEST_F(LlvmLibcAlgorithm, set_1) { - SizedOp::set(dst(), 
ubyte{42}); - EXPECT_STREQ(getTrace(), stripComments(R"( -Splat<1> 42 -StU<1> a[0] -)")); -} - -TEST_F(LlvmLibcAlgorithm, set_15) { - SizedOp::set(dst(), ubyte{42}); - EXPECT_STREQ(getTrace(), stripComments(R"( -Splat<8> 42 -StU<8> a[0] -Splat<4> 42 -StU<4> a[8] -Splat<2> 42 -StU<2> a[12] -Splat<1> 42 -StU<1> a[14] -)")); -} - -TEST_F(LlvmLibcAlgorithm, set_16) { - SizedOp::set(dst(), ubyte{42}); - EXPECT_STREQ(getTrace(), stripComments(R"( -Splat<8> 42 -StU<8> a[0] -Splat<8> 42 -StU<8> a[8] -)")); -} - -/////////////////////////////////////////////////////////////////////////////// -// different - -TEST_F(LlvmLibcAlgorithm, different_1) { - fillEqual(); - SizedOp::isDifferent(buf1(), buf2()); - EXPECT_STREQ(getTrace(), stripComments(R"( -LdU<1> a[0] -LdU<1> b[0] -Neq<1> -)")); -} - -TEST_F(LlvmLibcAlgorithm, different_15) { - fillEqual(); - SizedOp::isDifferent(buf1(), buf2()); - EXPECT_STREQ(getTrace(), stripComments(R"( -LdU<8> a[0] -LdU<8> b[0] -Neq<8> -LdU<4> a[8] -LdU<4> b[8] -Neq<4> -LdU<2> a[12] -LdU<2> b[12] -Neq<2> -LdU<1> a[14] -LdU<1> b[14] -Neq<1> -)")); -} - -TEST_F(LlvmLibcAlgorithm, different_15_no_shortcircuit) { - fillDifferent(); - SizedOp::isDifferent(buf1(), buf2()); - // If buffer compare isDifferent we continue to aggregate. - EXPECT_STREQ(getTrace(), stripComments(R"( -LdU<8> a[0] -LdU<8> b[0] -Neq<8> -LdU<4> a[8] -LdU<4> b[8] -Neq<4> -LdU<2> a[12] -LdU<2> b[12] -Neq<2> -LdU<1> a[14] -LdU<1> b[14] -Neq<1> -)")); -} - -TEST_F(LlvmLibcAlgorithm, different_16) { - fillEqual(); - SizedOp::isDifferent(buf1(), buf2()); - EXPECT_STREQ(getTrace(), stripComments(R"( -LdU<8> a[0] -LdU<8> b[0] -Neq<8> -LdU<8> a[8] -LdU<8> b[8] -Neq<8> -)")); -} - -/////////////////////////////////////////////////////////////////////////////// -// three_way_cmp - -TEST_F(LlvmLibcAlgorithm, three_way_cmp_eq_1) { - fillEqual(); - SizedOp::threeWayCmp(buf1(), buf2()); - // Buffer compare equal, returning 0 and no call to Diff. - EXPECT_STREQ(getTrace(), stripComments(R"( -LdU<1> a[0] -LdU<1> b[0] -Diff<1> -)")); -} - -TEST_F(LlvmLibcAlgorithm, three_way_cmp_eq_15) { - fillEqual(); - SizedOp::threeWayCmp(buf1(), buf2()); - // Buffer compare equal, returning 0 and no call to Diff. - EXPECT_STREQ(getTrace(), stripComments(R"( -LdU<8> a[0] -LdU<8> b[0] -Diff<8> -LdU<4> a[8] -LdU<4> b[8] -Diff<4> -LdU<2> a[12] -LdU<2> b[12] -Diff<2> -LdU<1> a[14] -LdU<1> b[14] -Diff<1> -)")); -} - -TEST_F(LlvmLibcAlgorithm, three_way_cmp_neq_15_shortcircuit) { - fillDifferent(); - SizedOp::threeWayCmp(buf1(), buf2()); - // If buffer compare isDifferent we stop early. - EXPECT_STREQ(getTrace(), stripComments(R"( -LdU<8> a[0] -LdU<8> b[0] -Diff<8> -)")); -} - -TEST_F(LlvmLibcAlgorithm, three_way_cmp_eq_16) { - fillEqual(); - SizedOp::threeWayCmp(buf1(), buf2()); - // Buffer compare equal, returning 0 and no call to Diff. 
- EXPECT_STREQ(getTrace(), stripComments(R"( -LdU<8> a[0] -LdU<8> b[0] -Diff<8> -LdU<8> a[8] -LdU<8> b[8] -Diff<8> -)")); -} - -/////////////////////////////////////////////////////////////////////////////// -//// Testing skip operations -/////////////////////////////////////////////////////////////////////////////// - -TEST_F(LlvmLibcAlgorithm, skip_and_set) { - Skip<11>::Then>::set(dst(), ubyte{42}); - EXPECT_STREQ(getTrace(), stripComments(R"( -Splat<1> 42 -StU<1> a[11] -)")); -} - -TEST_F(LlvmLibcAlgorithm, skip_and_different_1) { - Skip<11>::Then>::isDifferent(buf1(), buf2()); - EXPECT_STREQ(getTrace(), stripComments(R"( -LdU<1> a[11] -LdU<1> b[11] -Neq<1> -)")); -} - -TEST_F(LlvmLibcAlgorithm, skip_and_three_way_cmp_8) { - Skip<11>::Then>::threeWayCmp(buf1(), buf2()); - EXPECT_STREQ(getTrace(), stripComments(R"( -LdU<1> a[11] -LdU<1> b[11] -Diff<1> -)")); -} - -/////////////////////////////////////////////////////////////////////////////// -//// Testing tail operations -/////////////////////////////////////////////////////////////////////////////// - -TEST_F(LlvmLibcAlgorithm, tail_copy_8) { - Tail<_8>::copy(dst(), src(), 16); - EXPECT_STREQ(getTrace(), stripComments(R"( -LdU<8> b[8] -StU<8> a[8] -)")); -} - -TEST_F(LlvmLibcAlgorithm, tail_move_8) { - Tail<_8>::move(dst(), src(), 16); - EXPECT_STREQ(getTrace(), stripComments(R"( -LdU<8> b[8] -StU<8> a[8] -)")); -} - -TEST_F(LlvmLibcAlgorithm, tail_set_8) { - Tail<_8>::set(dst(), ubyte{42}, 16); - EXPECT_STREQ(getTrace(), stripComments(R"( -Splat<8> 42 -StU<8> a[8] -)")); -} - -TEST_F(LlvmLibcAlgorithm, tail_different_8) { - fillEqual(); - Tail<_8>::isDifferent(buf1(), buf2(), 16); - EXPECT_STREQ(getTrace(), stripComments(R"( -LdU<8> a[8] -LdU<8> b[8] -Neq<8> -)")); -} - -TEST_F(LlvmLibcAlgorithm, tail_three_way_cmp_8) { - fillEqual(); - Tail<_8>::threeWayCmp(buf1(), buf2(), 16); - EXPECT_STREQ(getTrace(), stripComments(R"( -LdU<8> a[8] -LdU<8> b[8] -Diff<8> -)")); -} - -/////////////////////////////////////////////////////////////////////////////// -//// Testing HeadTail operations -/////////////////////////////////////////////////////////////////////////////// - -TEST_F(LlvmLibcAlgorithm, head_tail_copy_8) { - HeadTail<_8>::copy(dst(), src(), 16); - EXPECT_STREQ(getTrace(), stripComments(R"( -LdU<8> b[0] -StU<8> a[0] -LdU<8> b[8] -StU<8> a[8] -)")); -} - -/////////////////////////////////////////////////////////////////////////////// -//// Testing Loop operations -/////////////////////////////////////////////////////////////////////////////// - -TEST_F(LlvmLibcAlgorithm, loop_copy_one_iteration_and_tail) { - Loop<_8>::copy(dst(), src(), 10); - EXPECT_STREQ(getTrace(), stripComments(R"( -LdU<8> b[0] -StU<8> a[0] # covers 0-7 -LdU<8> b[2] -StU<8> a[2] # covers 2-9 -)")); -} - -TEST_F(LlvmLibcAlgorithm, loop_copy_two_iteration_and_tail) { - Loop<_8>::copy(dst(), src(), 17); - EXPECT_STREQ(getTrace(), stripComments(R"( -LdU<8> b[0] -StU<8> a[0] # covers 0-7 -LdU<8> b[8] -StU<8> a[8] # covers 8-15 -LdU<8> b[9] -StU<8> a[9] # covers 9-16 -)")); -} - -TEST_F(LlvmLibcAlgorithm, loop_with_one_turn_is_inefficient_but_ok) { - Loop<_8>::copy(dst(), src(), 8); - EXPECT_STREQ(getTrace(), stripComments(R"( -LdU<8> b[0] -StU<8> a[0] # first iteration covers 0-7 -LdU<8> b[0] # tail also covers 0-7 but since Loop is supposed to be used -StU<8> a[0] # with a sufficient number of iterations the tail cost is amortised -)")); -} - -TEST_F(LlvmLibcAlgorithm, loop_with_round_number_of_turn) { - Loop<_8>::copy(dst(), src(), 24); - EXPECT_STREQ(getTrace(), 
stripComments(R"( -LdU<8> b[0] -StU<8> a[0] # first iteration covers 0-7 -LdU<8> b[8] -StU<8> a[8] # second iteration covers 8-15 -LdU<8> b[16] -StU<8> a[16] -)")); -} - -TEST_F(LlvmLibcAlgorithm, dst_aligned_loop) { - Loop<_8>::copy(dst<16>(), src(), 23); - EXPECT_STREQ(getTrace(), stripComments(R"( -LdU<8> b[0] -StA<8> a[0] # store is aligned on 16B -LdU<8> b[8] -StA<8> a[8] # subsequent stores are aligned -LdU<8> b[15] -StU<8> a[15] # Tail is always unaligned -)")); -} - -TEST_F(LlvmLibcAlgorithm, aligned_loop) { - Loop<_8>::copy(dst<16>(), src<8>(), 23); - EXPECT_STREQ(getTrace(), stripComments(R"( -LdA<8> b[0] # load is aligned on 8B -StA<8> a[0] # store is aligned on 16B -LdA<8> b[8] # subsequent loads are aligned -StA<8> a[8] # subsequent stores are aligned -LdU<8> b[15] # Tail is always unaligned -StU<8> a[15] # Tail is always unaligned -)")); -} - -/////////////////////////////////////////////////////////////////////////////// -//// Testing Align operations -/////////////////////////////////////////////////////////////////////////////// - -TEST_F(LlvmLibcAlgorithm, align_dst_copy_8) { - Align<_8, Arg::Dst>::Then>::copy(dst(2), src(3), 31); - EXPECT_STREQ(getTrace(), stripComments(R"( -LdU<8> b[3] -StU<8> a[2] # First store covers unaligned bytes -LdU<8> b[9] -StA<8> a[8] # First aligned store -LdU<8> b[17] -StA<8> a[16] # Subsequent stores are aligned -LdU<8> b[25] -StA<8> a[24] # Subsequent stores are aligned -LdU<8> b[26] -StU<8> a[25] # Last store covers remaining bytes -)")); -} - -TEST_F(LlvmLibcAlgorithm, align_src_copy_8) { - Align<_8, Arg::Src>::Then>::copy(dst(2), src(3), 31); - EXPECT_STREQ(getTrace(), stripComments(R"( -LdU<8> b[3] # First load covers unaligned bytes -StU<8> a[2] -LdA<8> b[8] # First aligned load -StU<8> a[7] -LdA<8> b[16] # Subsequent loads are aligned -StU<8> a[15] -LdA<8> b[24] # Subsequent loads are aligned -StU<8> a[23] -LdU<8> b[26] # Last load covers remaining bytes -StU<8> a[25] -)")); -} - -} // namespace __llvm_libc diff --git a/libc/test/src/string/memory_utils/backend_test.cpp b/libc/test/src/string/memory_utils/backend_test.cpp deleted file mode 100644 index 72fb7c4cf53b1c..00000000000000 --- a/libc/test/src/string/memory_utils/backend_test.cpp +++ /dev/null @@ -1,200 +0,0 @@ -//===-- Unittests for backends --------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "src/__support/CPP/array.h" -#include "src/__support/CPP/bit.h" -#include "src/__support/CPP/span.h" -#include "src/__support/architectures.h" -#include "src/string/memory_utils/backends.h" -#include "utils/UnitTest/Test.h" -#include - -namespace __llvm_libc { - -template using Buffer = cpp::array; - -static char GetRandomChar() { - // Implementation of C++ minstd_rand seeded with 123456789. 
- // https://en.cppreference.com/w/cpp/numeric/random - // "Minimum standard", recommended by Park, Miller, and Stockmeyer in 1993 - static constexpr const uint64_t a = 48271; - static constexpr const uint64_t c = 0; - static constexpr const uint64_t m = 2147483647; - static uint64_t seed = 123456789; - seed = (a * seed + c) % m; - return seed; -} - -static void Randomize(cpp::span buffer) { - for (auto ¤t : buffer) - current = GetRandomChar(); -} - -template static Buffer GetRandomBuffer() { - Buffer buffer; - Randomize(buffer); - return buffer; -} - -template struct Conf { - static_assert(Backend::IS_BACKEND_TYPE); - using BufferT = Buffer; - using T = typename Backend::template getNextType; - static_assert(sizeof(T) == Size); - static constexpr size_t SIZE = Size; - - static BufferT splat(ubyte value) { - return cpp::bit_cast(Backend::template splat(value)); - } - - static uint64_t notEquals(const BufferT &v1, const BufferT &v2) { - return Backend::template notEquals(cpp::bit_cast(v1), - cpp::bit_cast(v2)); - } - - static int32_t threeWayCmp(const BufferT &v1, const BufferT &v2) { - return Backend::template threeWayCmp(cpp::bit_cast(v1), - cpp::bit_cast(v2)); - } -}; - -using FunctionTypes = testing::TypeList< // -#if defined(LLVM_LIBC_ARCH_X86) // - Conf, // - Conf, // - Conf, // - Conf, // -#if HAS_M128 - Conf, // -#endif -#if HAS_M256 - Conf, // -#endif -#if HAS_M512 - Conf, // -#endif -#endif // defined(LLVM_LIBC_ARCH_X86) - Conf, // - Conf, // - Conf, // - Conf // - >; - -TYPED_TEST(LlvmLibcMemoryBackend, splat, FunctionTypes) { - for (auto value : cpp::array{0u, 1u, 255u}) { - alignas(64) const auto stored = - ParamType::splat(cpp::bit_cast(value)); - for (size_t i = 0; i < ParamType::SIZE; ++i) - EXPECT_EQ(cpp::bit_cast(stored[i]), value); - } -} - -TYPED_TEST(LlvmLibcMemoryBackend, notEquals, FunctionTypes) { - alignas(64) const auto a = GetRandomBuffer(); - EXPECT_EQ(ParamType::notEquals(a, a), uint64_t(0)); - for (size_t i = 0; i < a.size(); ++i) { - alignas(64) auto b = a; - ++b[i]; - EXPECT_NE(ParamType::notEquals(a, b), uint64_t(0)); - EXPECT_NE(ParamType::notEquals(b, a), uint64_t(0)); - } -} - -TYPED_TEST(LlvmLibcMemoryBackend, threeWayCmp, FunctionTypes) { - alignas(64) const auto a = GetRandomBuffer(); - EXPECT_EQ(ParamType::threeWayCmp(a, a), 0); - for (size_t i = 0; i < a.size(); ++i) { - alignas(64) auto b = a; - ++b[i]; - const auto cmp = memcmp(&a, &b, sizeof(a)); - ASSERT_NE(cmp, 0); - if (cmp > 0) { - EXPECT_GT(ParamType::threeWayCmp(a, b), 0); - EXPECT_LT(ParamType::threeWayCmp(b, a), 0); - } else { - EXPECT_LT(ParamType::threeWayCmp(a, b), 0); - EXPECT_GT(ParamType::threeWayCmp(b, a), 0); - } - } -} - -template -struct LoadStoreConf { - static_assert(Backend::IS_BACKEND_TYPE); - using BufferT = Buffer; - using T = typename Backend::template getNextType; - static_assert(sizeof(T) == Size); - static constexpr size_t SIZE = Size; - - static BufferT load(const BufferT &ref) { - const auto *ptr = cpp::bit_cast(ref.data()); - const T value = Backend::template load(ptr); - return cpp::bit_cast(value); - } - - static void store(BufferT &ref, const BufferT value) { - auto *ptr = cpp::bit_cast(ref.data()); - Backend::template store(ptr, cpp::bit_cast(value)); - } -}; - -using LoadStoreTypes = testing::TypeList< // -#if defined(LLVM_LIBC_ARCH_X86) // - LoadStoreConf, // - LoadStoreConf, // - LoadStoreConf, // - LoadStoreConf, // - LoadStoreConf, // - LoadStoreConf, // - LoadStoreConf, // - LoadStoreConf, // -#if HAS_M128 - LoadStoreConf, // - LoadStoreConf, // - 
LoadStoreConf, // -#endif -#if HAS_M256 - LoadStoreConf, // - LoadStoreConf, // - LoadStoreConf, // -#endif -#if HAS_M512 - LoadStoreConf, // - LoadStoreConf, // - LoadStoreConf, // -#endif -#endif // defined(LLVM_LIBC_ARCH_X86) - LoadStoreConf, // - LoadStoreConf, // - LoadStoreConf, // - LoadStoreConf, // - LoadStoreConf, // - LoadStoreConf, // - LoadStoreConf, // - LoadStoreConf // - >; - -TYPED_TEST(LlvmLibcMemoryBackend, load, LoadStoreTypes) { - alignas(64) const auto expected = GetRandomBuffer(); - const auto loaded = ParamType::load(expected); - for (size_t i = 0; i < ParamType::SIZE; ++i) - EXPECT_EQ(loaded[i], expected[i]); -} - -TYPED_TEST(LlvmLibcMemoryBackend, store, LoadStoreTypes) { - alignas(64) const auto expected = GetRandomBuffer(); - alignas(64) typename ParamType::BufferT stored; - ParamType::store(stored, expected); - for (size_t i = 0; i < ParamType::SIZE; ++i) - EXPECT_EQ(stored[i], expected[i]); -} - -} // namespace __llvm_libc diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp index d64b051946aecb..926edb71460cee 100644 --- a/lld/COFF/Writer.cpp +++ b/lld/COFF/Writer.cpp @@ -1140,16 +1140,20 @@ size_t Writer::addEntryToStringTable(StringRef str) { Optional Writer::createSymbol(Defined *def) { coff_symbol16 sym; switch (def->kind()) { - case Symbol::DefinedAbsoluteKind: - sym.Value = def->getRVA(); + case Symbol::DefinedAbsoluteKind: { + auto *da = dyn_cast(def); + // Note: COFF symbol can only store 32-bit values, so 64-bit absolute + // values will be truncated. + sym.Value = da->getVA(); sym.SectionNumber = IMAGE_SYM_ABSOLUTE; break; - case Symbol::DefinedSyntheticKind: - // Relative symbols are unrepresentable in a COFF symbol table. - return None; + } default: { // Don't write symbols that won't be written to the output to the symbol // table. + // We also try to write DefinedSynthetic as a normal symbol. Some of these + // symbols do point to an actual chunk, like __safe_se_handler_table. Others + // like __ImageBase are outside of sections and thus cannot be represented. Chunk *c = def->getChunk(); if (!c) return None; diff --git a/lld/test/COFF/symtab-DefinedSynthetic.s b/lld/test/COFF/symtab-DefinedSynthetic.s new file mode 100644 index 00000000000000..f6d8387652f3c1 --- /dev/null +++ b/lld/test/COFF/symtab-DefinedSynthetic.s @@ -0,0 +1,53 @@ +# REQUIRES: x86 + +# The __guard_fids_table is a DefinedSynthetic when control flow guard is +# enabled and there are entries to be added to the fids table. This test uses +# this to check that DefinedSynthetic symbols are being written to the COFF +# symbol table. + +# RUN: llvm-mc -triple x86_64-windows-msvc %s -filetype=obj -o %t.obj +# RUN: lld-link %t.obj -guard:cf -out:%t.exe -entry:main -debug:symtab +# RUN: llvm-readobj --symbols %t.exe | FileCheck --check-prefix=CHECK %s + +# CHECK: Name: __guard_fids_table +# CHECK-NEXT: Value: +# CHECK-NEXT: Section: .rdata (2) + + +# We need @feat.00 to have 0x800 to indicate /guard:cf. 
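+# (Background note, per the MSVC convention: bit 0x800 in the @feat.00
+# absolute symbol marks an object as compiled with /guard:cf; without it
+# the linker would not treat the object as CFG-aware, and no
+# __guard_fids_table symbol would be synthesized for this test to observe.)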
+ .def @feat.00; + .scl 3; + .type 0; + .endef + .globl @feat.00 +@feat.00 = 0x800 + .def main; .scl 2; .type 32; .endef + .globl main # -- Begin function main + .p2align 4, 0x90 +main: + retq + # -- End function + .section .gfids$y,"dr" + .symidx main + .section .giats$y,"dr" + .section .gljmp$y,"dr" + .addrsig_sym main + .section .rdata,"dr" + +.globl _load_config_used + .p2align 3 +_load_config_used: + .long 312 + .fill 124, 1, 0 + .quad __guard_fids_table + .quad __guard_fids_count + .long __guard_flags + .fill 12, 1, 0 + .quad __guard_iat_table + .quad __guard_iat_count + .quad __guard_longjmp_table + .quad __guard_longjmp_count + .fill 72, 1, 0 + .quad __guard_eh_cont_table + .quad __guard_eh_cont_count + .fill 32, 1, 0 diff --git a/lld/test/COFF/symtab.test b/lld/test/COFF/symtab.test index ccf26fde2027b8..41419a942d87bc 100644 --- a/lld/test/COFF/symtab.test +++ b/lld/test/COFF/symtab.test @@ -75,7 +75,7 @@ # CHECK-NEXT: } # CHECK-NEXT: Symbol { # CHECK-NEXT: Name: abs_symbol -# CHECK-NEXT: Value: 2662186735 +# CHECK-NEXT: Value: 3735928559 # CHECK-NEXT: Section: IMAGE_SYM_ABSOLUTE (-1) # CHECK-NEXT: BaseType: Null (0x0) # CHECK-NEXT: ComplexType: Null (0x0) diff --git a/lld/test/COFF/wrap-i386.s b/lld/test/COFF/wrap-i386.s index fd1710f8c3cc5c..4684e3b25e470c 100644 --- a/lld/test/COFF/wrap-i386.s +++ b/lld/test/COFF/wrap-i386.s @@ -16,16 +16,16 @@ // RUN: FileCheck --check-prefix=SYM2 %s < %t.dump // RUN: FileCheck --check-prefix=SYM3 %s < %t.dump -// _foo = 0xffc11000 = 4290842624 -// ___wrap_foo = ffc11010 = 4290842640 +// _foo = 0x00011000 = 69632 +// ___wrap_foo = 0x00011010 = 69648 // SYM1: Name: _foo -// SYM1-NEXT: Value: 4290842624 +// SYM1-NEXT: Value: 69632 // SYM1-NEXT: Section: IMAGE_SYM_ABSOLUTE // SYM1-NEXT: BaseType: Null // SYM1-NEXT: ComplexType: Null // SYM1-NEXT: StorageClass: External // SYM2: Name: ___wrap_foo -// SYM2-NEXT: Value: 4290842640 +// SYM2-NEXT: Value: 69648 // SYM2-NEXT: Section: IMAGE_SYM_ABSOLUTE // SYM2-NEXT: BaseType: Null // SYM2-NEXT: ComplexType: Null diff --git a/lld/test/COFF/wrap.s b/lld/test/COFF/wrap.s index d0afb7f14cdc5f..7a964fa61ce8ae 100644 --- a/lld/test/COFF/wrap.s +++ b/lld/test/COFF/wrap.s @@ -18,16 +18,16 @@ // RUN: FileCheck --check-prefix=SYM2 %s < %t.dump // RUN: FileCheck --check-prefix=SYM3 %s < %t.dump -// foo = 0xC0011000 = 3221295104 -// __wrap_foo = 0xC0011010 = 3221295120 +// foo = 0x00011000 = 69632 +// __wrap_foo = 0x00011010 = 69648 // SYM1: Name: foo -// SYM1-NEXT: Value: 3221295104 +// SYM1-NEXT: Value: 69632 // SYM1-NEXT: Section: IMAGE_SYM_ABSOLUTE // SYM1-NEXT: BaseType: Null // SYM1-NEXT: ComplexType: Null // SYM1-NEXT: StorageClass: External // SYM2: Name: __wrap_foo -// SYM2-NEXT: Value: 3221295120 +// SYM2-NEXT: Value: 69648 // SYM2-NEXT: Section: IMAGE_SYM_ABSOLUTE // SYM2-NEXT: BaseType: Null // SYM2-NEXT: ComplexType: Null diff --git a/lldb/include/lldb/Core/EmulateInstruction.h b/lldb/include/lldb/Core/EmulateInstruction.h index a710c866d9803c..fa049d4180fbf0 100644 --- a/lldb/include/lldb/Core/EmulateInstruction.h +++ b/lldb/include/lldb/Core/EmulateInstruction.h @@ -375,8 +375,11 @@ class EmulateInstruction : public PluginInterface { virtual bool TestEmulation(Stream *out_stream, ArchSpec &arch, OptionValueDictionary *test_data) = 0; - virtual bool GetRegisterInfo(lldb::RegisterKind reg_kind, uint32_t reg_num, - RegisterInfo ®_info) = 0; + bool GetRegisterInfo(lldb::RegisterKind reg_kind, uint32_t reg_num, + RegisterInfo ®_info); + + virtual llvm::Optional + GetRegisterInfo(lldb::RegisterKind 
      reg_kind, uint32_t reg_num) = 0;
 
   // Optional overrides
   virtual bool SetInstruction(const Opcode &insn_opcode,
diff --git a/lldb/source/Core/EmulateInstruction.cpp b/lldb/source/Core/EmulateInstruction.cpp
index 1320e8925553ed..271301b9d3831c 100644
--- a/lldb/source/Core/EmulateInstruction.cpp
+++ b/lldb/source/Core/EmulateInstruction.cpp
@@ -582,3 +582,12 @@ bool EmulateInstruction::CreateFunctionEntryUnwind(UnwindPlan &unwind_plan) {
   unwind_plan.Clear();
   return false;
 }
+
+bool EmulateInstruction::GetRegisterInfo(lldb::RegisterKind reg_kind,
+                                         uint32_t reg_num,
+                                         RegisterInfo &reg_info) {
+  llvm::Optional<RegisterInfo> info = GetRegisterInfo(reg_kind, reg_num);
+  if (info)
+    reg_info = *info;
+  return info.has_value();
+}
\ No newline at end of file
diff --git a/lldb/source/Interpreter/CommandInterpreter.cpp b/lldb/source/Interpreter/CommandInterpreter.cpp
index fa6511635e287c..5c11b87dcbe03c 100644
--- a/lldb/source/Interpreter/CommandInterpreter.cpp
+++ b/lldb/source/Interpreter/CommandInterpreter.cpp
@@ -2471,8 +2471,12 @@ bool CommandInterpreter::DidProcessStopAbnormally() const {
 
   for (const auto &thread_sp : process_sp->GetThreadList().Threads()) {
     StopInfoSP stop_info = thread_sp->GetStopInfo();
-    if (!stop_info)
-      return false;
+    if (!stop_info) {
+      // If there's no stop_info, keep iterating through the other threads;
+      // it's enough that one thread has a stop_info indicating an abnormal
+      // stop for the process to be considered stopped abnormally.
+      continue;
+    }
 
     const StopReason reason = stop_info->GetStopReason();
     if (reason == eStopReasonException ||
diff --git a/lldb/source/Plugins/Instruction/ARM/EmulateInstructionARM.cpp b/lldb/source/Plugins/Instruction/ARM/EmulateInstructionARM.cpp
index 0abfefa43e099b..54aec79d24773e 100644
--- a/lldb/source/Plugins/Instruction/ARM/EmulateInstructionARM.cpp
+++ b/lldb/source/Plugins/Instruction/ARM/EmulateInstructionARM.cpp
@@ -42,7 +42,8 @@ LLDB_PLUGIN_DEFINE_ADV(EmulateInstructionARM, InstructionARM)
 // ITSession implementation
 //
 
-static bool GetARMDWARFRegisterInfo(unsigned reg_num, RegisterInfo &reg_info) {
+static llvm::Optional<RegisterInfo> GetARMDWARFRegisterInfo(unsigned reg_num) {
+  RegisterInfo reg_info;
   ::memset(&reg_info, 0, sizeof(RegisterInfo));
   ::memset(reg_info.kinds, LLDB_INVALID_REGNUM, sizeof(reg_info.kinds));
 
@@ -594,9 +595,9 @@ static bool GetARMDWARFRegisterInfo(unsigned reg_num, RegisterInfo &reg_info) {
     break;
 
   default:
-    return false;
+    return {};
   }
-  return true;
+  return reg_info;
 }
 
 // A8.6.50
@@ -782,9 +783,9 @@ bool EmulateInstructionARM::WriteBits32Unknown(int n) {
   return true;
 }
 
-bool EmulateInstructionARM::GetRegisterInfo(lldb::RegisterKind reg_kind,
-                                            uint32_t reg_num,
-                                            RegisterInfo &reg_info) {
+llvm::Optional<RegisterInfo>
+EmulateInstructionARM::GetRegisterInfo(lldb::RegisterKind reg_kind,
+                                       uint32_t reg_num) {
   if (reg_kind == eRegisterKindGeneric) {
     switch (reg_num) {
     case LLDB_REGNUM_GENERIC_PC:
@@ -808,13 +809,13 @@ bool EmulateInstructionARM::GetRegisterInfo(lldb::RegisterKind reg_kind,
       reg_num = dwarf_cpsr;
       break;
     default:
-      return false;
+      return {};
     }
   }
 
   if (reg_kind == eRegisterKindDWARF)
-    return GetARMDWARFRegisterInfo(reg_num, reg_info);
-  return false;
+    return GetARMDWARFRegisterInfo(reg_num);
+  return {};
 }
 
 uint32_t EmulateInstructionARM::GetFramePointerRegisterNumber() const {
diff --git a/lldb/source/Plugins/Instruction/ARM/EmulateInstructionARM.h b/lldb/source/Plugins/Instruction/ARM/EmulateInstructionARM.h
index c877724a9d3054..9a51445f9c1a96 100644
--- a/lldb/source/Plugins/Instruction/ARM/EmulateInstructionARM.h
+++
b/lldb/source/Plugins/Instruction/ARM/EmulateInstructionARM.h @@ -135,8 +135,9 @@ class EmulateInstructionARM : public EmulateInstruction { bool TestEmulation(Stream *out_stream, ArchSpec &arch, OptionValueDictionary *test_data) override; - bool GetRegisterInfo(lldb::RegisterKind reg_kind, uint32_t reg_num, - RegisterInfo ®_info) override; + using EmulateInstruction::GetRegisterInfo; + llvm::Optional GetRegisterInfo(lldb::RegisterKind reg_kind, + uint32_t reg_num) override; bool CreateFunctionEntryUnwind(UnwindPlan &unwind_plan) override; diff --git a/lldb/source/Plugins/Instruction/ARM64/EmulateInstructionARM64.cpp b/lldb/source/Plugins/Instruction/ARM64/EmulateInstructionARM64.cpp index 6ab77d30564b9a..96a7caa29981a0 100644 --- a/lldb/source/Plugins/Instruction/ARM64/EmulateInstructionARM64.cpp +++ b/lldb/source/Plugins/Instruction/ARM64/EmulateInstructionARM64.cpp @@ -51,11 +51,10 @@ using namespace lldb_private; LLDB_PLUGIN_DEFINE_ADV(EmulateInstructionARM64, InstructionARM64) -static bool LLDBTableGetRegisterInfo(uint32_t reg_num, RegisterInfo ®_info) { +static llvm::Optional LLDBTableGetRegisterInfo(uint32_t reg_num) { if (reg_num >= std::size(g_register_infos_arm64_le)) - return false; - reg_info = g_register_infos_arm64_le[reg_num]; - return true; + return {}; + return g_register_infos_arm64_le[reg_num]; } #define No_VFP 0 @@ -144,9 +143,9 @@ bool EmulateInstructionARM64::SetTargetTriple(const ArchSpec &arch) { return false; } -bool EmulateInstructionARM64::GetRegisterInfo(RegisterKind reg_kind, - uint32_t reg_num, - RegisterInfo ®_info) { +llvm::Optional +EmulateInstructionARM64::GetRegisterInfo(RegisterKind reg_kind, + uint32_t reg_num) { if (reg_kind == eRegisterKindGeneric) { switch (reg_num) { case LLDB_REGNUM_GENERIC_PC: @@ -171,13 +170,13 @@ bool EmulateInstructionARM64::GetRegisterInfo(RegisterKind reg_kind, break; default: - return false; + return {}; } } if (reg_kind == eRegisterKindLLDB) - return LLDBTableGetRegisterInfo(reg_num, reg_info); - return false; + return LLDBTableGetRegisterInfo(reg_num); + return {}; } EmulateInstructionARM64::Opcode * diff --git a/lldb/source/Plugins/Instruction/ARM64/EmulateInstructionARM64.h b/lldb/source/Plugins/Instruction/ARM64/EmulateInstructionARM64.h index 4f11f7387a2ec5..20b1c33c66cda2 100644 --- a/lldb/source/Plugins/Instruction/ARM64/EmulateInstructionARM64.h +++ b/lldb/source/Plugins/Instruction/ARM64/EmulateInstructionARM64.h @@ -65,8 +65,10 @@ class EmulateInstructionARM64 : public lldb_private::EmulateInstruction { return false; } - bool GetRegisterInfo(lldb::RegisterKind reg_kind, uint32_t reg_num, - lldb_private::RegisterInfo ®_info) override; + using EmulateInstruction::GetRegisterInfo; + + llvm::Optional + GetRegisterInfo(lldb::RegisterKind reg_kind, uint32_t reg_num) override; bool CreateFunctionEntryUnwind(lldb_private::UnwindPlan &unwind_plan) override; diff --git a/lldb/source/Plugins/Instruction/MIPS/EmulateInstructionMIPS.cpp b/lldb/source/Plugins/Instruction/MIPS/EmulateInstructionMIPS.cpp index 7aff11ede400dc..37096a5cc67047 100644 --- a/lldb/source/Plugins/Instruction/MIPS/EmulateInstructionMIPS.cpp +++ b/lldb/source/Plugins/Instruction/MIPS/EmulateInstructionMIPS.cpp @@ -585,9 +585,9 @@ const char *EmulateInstructionMIPS::GetRegisterName(unsigned reg_num, return nullptr; } -bool EmulateInstructionMIPS::GetRegisterInfo(RegisterKind reg_kind, - uint32_t reg_num, - RegisterInfo ®_info) { +llvm::Optional +EmulateInstructionMIPS::GetRegisterInfo(RegisterKind reg_kind, + uint32_t reg_num) { if (reg_kind == 
eRegisterKindGeneric) { switch (reg_num) { case LLDB_REGNUM_GENERIC_PC: @@ -611,11 +611,12 @@ bool EmulateInstructionMIPS::GetRegisterInfo(RegisterKind reg_kind, reg_num = dwarf_sr_mips; break; default: - return false; + return {}; } } if (reg_kind == eRegisterKindDWARF) { + RegisterInfo reg_info; ::memset(®_info, 0, sizeof(RegisterInfo)); ::memset(reg_info.kinds, LLDB_INVALID_REGNUM, sizeof(reg_info.kinds)); @@ -636,7 +637,7 @@ bool EmulateInstructionMIPS::GetRegisterInfo(RegisterKind reg_kind, reg_info.format = eFormatVectorOfUInt8; reg_info.encoding = eEncodingVector; } else { - return false; + return {}; } reg_info.name = GetRegisterName(reg_num, false); @@ -662,9 +663,9 @@ bool EmulateInstructionMIPS::GetRegisterInfo(RegisterKind reg_kind, default: break; } - return true; + return reg_info; } - return false; + return {}; } EmulateInstructionMIPS::MipsOpcode * diff --git a/lldb/source/Plugins/Instruction/MIPS/EmulateInstructionMIPS.h b/lldb/source/Plugins/Instruction/MIPS/EmulateInstructionMIPS.h index 4862f6c7e0dc54..e771bda2e1dea5 100644 --- a/lldb/source/Plugins/Instruction/MIPS/EmulateInstructionMIPS.h +++ b/lldb/source/Plugins/Instruction/MIPS/EmulateInstructionMIPS.h @@ -80,8 +80,10 @@ class EmulateInstructionMIPS : public lldb_private::EmulateInstruction { return false; } - bool GetRegisterInfo(lldb::RegisterKind reg_kind, uint32_t reg_num, - lldb_private::RegisterInfo ®_info) override; + using EmulateInstruction::GetRegisterInfo; + + llvm::Optional + GetRegisterInfo(lldb::RegisterKind reg_kind, uint32_t reg_num) override; bool CreateFunctionEntryUnwind(lldb_private::UnwindPlan &unwind_plan) override; diff --git a/lldb/source/Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.cpp b/lldb/source/Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.cpp index b4a860af54bd90..341d954e74be6f 100644 --- a/lldb/source/Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.cpp +++ b/lldb/source/Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.cpp @@ -572,9 +572,9 @@ const char *EmulateInstructionMIPS64::GetRegisterName(unsigned reg_num, return nullptr; } -bool EmulateInstructionMIPS64::GetRegisterInfo(RegisterKind reg_kind, - uint32_t reg_num, - RegisterInfo ®_info) { +llvm::Optional +EmulateInstructionMIPS64::GetRegisterInfo(RegisterKind reg_kind, + uint32_t reg_num) { if (reg_kind == eRegisterKindGeneric) { switch (reg_num) { case LLDB_REGNUM_GENERIC_PC: @@ -598,11 +598,12 @@ bool EmulateInstructionMIPS64::GetRegisterInfo(RegisterKind reg_kind, reg_num = dwarf_sr_mips64; break; default: - return false; + return {}; } } if (reg_kind == eRegisterKindDWARF) { + RegisterInfo reg_info; ::memset(®_info, 0, sizeof(RegisterInfo)); ::memset(reg_info.kinds, LLDB_INVALID_REGNUM, sizeof(reg_info.kinds)); @@ -623,7 +624,7 @@ bool EmulateInstructionMIPS64::GetRegisterInfo(RegisterKind reg_kind, reg_info.format = eFormatVectorOfUInt8; reg_info.encoding = eEncodingVector; } else { - return false; + return {}; } reg_info.name = GetRegisterName(reg_num, false); @@ -649,9 +650,9 @@ bool EmulateInstructionMIPS64::GetRegisterInfo(RegisterKind reg_kind, default: break; } - return true; + return reg_info; } - return false; + return {}; } EmulateInstructionMIPS64::MipsOpcode * diff --git a/lldb/source/Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.h b/lldb/source/Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.h index 3f56bc658c16e6..9c8a95a64f942c 100644 --- a/lldb/source/Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.h +++ b/lldb/source/Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.h @@ 
-72,8 +72,10 @@ class EmulateInstructionMIPS64 : public lldb_private::EmulateInstruction { return false; } - bool GetRegisterInfo(lldb::RegisterKind reg_kind, uint32_t reg_num, - lldb_private::RegisterInfo ®_info) override; + using EmulateInstruction::GetRegisterInfo; + + llvm::Optional + GetRegisterInfo(lldb::RegisterKind reg_kind, uint32_t reg_num) override; bool CreateFunctionEntryUnwind(lldb_private::UnwindPlan &unwind_plan) override; diff --git a/lldb/source/Plugins/Instruction/PPC64/EmulateInstructionPPC64.cpp b/lldb/source/Plugins/Instruction/PPC64/EmulateInstructionPPC64.cpp index 4b56a9b6b8c51a..19598ebfd4c30a 100644 --- a/lldb/source/Plugins/Instruction/PPC64/EmulateInstructionPPC64.cpp +++ b/lldb/source/Plugins/Instruction/PPC64/EmulateInstructionPPC64.cpp @@ -58,16 +58,15 @@ bool EmulateInstructionPPC64::SetTargetTriple(const ArchSpec &arch) { return arch.GetTriple().isPPC64(); } -static bool LLDBTableGetRegisterInfo(uint32_t reg_num, RegisterInfo ®_info) { +static llvm::Optional LLDBTableGetRegisterInfo(uint32_t reg_num) { if (reg_num >= std::size(g_register_infos_ppc64le)) - return false; - reg_info = g_register_infos_ppc64le[reg_num]; - return true; + return {}; + return g_register_infos_ppc64le[reg_num]; } -bool EmulateInstructionPPC64::GetRegisterInfo(RegisterKind reg_kind, - uint32_t reg_num, - RegisterInfo ®_info) { +llvm::Optional +EmulateInstructionPPC64::GetRegisterInfo(RegisterKind reg_kind, + uint32_t reg_num) { if (reg_kind == eRegisterKindGeneric) { switch (reg_num) { case LLDB_REGNUM_GENERIC_PC: @@ -88,13 +87,13 @@ bool EmulateInstructionPPC64::GetRegisterInfo(RegisterKind reg_kind, break; default: - return false; + return {}; } } if (reg_kind == eRegisterKindLLDB) - return LLDBTableGetRegisterInfo(reg_num, reg_info); - return false; + return LLDBTableGetRegisterInfo(reg_num); + return {}; } bool EmulateInstructionPPC64::ReadInstruction() { diff --git a/lldb/source/Plugins/Instruction/PPC64/EmulateInstructionPPC64.h b/lldb/source/Plugins/Instruction/PPC64/EmulateInstructionPPC64.h index 117ff8965eb5c4..b0d9130bfb068b 100644 --- a/lldb/source/Plugins/Instruction/PPC64/EmulateInstructionPPC64.h +++ b/lldb/source/Plugins/Instruction/PPC64/EmulateInstructionPPC64.h @@ -61,8 +61,10 @@ class EmulateInstructionPPC64 : public EmulateInstruction { return false; } - bool GetRegisterInfo(lldb::RegisterKind reg_kind, uint32_t reg_num, - RegisterInfo ®_info) override; + using EmulateInstruction::GetRegisterInfo; + + llvm::Optional GetRegisterInfo(lldb::RegisterKind reg_kind, + uint32_t reg_num) override; bool CreateFunctionEntryUnwind(UnwindPlan &unwind_plan) override; diff --git a/lldb/source/Plugins/Instruction/RISCV/EmulateInstructionRISCV.cpp b/lldb/source/Plugins/Instruction/RISCV/EmulateInstructionRISCV.cpp index bcd18ff63d11ba..f84c1159f254da 100644 --- a/lldb/source/Plugins/Instruction/RISCV/EmulateInstructionRISCV.cpp +++ b/lldb/source/Plugins/Instruction/RISCV/EmulateInstructionRISCV.cpp @@ -1286,9 +1286,9 @@ bool EmulateInstructionRISCV::WritePC(lldb::addr_t pc) { LLDB_REGNUM_GENERIC_PC, pc); } -bool EmulateInstructionRISCV::GetRegisterInfo(lldb::RegisterKind reg_kind, - uint32_t reg_index, - RegisterInfo ®_info) { +llvm::Optional +EmulateInstructionRISCV::GetRegisterInfo(lldb::RegisterKind reg_kind, + uint32_t reg_index) { if (reg_kind == eRegisterKindGeneric) { switch (reg_index) { case LLDB_REGNUM_GENERIC_PC: @@ -1320,10 +1320,9 @@ bool EmulateInstructionRISCV::GetRegisterInfo(lldb::RegisterKind reg_kind, RegisterInfoPOSIX_riscv64::GetRegisterInfoCount(m_arch); 
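// A usage sketch for the new Optional-returning API (hypothetical caller;
// `emu` and `consume` are illustrative names, not part of this patch):
//   if (llvm::Optional<RegisterInfo> info = emu.GetRegisterInfo(kind, num))
//     consume(info->name);
// Callers that still need the old bool-plus-out-parameter form go through
// the non-virtual wrapper added in EmulateInstruction.cpp above.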
if (reg_index >= length || reg_kind != eRegisterKindLLDB) - return false; + return {}; - reg_info = array[reg_index]; - return true; + return array[reg_index]; } bool EmulateInstructionRISCV::SetTargetTriple(const ArchSpec &arch) { diff --git a/lldb/source/Plugins/Instruction/RISCV/EmulateInstructionRISCV.h b/lldb/source/Plugins/Instruction/RISCV/EmulateInstructionRISCV.h index 1c7cf6cb08d66f..92f5c950c26ad3 100644 --- a/lldb/source/Plugins/Instruction/RISCV/EmulateInstructionRISCV.h +++ b/lldb/source/Plugins/Instruction/RISCV/EmulateInstructionRISCV.h @@ -76,8 +76,10 @@ class EmulateInstructionRISCV : public EmulateInstruction { bool EvaluateInstruction(uint32_t options) override; bool TestEmulation(Stream *out_stream, ArchSpec &arch, OptionValueDictionary *test_data) override; - bool GetRegisterInfo(lldb::RegisterKind reg_kind, uint32_t reg_num, - RegisterInfo ®_info) override; + using EmulateInstruction::GetRegisterInfo; + + llvm::Optional GetRegisterInfo(lldb::RegisterKind reg_kind, + uint32_t reg_num) override; lldb::addr_t ReadPC(bool &success); bool WritePC(lldb::addr_t pc); diff --git a/lldb/test/Shell/Driver/CommandOnCrashMultiThreaded.test b/lldb/test/Shell/Driver/CommandOnCrashMultiThreaded.test new file mode 100644 index 00000000000000..b16cfc5763715b --- /dev/null +++ b/lldb/test/Shell/Driver/CommandOnCrashMultiThreaded.test @@ -0,0 +1,5 @@ +# REQUIRES: native && (target-x86 || target-x86_64) +# RUN: %clangxx_host %p/Inputs/CommandOnCrashMultiThreaded.cpp -o %t -pthread +# RUN: %lldb -b -o "process launch" -k "process continue" -k "exit" %t | FileCheck %s + +# CHECK: Process {{[0-9]+}} exited with status = 0 diff --git a/lldb/test/Shell/Driver/Inputs/CommandOnCrashMultiThreaded.cpp b/lldb/test/Shell/Driver/Inputs/CommandOnCrashMultiThreaded.cpp new file mode 100644 index 00000000000000..f469d82fbbef9d --- /dev/null +++ b/lldb/test/Shell/Driver/Inputs/CommandOnCrashMultiThreaded.cpp @@ -0,0 +1,13 @@ +#include + +void t_func() { + asm volatile( + "int3\n\t" + ); +} + +int main() { + std::thread t(t_func); + t.join(); + return 0; +} diff --git a/llvm/include/llvm/ADT/Bitfields.h b/llvm/include/llvm/ADT/Bitfields.h index 045704a470b9cc..4064d716f8a774 100644 --- a/llvm/include/llvm/ADT/Bitfields.h +++ b/llvm/include/llvm/ADT/Bitfields.h @@ -195,7 +195,7 @@ template struct Impl { /// API. 
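// For reference, std::underlying_type_t<T> (used below and again in
// Sequence.h, OrcError.cpp, and CodeView.h later in this patch) is simply
// the C++14 alias for the C++11 trait; a self-contained illustration:
//   #include <type_traits>
//   enum class E : unsigned short { A };
//   static_assert(std::is_same<typename std::underlying_type<E>::type,
//                              std::underlying_type_t<E>>::value,
//                 "underlying_type_t<E> names the same type");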
template ::value> struct ResolveUnderlyingType { - using type = typename std::underlying_type::type; + using type = std::underlying_type_t; }; template struct ResolveUnderlyingType { using type = T; diff --git a/llvm/include/llvm/ADT/Sequence.h b/llvm/include/llvm/ADT/Sequence.h index 96935c291ec1e4..88a6fa92059836 100644 --- a/llvm/include/llvm/ADT/Sequence.h +++ b/llvm/include/llvm/ADT/Sequence.h @@ -139,7 +139,7 @@ struct CheckedInt { template ::value, bool> = 0> static CheckedInt from(Enum FromValue) { - using type = typename std::underlying_type::type; + using type = std::underlying_type_t; return from(static_cast(FromValue)); } @@ -175,7 +175,7 @@ struct CheckedInt { template ::value, bool> = 0> Enum to() const { - using type = typename std::underlying_type::type; + using type = std::underlying_type_t; return Enum(to()); } diff --git a/llvm/include/llvm/ADT/SmallVector.h b/llvm/include/llvm/ADT/SmallVector.h index 14f051e383f725..513fce93549fb5 100644 --- a/llvm/include/llvm/ADT/SmallVector.h +++ b/llvm/include/llvm/ADT/SmallVector.h @@ -468,8 +468,7 @@ class SmallVectorTemplateBase : public SmallVectorTemplateCommon { /// Either const T& or T, depending on whether it's cheap enough to take /// parameters by value. - using ValueParamT = - typename std::conditional::type; + using ValueParamT = std::conditional_t; SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon(Size) {} diff --git a/llvm/include/llvm/Analysis/InlineCost.h b/llvm/include/llvm/Analysis/InlineCost.h index a0a5cb8bf356c0..6596cb3375e1bf 100644 --- a/llvm/include/llvm/Analysis/InlineCost.h +++ b/llvm/include/llvm/Analysis/InlineCost.h @@ -95,6 +95,9 @@ class InlineCost { /// The adjusted threshold against which this cost was computed. int Threshold = 0; + /// The amount of StaticBonus that has been applied. + int StaticBonusApplied = 0; + /// Must be set for Always and Never instances. const char *Reason = nullptr; @@ -102,27 +105,29 @@ class InlineCost { Optional CostBenefit = None; // Trivial constructor, interesting logic in the factory functions below. - InlineCost(int Cost, int Threshold, const char *Reason = nullptr, + InlineCost(int Cost, int Threshold, int StaticBonusApplied, + const char *Reason = nullptr, Optional CostBenefit = None) - : Cost(Cost), Threshold(Threshold), Reason(Reason), + : Cost(Cost), Threshold(Threshold), + StaticBonusApplied(StaticBonusApplied), Reason(Reason), CostBenefit(CostBenefit) { assert((isVariable() || Reason) && "Reason must be provided for Never or Always"); } public: - static InlineCost get(int Cost, int Threshold) { + static InlineCost get(int Cost, int Threshold, int StaticBonus = 0) { assert(Cost > AlwaysInlineCost && "Cost crosses sentinel value"); assert(Cost < NeverInlineCost && "Cost crosses sentinel value"); - return InlineCost(Cost, Threshold); + return InlineCost(Cost, Threshold, StaticBonus); } static InlineCost getAlways(const char *Reason, Optional CostBenefit = None) { - return InlineCost(AlwaysInlineCost, 0, Reason, CostBenefit); + return InlineCost(AlwaysInlineCost, 0, 0, Reason, CostBenefit); } static InlineCost getNever(const char *Reason, Optional CostBenefit = None) { - return InlineCost(NeverInlineCost, 0, Reason, CostBenefit); + return InlineCost(NeverInlineCost, 0, 0, Reason, CostBenefit); } /// Test whether the inline cost is low enough for inlining. @@ -145,6 +150,12 @@ class InlineCost { return Threshold; } + /// Get the amount of StaticBonus applied. 
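+  /// When the sole-call-to-local-function bonus has fired, getCost() has
+  /// already been reduced by LastCallToStaticBonus, so a consumer can
+  /// recover the pre-bonus cost as getCost() + getStaticBonusApplied().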
+ int getStaticBonusApplied() const { + assert(isVariable() && "Invalid access of InlineCost"); + return StaticBonusApplied; + } + /// Get the cost-benefit pair which was computed by cost-benefit analysis Optional getCostBenefit() const { return CostBenefit; } diff --git a/llvm/include/llvm/DebugInfo/CodeView/CodeView.h b/llvm/include/llvm/DebugInfo/CodeView/CodeView.h index b7a3e1561a0794..010a82dd0e232c 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/CodeView.h +++ b/llvm/include/llvm/DebugInfo/CodeView/CodeView.h @@ -51,18 +51,15 @@ enum SymbolKind : uint16_t { #define CV_DEFINE_ENUM_CLASS_FLAGS_OPERATORS(Class) \ inline Class operator|(Class a, Class b) { \ - return static_cast( \ - static_cast::type>(a) | \ - static_cast::type>(b)); \ + return static_cast(static_cast>(a) | \ + static_cast>(b)); \ } \ inline Class operator&(Class a, Class b) { \ - return static_cast( \ - static_cast::type>(a) & \ - static_cast::type>(b)); \ + return static_cast(static_cast>(a) & \ + static_cast>(b)); \ } \ inline Class operator~(Class a) { \ - return static_cast( \ - ~static_cast::type>(a)); \ + return static_cast(~static_cast>(a)); \ } \ inline Class &operator|=(Class &a, Class b) { \ a = a | b; \ diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp index e193ceea86f767..e17b90844c9a5c 100644 --- a/llvm/lib/Analysis/InlineCost.cpp +++ b/llvm/lib/Analysis/InlineCost.cpp @@ -549,6 +549,9 @@ class InlineCostCallAnalyzer final : public CallAnalyzer { /// for speculative "expected profit" of the inlining decision. int Threshold = 0; + /// The amount of StaticBonus applied. + int StaticBonusApplied = 0; + /// Attempt to evaluate indirect calls to boost its inline cost. const bool BoostIndirectCalls; @@ -1058,6 +1061,7 @@ class InlineCostCallAnalyzer final : public CallAnalyzer { virtual ~InlineCostCallAnalyzer() = default; int getThreshold() const { return Threshold; } int getCost() const { return Cost; } + int getStaticBonusApplied() const { return StaticBonusApplied; } Optional getCostBenefitPair() { return CostBenefit; } bool wasDecidedByCostBenefit() const { return DecidedByCostBenefit; } bool wasDecidedByCostThreshold() const { return DecidedByCostThreshold; } @@ -1922,8 +1926,10 @@ void InlineCostCallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) { // If there is only one call of the function, and it has internal linkage, // the cost of inlining it drops dramatically. It may seem odd to update // Cost in updateThreshold, but the bonus depends on the logic in this method. - if (isSoleCallToLocalFunction(Call, F)) + if (isSoleCallToLocalFunction(Call, F)) { Cost -= LastCallToStaticBonus; + StaticBonusApplied = LastCallToStaticBonus; + } } bool CallAnalyzer::visitCmpInst(CmpInst &I) { @@ -2970,7 +2976,8 @@ InlineCost llvm::getInlineCost( } if (CA.wasDecidedByCostThreshold()) - return InlineCost::get(CA.getCost(), CA.getThreshold()); + return InlineCost::get(CA.getCost(), CA.getThreshold(), + CA.getStaticBonusApplied()); // No details on how the decision was made, simply return always or never. 
return ShouldInline.isSuccess() diff --git a/llvm/lib/ExecutionEngine/Orc/Shared/OrcError.cpp b/llvm/lib/ExecutionEngine/Orc/Shared/OrcError.cpp index 2cc2bddeb21a10..ec53338570db27 100644 --- a/llvm/lib/ExecutionEngine/Orc/Shared/OrcError.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Shared/OrcError.cpp @@ -82,7 +82,7 @@ char DuplicateDefinition::ID = 0; char JITSymbolNotFound::ID = 0; std::error_code orcError(OrcErrorCode ErrCode) { - typedef std::underlying_type::type UT; + typedef std::underlying_type_t UT; return std::error_code(static_cast(ErrCode), getOrcErrCat()); } @@ -105,7 +105,7 @@ JITSymbolNotFound::JITSymbolNotFound(std::string SymbolName) : SymbolName(std::move(SymbolName)) {} std::error_code JITSymbolNotFound::convertToErrorCode() const { - typedef std::underlying_type::type UT; + typedef std::underlying_type_t UT; return std::error_code(static_cast(OrcErrorCode::JITSymbolNotFound), getOrcErrCat()); } diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td index e37e1d58983040..68d33669060cfd 100644 --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -668,6 +668,7 @@ def TuneA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35", def TuneA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53", "Cortex-A53 ARM processors", [ FeatureFuseAES, + FeatureFuseAdrpAdd, FeatureBalanceFPOps, FeatureCustomCheapAsMoveHandling, FeaturePostRAScheduler]>; @@ -675,12 +676,14 @@ def TuneA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53", def TuneA55 : SubtargetFeature<"a55", "ARMProcFamily", "CortexA55", "Cortex-A55 ARM processors", [ FeatureFuseAES, + FeatureFuseAdrpAdd, FeaturePostRAScheduler, FeatureFuseAddress]>; def TuneA510 : SubtargetFeature<"a510", "ARMProcFamily", "CortexA510", "Cortex-A510 ARM processors", [ FeatureFuseAES, + FeatureFuseAdrpAdd, FeaturePostRAScheduler ]>; @@ -709,27 +712,32 @@ def TuneA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72", def TuneA73 : SubtargetFeature<"a73", "ARMProcFamily", "CortexA73", "Cortex-A73 ARM processors", [ - FeatureFuseAES]>; + FeatureFuseAES, + FeatureFuseAdrpAdd]>; def TuneA75 : SubtargetFeature<"a75", "ARMProcFamily", "CortexA75", "Cortex-A75 ARM processors", [ - FeatureFuseAES]>; + FeatureFuseAES, + FeatureFuseAdrpAdd]>; def TuneA76 : SubtargetFeature<"a76", "ARMProcFamily", "CortexA76", "Cortex-A76 ARM processors", [ FeatureFuseAES, + FeatureFuseAdrpAdd, FeatureLSLFast]>; def TuneA77 : SubtargetFeature<"a77", "ARMProcFamily", "CortexA77", "Cortex-A77 ARM processors", [ FeatureCmpBccFusion, FeatureFuseAES, + FeatureFuseAdrpAdd, FeatureLSLFast]>; def TuneA78 : SubtargetFeature<"a78", "ARMProcFamily", "CortexA78", "Cortex-A78 ARM processors", [ FeatureCmpBccFusion, FeatureFuseAES, + FeatureFuseAdrpAdd, FeatureLSLFast, FeaturePostRAScheduler]>; @@ -738,6 +746,7 @@ def TuneA78C : SubtargetFeature<"a78c", "ARMProcFamily", "Cortex-A78C ARM processors", [ FeatureCmpBccFusion, FeatureFuseAES, + FeatureFuseAdrpAdd, FeatureLSLFast, FeaturePostRAScheduler]>; @@ -745,6 +754,7 @@ def TuneA710 : SubtargetFeature<"a710", "ARMProcFamily", "CortexA710", "Cortex-A710 ARM processors", [ FeatureCmpBccFusion, FeatureFuseAES, + FeatureFuseAdrpAdd, FeatureLSLFast, FeaturePostRAScheduler]>; @@ -757,6 +767,7 @@ def TuneX1 : SubtargetFeature<"cortex-x1", "ARMProcFamily", "CortexX1", "Cortex-X1 ARM processors", [ FeatureCmpBccFusion, FeatureFuseAES, + FeatureFuseAdrpAdd, FeatureLSLFast, FeaturePostRAScheduler]>; @@ -764,6 +775,7 @@ def TuneX2 : SubtargetFeature<"cortex-x2", 
"ARMProcFamily", "CortexX2", "Cortex-X2 ARM processors", [ FeatureCmpBccFusion, FeatureFuseAES, + FeatureFuseAdrpAdd, FeatureLSLFast, FeaturePostRAScheduler]>; @@ -941,6 +953,7 @@ def TuneFalkor : SubtargetFeature<"falkor", "ARMProcFamily", "Falkor", def TuneNeoverseE1 : SubtargetFeature<"neoversee1", "ARMProcFamily", "NeoverseE1", "Neoverse E1 ARM processors", [ FeatureFuseAES, + FeatureFuseAdrpAdd, FeaturePostRAScheduler]>; def TuneNeoverseN1 : SubtargetFeature<"neoversen1", "ARMProcFamily", "NeoverseN1", @@ -953,18 +966,21 @@ def TuneNeoverseN1 : SubtargetFeature<"neoversen1", "ARMProcFamily", "NeoverseN1 def TuneNeoverseN2 : SubtargetFeature<"neoversen2", "ARMProcFamily", "NeoverseN2", "Neoverse N2 ARM processors", [ FeatureFuseAES, + FeatureFuseAdrpAdd, FeatureLSLFast, FeaturePostRAScheduler]>; def TuneNeoverse512TVB : SubtargetFeature<"neoverse512tvb", "ARMProcFamily", "Neoverse512TVB", "Neoverse 512-TVB ARM processors", [ FeatureFuseAES, + FeatureFuseAdrpAdd, FeatureLSLFast, FeaturePostRAScheduler]>; def TuneNeoverseV1 : SubtargetFeature<"neoversev1", "ARMProcFamily", "NeoverseV1", "Neoverse V1 ARM processors", [ FeatureFuseAES, + FeatureFuseAdrpAdd, FeatureLSLFast, FeaturePostRAScheduler]>; diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 0cc97e3b9e0f19..544fd0efd05bcf 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -21191,6 +21191,21 @@ bool ARMTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const { return Subtarget->hasV6T2Ops(); } +bool ARMTargetLowering::isMaskAndCmp0FoldingBeneficial( + const Instruction &AndI) const { + if (!Subtarget->hasV7Ops()) + return false; + + // Sink the `and` instruction only if the mask would fit into a modified + // immediate operand. + ConstantInt *Mask = dyn_cast(AndI.getOperand(1)); + if (!Mask || Mask->getValue().getBitWidth() > 32u) + return false; + auto MaskVal = unsigned(Mask->getValue().getZExtValue()); + return (Subtarget->isThumb2() ? ARM_AM::getT2SOImmVal(MaskVal) + : ARM_AM::getSOImmVal(MaskVal)) != -1; +} + bool ARMTargetLowering::shouldExpandShift(SelectionDAG &DAG, SDNode *N) const { return !Subtarget->hasMinSize() || Subtarget->isTargetWindows(); } diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index 9ff920f230e22c..1403e4c8c0a114 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -584,6 +584,8 @@ class VectorType; bool preferZeroCompareBranch() const override { return true; } + bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override; + bool isShuffleMaskLegal(ArrayRef M, EVT VT) const override; bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td index d9993e0a616f76..9a901593c523c7 100644 --- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td @@ -157,11 +157,8 @@ def : Pat<(fcanonicalize FPR32:$fj), (FMAX_S $fj, $fj)>; // Match non-signaling comparison -// TODO: change setcc to any_fsetcc after call is supported because -// we need to call llvm.experimental.constrained.fcmp.f32 in testcase. -// See RISCV float-fcmp-strict.ll for reference. 
class PatFPSetcc - : Pat<(setcc RegTy:$fj, RegTy:$fk, cc), + : Pat<(any_fsetcc RegTy:$fj, RegTy:$fk, cc), (MOVCF2GR (CmpInst RegTy:$fj, RegTy:$fk))>; // SETOGT/SETOGE/SETUGT/SETUGE will expand into SETOLT/SETOLE/SETULT/SETULE. def : PatFPSetcc; @@ -196,7 +193,22 @@ defm : PatFPBrcond; defm : PatFPBrcond; defm : PatFPBrcond; -// TODO: Match signaling comparison strict_fsetccs with FCMP_S*_S instructions. +// Match signaling comparison + +class PatStrictFsetccs + : Pat<(strict_fsetccs RegTy:$fj, RegTy:$fk, cc), + (MOVCF2GR (CmpInst RegTy:$fj, RegTy:$fk))>; +def : PatStrictFsetccs; +def : PatStrictFsetccs; +def : PatStrictFsetccs; +def : PatStrictFsetccs; +def : PatStrictFsetccs; +def : PatStrictFsetccs; +def : PatStrictFsetccs; +def : PatStrictFsetccs; +def : PatStrictFsetccs; +def : PatStrictFsetccs; +def : PatStrictFsetccs; /// Select diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td index 42a02514ffacb2..9fb9b99d32f3a3 100644 --- a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td @@ -168,10 +168,6 @@ def : Pat<(fcanonicalize FPR64:$fj), (FMAX_D $fj, $fj)>; // Match non-signaling comparison -// TODO: Change setcc to any_fsetcc after call is supported because -// we need to call llvm.experimental.constrained.fcmp.f64 in testcase. -// See RISCV float-fcmp-strict.ll for reference. - // SETOGT/SETOGE/SETUGT/SETUGE will expand into SETOLT/SETOLE/SETULT/SETULE. def : PatFPSetcc; def : PatFPSetcc; @@ -197,7 +193,19 @@ defm : PatFPBrcond; defm : PatFPBrcond; defm : PatFPBrcond; -// TODO: Match signaling comparison strict_fsetccs with FCMP_S*_D instructions. +// Match signaling comparison + +def : PatStrictFsetccs; +def : PatStrictFsetccs; +def : PatStrictFsetccs; +def : PatStrictFsetccs; +def : PatStrictFsetccs; +def : PatStrictFsetccs; +def : PatStrictFsetccs; +def : PatStrictFsetccs; +def : PatStrictFsetccs; +def : PatStrictFsetccs; +def : PatStrictFsetccs; /// Select diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 920a9da58b859a..c2b1b443444e0c 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -118,11 +118,15 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FMA, MVT::f32, Legal); setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal); setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal); + setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal); + setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal); } if (Subtarget.hasBasicD()) { setCondCodeAction(FPCCToExpand, MVT::f64, Expand); setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); setOperationAction(ISD::BR_CC, MVT::f64, Expand); + setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal); + setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal); setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); setOperationAction(ISD::FMA, MVT::f64, Legal); diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp index 8c2763cb7aff16..2c87492b8242f8 100644 --- a/llvm/lib/Target/VE/VEISelLowering.cpp +++ b/llvm/lib/Target/VE/VEISelLowering.cpp @@ -201,6 +201,10 @@ void VETargetLowering::initSPUActions() { setOperationAction(ISD::AND, IntVT, Act); setOperationAction(ISD::OR, IntVT, Act); 
setOperationAction(ISD::XOR, IntVT, Act); + + // Legal smax and smin + setOperationAction(ISD::SMAX, IntVT, Legal); + setOperationAction(ISD::SMIN, IntVT, Legal); } /// } Int Ops diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td index 528bebd4276a05..d8eb65185a7024 100644 --- a/llvm/lib/Target/VE/VEInstrInfo.td +++ b/llvm/lib/Target/VE/VEInstrInfo.td @@ -1237,14 +1237,14 @@ defm CMPSL : RRNCm<"cmps.l", 0x6A, I64, i64>; // Section 8.4.17 - CMS (Compare and Select Maximum/Minimum Single) // cx: sx/zx, cw: max/min -defm MAXSWSX : RRm<"maxs.w.sx", 0x78, I32, i32>; +defm MAXSWSX : RRm<"maxs.w.sx", 0x78, I32, i32, smax>; let cx = 1 in defm MAXSWZX : RRm<"maxs.w.zx", 0x78, I32, i32>; -let cw = 1 in defm MINSWSX : RRm<"mins.w.sx", 0x78, I32, i32>; +let cw = 1 in defm MINSWSX : RRm<"mins.w.sx", 0x78, I32, i32, smin>; let cx = 1, cw = 1 in defm MINSWZX : RRm<"mins.w.zx", 0x78, I32, i32>; // Section 8.4.18 - CMX (Compare and Select Maximum/Minimum) -defm MAXSL : RRm<"maxs.l", 0x68, I64, i64>; -let cw = 1 in defm MINSL : RRm<"mins.l", 0x68, I64, i64>; +defm MAXSL : RRm<"maxs.l", 0x68, I64, i64, smax>; +let cw = 1 in defm MINSL : RRm<"mins.l", 0x68, I64, i64, smin>; } // isReMaterializable, isAsCheapAsAMove @@ -2052,45 +2052,6 @@ def : Pat<(i32 (setcc f64:$l, f64:$r, cond:$cond)), def : Pat<(i32 (setcc f128:$l, f128:$r, cond:$cond)), (setccrr (fcond2cc $cond), (FCMPQrr $l, $r))>; -// Special SELECTCC pattern matches -// Use min/max for better performance. -// -// MAX/MIN %res, %lhs, %rhs - -def : Pat<(f64 (selectcc f64:$LHS, f64:$RHS, f64:$LHS, f64:$RHS, SETOGT)), - (FMAXDrr $LHS, $RHS)>; -def : Pat<(f32 (selectcc f32:$LHS, f32:$RHS, f32:$LHS, f32:$RHS, SETOGT)), - (FMAXSrr $LHS, $RHS)>; -def : Pat<(i64 (selectcc i64:$LHS, i64:$RHS, i64:$LHS, i64:$RHS, SETGT)), - (MAXSLrr $LHS, $RHS)>; -def : Pat<(i32 (selectcc i32:$LHS, i32:$RHS, i32:$LHS, i32:$RHS, SETGT)), - (MAXSWSXrr $LHS, $RHS)>; -def : Pat<(f64 (selectcc f64:$LHS, f64:$RHS, f64:$LHS, f64:$RHS, SETOGE)), - (FMAXDrr $LHS, $RHS)>; -def : Pat<(f32 (selectcc f32:$LHS, f32:$RHS, f32:$LHS, f32:$RHS, SETOGE)), - (FMAXSrr $LHS, $RHS)>; -def : Pat<(i64 (selectcc i64:$LHS, i64:$RHS, i64:$LHS, i64:$RHS, SETGE)), - (MAXSLrr $LHS, $RHS)>; -def : Pat<(i32 (selectcc i32:$LHS, i32:$RHS, i32:$LHS, i32:$RHS, SETGE)), - (MAXSWSXrr $LHS, $RHS)>; - -def : Pat<(f64 (selectcc f64:$LHS, f64:$RHS, f64:$LHS, f64:$RHS, SETOLT)), - (FMINDrr $LHS, $RHS)>; -def : Pat<(f32 (selectcc f32:$LHS, f32:$RHS, f32:$LHS, f32:$RHS, SETOLT)), - (FMINSrr $LHS, $RHS)>; -def : Pat<(i64 (selectcc i64:$LHS, i64:$RHS, i64:$LHS, i64:$RHS, SETLT)), - (MINSLrr $LHS, $RHS)>; -def : Pat<(i32 (selectcc i32:$LHS, i32:$RHS, i32:$LHS, i32:$RHS, SETLT)), - (MINSWSXrr $LHS, $RHS)>; -def : Pat<(f64 (selectcc f64:$LHS, f64:$RHS, f64:$LHS, f64:$RHS, SETOLE)), - (FMINDrr $LHS, $RHS)>; -def : Pat<(f32 (selectcc f32:$LHS, f32:$RHS, f32:$LHS, f32:$RHS, SETOLE)), - (FMINSrr $LHS, $RHS)>; -def : Pat<(i64 (selectcc i64:$LHS, i64:$RHS, i64:$LHS, i64:$RHS, SETLE)), - (MINSLrr $LHS, $RHS)>; -def : Pat<(i32 (selectcc i32:$LHS, i32:$RHS, i32:$LHS, i32:$RHS, SETLE)), - (MINSWSXrr $LHS, $RHS)>; - // Helper classes to construct cmov patterns for the ease. // // Hiding INSERT_SUBREG/EXTRACT_SUBREG patterns. 
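// Note on the integer selectcc patterns deleted above: with SMAX/SMIN now
// legal in VEISelLowering.cpp, select-of-compare shapes reach instruction
// selection as smax/smin nodes and are picked up by the RRm patterns on
// MAXS*/MINS* directly, so the hand-written select_cc forms are redundant.
// A sketch of the source-level effect (assuming the standard mid-end
// canonicalization of select-of-compare to min/max):
//   long max(long a, long b) { return a > b ? a : b; } // selects maxs.l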
diff --git a/llvm/test/Analysis/MustExecute/pr57780.ll b/llvm/test/Analysis/MustExecute/pr57780.ll new file mode 100644 index 00000000000000..a7b47a1fb8b5f7 --- /dev/null +++ b/llvm/test/Analysis/MustExecute/pr57780.ll @@ -0,0 +1,55 @@ +; RUN: opt -disable-output -print-mustexecute < %s 2>&1 | FileCheck %s + +@c = global i16 0, align 2 + +; FIXME: miscompile +; CHECK-LABEL: define void @latch_cycle_irreducible +; CHECK: store i16 5, ptr @c, align 2 ; (mustexec in: loop) +define void @latch_cycle_irreducible() { +entry: + br label %loop + +loop: ; preds = %loop.latch, %entry + %v = phi i32 [ 10, %entry ], [ 0, %loop.latch ] + %c = icmp eq i32 %v, 0 + br i1 %c, label %loop.exit, label %loop.cont + +loop.cont: ; preds = %loop + br i1 false, label %loop.irreducible, label %loop.latch + +loop.irreducible: ; preds = %loop.latch, %loop.cont + store i16 5, ptr @c, align 2 + br label %loop.latch + +loop.latch: ; preds = %loop.irreducible, %loop.cont + br i1 false, label %loop.irreducible, label %loop + +loop.exit: ; preds = %loop + ret void +} + +; FIXME: miscompile +; CHECK-LABEL: define void @latch_cycle_reducible +; CHECK: store i16 5, ptr @c, align 2 ; (mustexec in: loop) +define void @latch_cycle_reducible() { +entry: + br label %loop + +loop: ; preds = %loop.latch, %entry + %v = phi i32 [ 10, %entry ], [ 0, %loop.latch ] + %c = icmp eq i32 %v, 0 + br i1 %c, label %loop.exit, label %loop2 + +loop2: ; preds = %loop.latch, %loop + br i1 false, label %loop2.cont, label %loop.latch + +loop2.cont: ; preds = %loop2 + store i16 5, ptr @c, align 2 + br label %loop.latch + +loop.latch: ; preds = %loop2.cont, %loop2 + br i1 false, label %loop2, label %loop + +loop.exit: ; preds = %loop + ret void +} diff --git a/llvm/test/CodeGen/AArch64/misched-fusion-addadrp.ll b/llvm/test/CodeGen/AArch64/misched-fusion-addadrp.ll new file mode 100644 index 00000000000000..ed53c77e4c88c5 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/misched-fusion-addadrp.ll @@ -0,0 +1,37 @@ +; RUN: llc %s -o - -mtriple=aarch64-unknown -mattr=+fuse-adrp-add | FileCheck %s +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=generic | FileCheck %s +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a53 | FileCheck %s +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a55 | FileCheck %s +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a510 | FileCheck %s +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a73 | FileCheck %s +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a75 | FileCheck %s +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a76 | FileCheck %s +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a77 | FileCheck %s +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a78 | FileCheck %s +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a710 | FileCheck %s +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=neoverse-n1 | FileCheck %s +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=neoverse-v1 | FileCheck %s +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=neoverse-n2 | FileCheck %s + +@g = common local_unnamed_addr global i8* null, align 8 + +define dso_local i8* @addldr(i32 %a, i32 %b) { +; CHECK-LABEL: addldr: +; CHECK: adrp [[R:x[0-9]+]], addldr +; CHECK-NEXT: add {{x[0-9]+}}, [[R]], :lo12:addldr +entry: + %add = add nsw i32 %b, %a + %idx.ext = sext i32 %add to i64 + %add.ptr = getelementptr i8, i8* bitcast (i8* (i32, i32)* @addldr to i8*), i64 %idx.ext + store i8* %add.ptr, i8** @g, align 8 + ret i8* %add.ptr +} + + +define double @litf() { +; CHECK-LABEL: 
litf: +; CHECK: adrp [[ADDR:x[0-9]+]], [[CSTLABEL:.LCP.*]] +; CHECK-NEXT: ldr {{d[0-9]+}}, {{[[]}}[[ADDR]], :lo12:[[CSTLABEL]]{{[]]}} +entry: + ret double 0x400921FB54442D18 +} diff --git a/llvm/test/CodeGen/ARM/and-cmp0-sink.ll b/llvm/test/CodeGen/ARM/and-cmp0-sink.ll new file mode 100644 index 00000000000000..27203e274a4aa6 --- /dev/null +++ b/llvm/test/CodeGen/ARM/and-cmp0-sink.ll @@ -0,0 +1,396 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv7m-eabi %s -o - | FileCheck %s --check-prefix V7M +; RUN: llc -mtriple=armv7a-eabi %s -o - | FileCheck %s --check-prefix V7A +; RUN: llc -mtriple=thumbv7a-eabi %s -o - | FileCheck %s --check-prefix V7A-T +; RUN: llc -mtriple=armv6m-eabi %s -o - | FileCheck %s --check-prefix V6M + +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + +; Test sink of `and` instructions to fold in to `tst`, `lsls`, do cmov-bfi combine, etc. +define void @f(i32 %v, ptr noalias %outp) { +; V7M-LABEL: f: +; V7M: @ %bb.0: @ %entry +; V7M-NEXT: movs r2, #0 +; V7M-NEXT: str r2, [r1] +; V7M-NEXT: lsls r2, r0, #31 +; V7M-NEXT: bne .LBB0_3 +; V7M-NEXT: @ %bb.1: @ %if.then +; V7M-NEXT: tst.w r0, #14 +; V7M-NEXT: beq .LBB0_6 +; V7M-NEXT: @ %bb.2: +; V7M-NEXT: lsls r2, r0, #30 +; V7M-NEXT: mov.w r3, #33024 +; V7M-NEXT: and.w r2, r3, r2, asr #31 +; V7M-NEXT: lsrs r0, r0, #2 +; V7M-NEXT: bfi r2, r0, #7, #1 +; V7M-NEXT: bfi r2, r0, #14, #1 +; V7M-NEXT: b .LBB0_5 +; V7M-NEXT: .LBB0_3: @ %if.else +; V7M-NEXT: tst.w r0, #14 +; V7M-NEXT: it eq +; V7M-NEXT: bxeq lr +; V7M-NEXT: .LBB0_4: +; V7M-NEXT: lsls r2, r0, #30 +; V7M-NEXT: mov.w r3, #8256 +; V7M-NEXT: and.w r2, r3, r2, asr #31 +; V7M-NEXT: lsrs r0, r0, #2 +; V7M-NEXT: bfi r2, r0, #5, #1 +; V7M-NEXT: bfi r2, r0, #12, #1 +; V7M-NEXT: .LBB0_5: @ %if.end +; V7M-NEXT: str r2, [r1] +; V7M-NEXT: .LBB0_6: @ %exit +; V7M-NEXT: bx lr +; +; V7A-LABEL: f: +; V7A: @ %bb.0: @ %entry +; V7A-NEXT: mov r2, #0 +; V7A-NEXT: tst r0, #1 +; V7A-NEXT: str r2, [r1] +; V7A-NEXT: bne .LBB0_3 +; V7A-NEXT: @ %bb.1: @ %if.then +; V7A-NEXT: tst r0, #14 +; V7A-NEXT: beq .LBB0_6 +; V7A-NEXT: @ %bb.2: +; V7A-NEXT: lsl r2, r0, #30 +; V7A-NEXT: mov r3, #33024 +; V7A-NEXT: and r2, r3, r2, asr #31 +; V7A-NEXT: lsr r0, r0, #2 +; V7A-NEXT: bfi r2, r0, #7, #1 +; V7A-NEXT: bfi r2, r0, #14, #1 +; V7A-NEXT: b .LBB0_5 +; V7A-NEXT: .LBB0_3: @ %if.else +; V7A-NEXT: tst r0, #14 +; V7A-NEXT: bxeq lr +; V7A-NEXT: .LBB0_4: +; V7A-NEXT: lsl r2, r0, #30 +; V7A-NEXT: mov r3, #8256 +; V7A-NEXT: and r2, r3, r2, asr #31 +; V7A-NEXT: lsr r0, r0, #2 +; V7A-NEXT: bfi r2, r0, #5, #1 +; V7A-NEXT: bfi r2, r0, #12, #1 +; V7A-NEXT: .LBB0_5: @ %if.end +; V7A-NEXT: str r2, [r1] +; V7A-NEXT: .LBB0_6: @ %exit +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: f: +; V7A-T: @ %bb.0: @ %entry +; V7A-T-NEXT: movs r2, #0 +; V7A-T-NEXT: str r2, [r1] +; V7A-T-NEXT: lsls r2, r0, #31 +; V7A-T-NEXT: bne .LBB0_3 +; V7A-T-NEXT: @ %bb.1: @ %if.then +; V7A-T-NEXT: tst.w r0, #14 +; V7A-T-NEXT: beq .LBB0_6 +; V7A-T-NEXT: @ %bb.2: +; V7A-T-NEXT: lsls r2, r0, #30 +; V7A-T-NEXT: mov.w r3, #33024 +; V7A-T-NEXT: and.w r2, r3, r2, asr #31 +; V7A-T-NEXT: lsrs r0, r0, #2 +; V7A-T-NEXT: bfi r2, r0, #7, #1 +; V7A-T-NEXT: bfi r2, r0, #14, #1 +; V7A-T-NEXT: b .LBB0_5 +; V7A-T-NEXT: .LBB0_3: @ %if.else +; V7A-T-NEXT: tst.w r0, #14 +; V7A-T-NEXT: it eq +; V7A-T-NEXT: bxeq lr +; V7A-T-NEXT: .LBB0_4: +; V7A-T-NEXT: lsls r2, r0, #30 +; V7A-T-NEXT: mov.w r3, #8256 +; V7A-T-NEXT: and.w r2, r3, r2, asr #31 +; V7A-T-NEXT: lsrs r0, r0, #2 +; V7A-T-NEXT: bfi r2, r0, 
#5, #1 +; V7A-T-NEXT: bfi r2, r0, #12, #1 +; V7A-T-NEXT: .LBB0_5: @ %if.end +; V7A-T-NEXT: str r2, [r1] +; V7A-T-NEXT: .LBB0_6: @ %exit +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: f: +; V6M: @ %bb.0: @ %entry +; V6M-NEXT: .save {r4, lr} +; V6M-NEXT: push {r4, lr} +; V6M-NEXT: movs r2, #0 +; V6M-NEXT: str r2, [r1] +; V6M-NEXT: movs r3, #14 +; V6M-NEXT: ands r3, r0 +; V6M-NEXT: movs r4, #4 +; V6M-NEXT: ands r4, r0 +; V6M-NEXT: movs r2, #2 +; V6M-NEXT: ands r2, r0 +; V6M-NEXT: lsls r0, r0, #31 +; V6M-NEXT: bne .LBB0_5 +; V6M-NEXT: @ %bb.1: @ %if.then +; V6M-NEXT: movs r0, #129 +; V6M-NEXT: cmp r2, #0 +; V6M-NEXT: beq .LBB0_3 +; V6M-NEXT: @ %bb.2: +; V6M-NEXT: lsls r2, r0, #8 +; V6M-NEXT: .LBB0_3: @ %if.then +; V6M-NEXT: cmp r4, #0 +; V6M-NEXT: beq .LBB0_10 +; V6M-NEXT: @ %bb.4: @ %if.then +; V6M-NEXT: lsls r0, r0, #7 +; V6M-NEXT: b .LBB0_9 +; V6M-NEXT: .LBB0_5: @ %if.else +; V6M-NEXT: movs r0, #129 +; V6M-NEXT: cmp r2, #0 +; V6M-NEXT: beq .LBB0_7 +; V6M-NEXT: @ %bb.6: +; V6M-NEXT: lsls r2, r0, #6 +; V6M-NEXT: .LBB0_7: @ %if.else +; V6M-NEXT: cmp r4, #0 +; V6M-NEXT: beq .LBB0_10 +; V6M-NEXT: @ %bb.8: @ %if.else +; V6M-NEXT: lsls r0, r0, #5 +; V6M-NEXT: .LBB0_9: @ %if.else +; V6M-NEXT: adds r2, r2, r0 +; V6M-NEXT: .LBB0_10: @ %if.else +; V6M-NEXT: cmp r3, #0 +; V6M-NEXT: beq .LBB0_12 +; V6M-NEXT: @ %bb.11: @ %if.end +; V6M-NEXT: str r2, [r1] +; V6M-NEXT: .LBB0_12: @ %exit +; V6M-NEXT: pop {r4, pc} +entry: + store i32 0, ptr %outp, align 4 + %and = and i32 %v, 1 + %cmp = icmp eq i32 %and, 0 + %and1 = and i32 %v, 2 + %tobool.not = icmp eq i32 %and1, 0 + %and2 = and i32 %v, 4 + %tobool1.not = icmp eq i32 %and2, 0 + %and3 = and i32 %v, 14 + %tobool2.not = icmp eq i32 %and3, 0 + br i1 %cmp, label %if.then, label %if.else + +if.then: + %select = select i1 %tobool.not, i32 0, i32 33024 + %or = or i32 %select, 16512 + %spec.select = select i1 %tobool1.not, i32 %select, i32 %or + br i1 %tobool2.not, label %exit, label %if.end + +if.else: + %select1 = select i1 %tobool.not, i32 0, i32 8256 + %or1 = or i32 %select1, 4128 + %spec.select1 = select i1 %tobool1.not, i32 %select1, i32 %or1 + br i1 %tobool2.not, label %exit, label %if.end + +if.end: + %spec.select.sink = phi i32 [ %spec.select, %if.then ], [ %spec.select1, %if.else ] + store i32 %spec.select.sink, ptr %outp, align 4 + br label %exit + +exit: + ret void +} + +; Test with a mask that can be encoded with T32 instruction set, but not with A32. 
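+; (16843009 is 0x01010101: the T32 modified-immediate encoding includes the
+; byte-replicated 0xXYXYXYXY form, so `tst.w r1, #16843009` is encodable,
+; whereas an A32 modified immediate is an 8-bit value rotated right by an
+; even amount and cannot produce 0x01010101; the A32 path therefore keeps
+; the mask in a register, as the V7A checks below show.)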
+define i32 @f0(i1 %c0, i32 %v) { +; V7M-LABEL: f0: +; V7M: @ %bb.0: @ %E +; V7M-NEXT: lsls r0, r0, #31 +; V7M-NEXT: beq .LBB1_2 +; V7M-NEXT: @ %bb.1: @ %A +; V7M-NEXT: tst.w r1, #16843009 +; V7M-NEXT: itt eq +; V7M-NEXT: moveq r0, #0 +; V7M-NEXT: bxeq lr +; V7M-NEXT: b .LBB1_3 +; V7M-NEXT: .LBB1_2: @ %B +; V7M-NEXT: tst.w r1, #16843009 +; V7M-NEXT: itt ne +; V7M-NEXT: movne r0, #0 +; V7M-NEXT: bxne lr +; V7M-NEXT: .LBB1_3: @ %D +; V7M-NEXT: movs r0, #1 +; V7M-NEXT: bx lr +; +; V7A-LABEL: f0: +; V7A: @ %bb.0: @ %E +; V7A-NEXT: movw r2, #257 +; V7A-NEXT: tst r0, #1 +; V7A-NEXT: movt r2, #257 +; V7A-NEXT: and r1, r1, r2 +; V7A-NEXT: beq .LBB1_3 +; V7A-NEXT: @ %bb.1: @ %A +; V7A-NEXT: cmp r1, #0 +; V7A-NEXT: moveq r0, #0 +; V7A-NEXT: bxeq lr +; V7A-NEXT: .LBB1_2: @ %D +; V7A-NEXT: mov r0, #1 +; V7A-NEXT: bx lr +; V7A-NEXT: .LBB1_3: @ %B +; V7A-NEXT: mov r0, #0 +; V7A-NEXT: cmp r1, #0 +; V7A-NEXT: moveq r0, #1 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: f0: +; V7A-T: @ %bb.0: @ %E +; V7A-T-NEXT: lsls r0, r0, #31 +; V7A-T-NEXT: beq .LBB1_2 +; V7A-T-NEXT: @ %bb.1: @ %A +; V7A-T-NEXT: tst.w r1, #16843009 +; V7A-T-NEXT: itt eq +; V7A-T-NEXT: moveq r0, #0 +; V7A-T-NEXT: bxeq lr +; V7A-T-NEXT: b .LBB1_3 +; V7A-T-NEXT: .LBB1_2: @ %B +; V7A-T-NEXT: tst.w r1, #16843009 +; V7A-T-NEXT: itt ne +; V7A-T-NEXT: movne r0, #0 +; V7A-T-NEXT: bxne lr +; V7A-T-NEXT: .LBB1_3: @ %D +; V7A-T-NEXT: movs r0, #1 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: f0: +; V6M: @ %bb.0: @ %E +; V6M-NEXT: ldr r2, .LCPI1_0 +; V6M-NEXT: ands r2, r1 +; V6M-NEXT: lsls r0, r0, #31 +; V6M-NEXT: beq .LBB1_3 +; V6M-NEXT: @ %bb.1: @ %A +; V6M-NEXT: cmp r2, #0 +; V6M-NEXT: bne .LBB1_5 +; V6M-NEXT: @ %bb.2: +; V6M-NEXT: movs r0, #0 +; V6M-NEXT: bx lr +; V6M-NEXT: .LBB1_3: @ %B +; V6M-NEXT: cmp r2, #0 +; V6M-NEXT: beq .LBB1_5 +; V6M-NEXT: @ %bb.4: +; V6M-NEXT: movs r0, #0 +; V6M-NEXT: bx lr +; V6M-NEXT: .LBB1_5: @ %D +; V6M-NEXT: movs r0, #1 +; V6M-NEXT: bx lr +; V6M-NEXT: .p2align 2 +; V6M-NEXT: @ %bb.6: +; V6M-NEXT: .LCPI1_0: +; V6M-NEXT: .long 16843009 @ 0x1010101 +E: + %a = and i32 %v, 16843009 + br i1 %c0, label %A, label %B + +A: + %c1 = icmp eq i32 %a, 0 + br i1 %c1, label %C, label %D + +B: + %c2 = icmp eq i32 %a, 0 + br i1 %c2, label %D, label %C + +C: + br label %X + +D: + br label %X + +X: + %x = phi i32 [0, %C], [1, %D] + ret i32 %x +} + +; Test with a mask that can be encoded both with T32 and A32 instruction sets. 
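+; (100663296 is 0x06000000, i.e. 0x06 rotated right by 8 bits, which both
+; the A32 and the T32 modified-immediate encodings can represent, so both
+; paths fold the mask directly into `tst`.)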
+define i32 @f1(i1 %c0, i32 %v) { +; V7M-LABEL: f1: +; V7M: @ %bb.0: @ %E +; V7M-NEXT: lsls r0, r0, #31 +; V7M-NEXT: beq .LBB2_2 +; V7M-NEXT: @ %bb.1: @ %A +; V7M-NEXT: tst.w r1, #100663296 +; V7M-NEXT: itt eq +; V7M-NEXT: moveq r0, #0 +; V7M-NEXT: bxeq lr +; V7M-NEXT: b .LBB2_3 +; V7M-NEXT: .LBB2_2: @ %B +; V7M-NEXT: tst.w r1, #100663296 +; V7M-NEXT: itt ne +; V7M-NEXT: movne r0, #0 +; V7M-NEXT: bxne lr +; V7M-NEXT: .LBB2_3: @ %D +; V7M-NEXT: movs r0, #1 +; V7M-NEXT: bx lr +; +; V7A-LABEL: f1: +; V7A: @ %bb.0: @ %E +; V7A-NEXT: tst r0, #1 +; V7A-NEXT: beq .LBB2_3 +; V7A-NEXT: @ %bb.1: @ %A +; V7A-NEXT: tst r1, #100663296 +; V7A-NEXT: moveq r0, #0 +; V7A-NEXT: bxeq lr +; V7A-NEXT: .LBB2_2: @ %D +; V7A-NEXT: mov r0, #1 +; V7A-NEXT: bx lr +; V7A-NEXT: .LBB2_3: @ %B +; V7A-NEXT: mov r0, #0 +; V7A-NEXT: tst r1, #100663296 +; V7A-NEXT: moveq r0, #1 +; V7A-NEXT: bx lr +; +; V7A-T-LABEL: f1: +; V7A-T: @ %bb.0: @ %E +; V7A-T-NEXT: lsls r0, r0, #31 +; V7A-T-NEXT: beq .LBB2_2 +; V7A-T-NEXT: @ %bb.1: @ %A +; V7A-T-NEXT: tst.w r1, #100663296 +; V7A-T-NEXT: itt eq +; V7A-T-NEXT: moveq r0, #0 +; V7A-T-NEXT: bxeq lr +; V7A-T-NEXT: b .LBB2_3 +; V7A-T-NEXT: .LBB2_2: @ %B +; V7A-T-NEXT: tst.w r1, #100663296 +; V7A-T-NEXT: itt ne +; V7A-T-NEXT: movne r0, #0 +; V7A-T-NEXT: bxne lr +; V7A-T-NEXT: .LBB2_3: @ %D +; V7A-T-NEXT: movs r0, #1 +; V7A-T-NEXT: bx lr +; +; V6M-LABEL: f1: +; V6M: @ %bb.0: @ %E +; V6M-NEXT: movs r2, #3 +; V6M-NEXT: lsls r2, r2, #25 +; V6M-NEXT: ands r2, r1 +; V6M-NEXT: lsls r0, r0, #31 +; V6M-NEXT: beq .LBB2_3 +; V6M-NEXT: @ %bb.1: @ %A +; V6M-NEXT: cmp r2, #0 +; V6M-NEXT: bne .LBB2_5 +; V6M-NEXT: @ %bb.2: +; V6M-NEXT: movs r0, #0 +; V6M-NEXT: bx lr +; V6M-NEXT: .LBB2_3: @ %B +; V6M-NEXT: cmp r2, #0 +; V6M-NEXT: beq .LBB2_5 +; V6M-NEXT: @ %bb.4: +; V6M-NEXT: movs r0, #0 +; V6M-NEXT: bx lr +; V6M-NEXT: .LBB2_5: @ %D +; V6M-NEXT: movs r0, #1 +; V6M-NEXT: bx lr +E: + %a = and i32 %v, 100663296 + br i1 %c0, label %A, label %B + +A: + %c1 = icmp eq i32 %a, 0 + br i1 %c1, label %C, label %D + +B: + %c2 = icmp eq i32 %a, 0 + br i1 %c2, label %D, label %C + +C: + br label %X + +D: + br label %X + +X: + %x = phi i32 [0, %C], [1, %D] + ret i32 %x +} diff --git a/llvm/test/CodeGen/LoongArch/double-fcmp-strict.ll b/llvm/test/CodeGen/LoongArch/double-fcmp-strict.ll new file mode 100644 index 00000000000000..066f60752e2a75 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/double-fcmp-strict.ll @@ -0,0 +1,243 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64 + +declare i1 @llvm.experimental.constrained.fcmp.f64(double, double, metadata, metadata) + +define i32 @fcmp_oeq(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_oeq: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.ceq.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_oeq: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.ceq.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ogt(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_ogt: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.clt.d $fcc0, $fa1, $fa0 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ogt: +; LA64: # %bb.0: +; 
LA64-NEXT: fcmp.clt.d $fcc0, $fa1, $fa0 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ogt", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_oge(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_oge: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cle.d $fcc0, $fa1, $fa0 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_oge: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cle.d $fcc0, $fa1, $fa0 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"oge", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_olt(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_olt: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.clt.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_olt: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.clt.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"olt", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ole(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_ole: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cle.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ole: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cle.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ole", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_one(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_one: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cne.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_one: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cne.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"one", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ord(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_ord: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cor.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ord: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cor.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ord", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ueq(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_ueq: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cueq.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ueq: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cueq.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ueq", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ugt(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_ugt: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cult.d $fcc0, $fa1, $fa0 +; LA32-NEXT: movcf2gr 
$a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ugt: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cult.d $fcc0, $fa1, $fa0 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ugt", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_uge(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_uge: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cule.d $fcc0, $fa1, $fa0 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_uge: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cule.d $fcc0, $fa1, $fa0 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"uge", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ult(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_ult: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cult.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ult: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cult.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ult", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ule(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_ule: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cule.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ule: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cule.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ule", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_une(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_une: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cune.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_une: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cune.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"une", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_uno(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_uno: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cun.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_uno: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cun.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"uno", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} diff --git a/llvm/test/CodeGen/LoongArch/double-fcmps-strict.ll b/llvm/test/CodeGen/LoongArch/double-fcmps-strict.ll new file mode 100644 index 00000000000000..c8974fb946222a --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/double-fcmps-strict.ll @@ -0,0 +1,482 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64 + +declare i1 @llvm.experimental.constrained.fcmps.f64(double, double, metadata, metadata) +declare 
i1 @llvm.experimental.constrained.fcmp.f64(double, double, metadata, metadata) + +define i32 @fcmps_oeq(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmps_oeq: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.seq.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_oeq: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.seq.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_ogt(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmps_ogt: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.slt.d $fcc0, $fa1, $fa0 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_ogt: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.slt.d $fcc0, $fa1, $fa0 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"ogt", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_oge(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmps_oge: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.sle.d $fcc0, $fa1, $fa0 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_oge: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.sle.d $fcc0, $fa1, $fa0 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"oge", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_olt(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmps_olt: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.slt.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_olt: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.slt.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"olt", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_ole(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmps_ole: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.sle.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_ole: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.sle.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"ole", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_one(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmps_one: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.sne.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_one: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.sne.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"one", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_ord(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmps_ord: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.sor.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_ord: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.sor.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: 
ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"ord", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_ueq(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmps_ueq: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.sueq.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_ueq: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.sueq.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"ueq", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_ugt(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmps_ugt: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.sult.d $fcc0, $fa1, $fa0 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_ugt: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.sult.d $fcc0, $fa1, $fa0 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"ugt", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_uge(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmps_uge: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.sule.d $fcc0, $fa1, $fa0 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_uge: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.sule.d $fcc0, $fa1, $fa0 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"uge", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_ult(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmps_ult: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.sult.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_ult: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.sult.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"ult", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_ule(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmps_ule: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.sule.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_ule: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.sule.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"ule", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_une(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmps_une: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.sune.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_une: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.sune.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"une", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_uno(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmps_uno: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.sun.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: 
fcmps_uno: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.sun.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"uno", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_oeq(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_oeq: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.ceq.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_oeq: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.ceq.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ogt(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_ogt: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.clt.d $fcc0, $fa1, $fa0 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ogt: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.clt.d $fcc0, $fa1, $fa0 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ogt", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_oge(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_oge: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cle.d $fcc0, $fa1, $fa0 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_oge: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cle.d $fcc0, $fa1, $fa0 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"oge", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_olt(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_olt: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.clt.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_olt: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.clt.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"olt", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ole(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_ole: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cle.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ole: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cle.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ole", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_one(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_one: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cne.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_one: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cne.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"one", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ord(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_ord: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cor.d $fcc0, $fa0, 
$fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ord: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cor.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ord", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ueq(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_ueq: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cueq.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ueq: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cueq.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ueq", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ugt(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_ugt: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cult.d $fcc0, $fa1, $fa0 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ugt: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cult.d $fcc0, $fa1, $fa0 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ugt", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_uge(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_uge: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cule.d $fcc0, $fa1, $fa0 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_uge: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cule.d $fcc0, $fa1, $fa0 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"uge", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ult(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_ult: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cult.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ult: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cult.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ult", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ule(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_ule: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cule.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ule: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cule.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ule", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_une(double %a, double %b) nounwind strictfp { +; LA32-LABEL: fcmp_une: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cune.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_une: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cune.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"une", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_uno(double %a, double %b) nounwind strictfp 
{ +; LA32-LABEL: fcmp_uno: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cun.d $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_uno: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cun.d $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"uno", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} diff --git a/llvm/test/CodeGen/LoongArch/float-fcmp-strict.ll b/llvm/test/CodeGen/LoongArch/float-fcmp-strict.ll new file mode 100644 index 00000000000000..0459d5019378f2 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/float-fcmp-strict.ll @@ -0,0 +1,243 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA64 + +declare i1 @llvm.experimental.constrained.fcmp.f32(float, float, metadata, metadata) + +define i32 @fcmp_oeq(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_oeq: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.ceq.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_oeq: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.ceq.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"oeq", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ogt(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_ogt: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.clt.s $fcc0, $fa1, $fa0 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ogt: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.clt.s $fcc0, $fa1, $fa0 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ogt", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_oge(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_oge: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cle.s $fcc0, $fa1, $fa0 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_oge: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cle.s $fcc0, $fa1, $fa0 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"oge", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_olt(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_olt: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.clt.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_olt: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.clt.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"olt", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ole(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_ole: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cle.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ole: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cle.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ole", metadata 
!"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_one(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_one: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cne.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_one: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cne.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"one", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ord(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_ord: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cor.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ord: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cor.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ord", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ueq(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_ueq: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cueq.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ueq: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cueq.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ueq", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ugt(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_ugt: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cult.s $fcc0, $fa1, $fa0 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ugt: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cult.s $fcc0, $fa1, $fa0 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ugt", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_uge(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_uge: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cule.s $fcc0, $fa1, $fa0 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_uge: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cule.s $fcc0, $fa1, $fa0 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"uge", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ult(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_ult: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cult.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ult: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cult.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ult", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ule(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_ule: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cule.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ule: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cule.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 
@llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ule", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_une(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_une: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cune.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_une: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cune.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"une", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_uno(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_uno: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cun.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_uno: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cun.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"uno", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} diff --git a/llvm/test/CodeGen/LoongArch/float-fcmps-strict.ll b/llvm/test/CodeGen/LoongArch/float-fcmps-strict.ll new file mode 100644 index 00000000000000..cad4d45c147ee4 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/float-fcmps-strict.ll @@ -0,0 +1,482 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA64 + +declare i1 @llvm.experimental.constrained.fcmps.f32(float, float, metadata, metadata) +declare i1 @llvm.experimental.constrained.fcmp.f32(float, float, metadata, metadata) + +define i32 @fcmps_oeq(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmps_oeq: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.seq.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_oeq: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.seq.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"oeq", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_ogt(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmps_ogt: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.slt.s $fcc0, $fa1, $fa0 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_ogt: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.slt.s $fcc0, $fa1, $fa0 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"ogt", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_oge(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmps_oge: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.sle.s $fcc0, $fa1, $fa0 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_oge: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.sle.s $fcc0, $fa1, $fa0 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"oge", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_olt(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmps_olt: +; LA32: # %bb.0: +; LA32-NEXT: 
fcmp.slt.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_olt: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.slt.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"olt", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_ole(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmps_ole: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.sle.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_ole: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.sle.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"ole", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_one(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmps_one: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.sne.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_one: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.sne.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"one", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_ord(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmps_ord: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.sor.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_ord: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.sor.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"ord", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_ueq(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmps_ueq: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.sueq.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_ueq: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.sueq.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"ueq", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_ugt(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmps_ugt: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.sult.s $fcc0, $fa1, $fa0 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_ugt: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.sult.s $fcc0, $fa1, $fa0 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"ugt", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_uge(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmps_uge: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.sule.s $fcc0, $fa1, $fa0 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_uge: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.sule.s $fcc0, $fa1, $fa0 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"uge", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_ult(float %a, float %b) 
nounwind strictfp { +; LA32-LABEL: fcmps_ult: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.sult.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_ult: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.sult.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"ult", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_ule(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmps_ule: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.sule.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_ule: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.sule.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"ule", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_une(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmps_une: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.sune.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_une: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.sune.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"une", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmps_uno(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmps_uno: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.sun.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmps_uno: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.sun.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"uno", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_oeq(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_oeq: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.ceq.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_oeq: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.ceq.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"oeq", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ogt(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_ogt: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.clt.s $fcc0, $fa1, $fa0 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ogt: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.clt.s $fcc0, $fa1, $fa0 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ogt", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_oge(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_oge: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cle.s $fcc0, $fa1, $fa0 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_oge: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cle.s $fcc0, $fa1, $fa0 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"oge", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to 
i32 + ret i32 %2 +} + +define i32 @fcmp_olt(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_olt: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.clt.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_olt: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.clt.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"olt", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ole(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_ole: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cle.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ole: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cle.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ole", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_one(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_one: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cne.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_one: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cne.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"one", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ord(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_ord: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cor.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ord: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cor.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ord", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ueq(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_ueq: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cueq.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ueq: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cueq.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ueq", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ugt(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_ugt: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cult.s $fcc0, $fa1, $fa0 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ugt: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cult.s $fcc0, $fa1, $fa0 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ugt", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_uge(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_uge: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cule.s $fcc0, $fa1, $fa0 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_uge: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cule.s $fcc0, $fa1, $fa0 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"uge", metadata 
!"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ult(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_ult: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cult.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ult: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cult.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ult", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ule(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_ule: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cule.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_ule: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cule.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ule", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_une(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_une: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cune.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_une: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cune.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"une", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_uno(float %a, float %b) nounwind strictfp { +; LA32-LABEL: fcmp_uno: +; LA32: # %bb.0: +; LA32-NEXT: fcmp.cun.s $fcc0, $fa0, $fa1 +; LA32-NEXT: movcf2gr $a0, $fcc0 +; LA32-NEXT: ret +; +; LA64-LABEL: fcmp_uno: +; LA64: # %bb.0: +; LA64-NEXT: fcmp.cun.s $fcc0, $fa0, $fa1 +; LA64-NEXT: movcf2gr $a0, $fcc0 +; LA64-NEXT: ret + %1 = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"uno", metadata !"fpexcept.strict") strictfp + %2 = zext i1 %1 to i32 + ret i32 %2 +} diff --git a/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll index c089034f932655..f25ef272e483b1 100644 --- a/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll @@ -1426,12 +1426,11 @@ define void @arm_biquad_cascade_df2T_f16(%struct.arm_biquad_cascade_df2T_instanc ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} -; CHECK-NEXT: ldrd r12, r6, [r0, #4] ; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: ldrb r0, [r0] -; CHECK-NEXT: and r8, r3, #1 +; CHECK-NEXT: ldrd r12, r6, [r0, #4] +; CHECK-NEXT: ldrb.w r9, [r0] ; CHECK-NEXT: vldr.16 s0, .LCPI17_0 -; CHECK-NEXT: lsr.w r9, r3, #1 +; CHECK-NEXT: lsr.w r8, r3, #1 ; CHECK-NEXT: b .LBB17_3 ; CHECK-NEXT: .LBB17_1: @ %if.else ; CHECK-NEXT: @ in Loop: Header=BB17_3 Depth=1 @@ -1441,7 +1440,7 @@ define void @arm_biquad_cascade_df2T_f16(%struct.arm_biquad_cascade_df2T_instanc ; CHECK-NEXT: @ in Loop: Header=BB17_3 Depth=1 ; CHECK-NEXT: vstr.16 s5, [r12, #2] ; CHECK-NEXT: adds r6, #10 -; CHECK-NEXT: subs r0, #1 +; CHECK-NEXT: subs.w r9, r9, #1 ; CHECK-NEXT: add.w r12, r12, #4 ; CHECK-NEXT: mov r1, r2 ; CHECK-NEXT: beq .LBB17_8 @@ -1458,7 +1457,7 @@ define void @arm_biquad_cascade_df2T_f16(%struct.arm_biquad_cascade_df2T_instanc ; CHECK-NEXT: vldrh.u16 q1, [r12] ; CHECK-NEXT: vmov.f32 s5, s1 ; CHECK-NEXT: mov 
r5, r2 -; CHECK-NEXT: wls lr, r9, .LBB17_6 +; CHECK-NEXT: wls lr, r8, .LBB17_6 ; CHECK-NEXT: @ %bb.4: @ %while.body.preheader ; CHECK-NEXT: @ in Loop: Header=BB17_3 Depth=1 ; CHECK-NEXT: mov r5, r2 @@ -1466,7 +1465,7 @@ define void @arm_biquad_cascade_df2T_f16(%struct.arm_biquad_cascade_df2T_instanc ; CHECK-NEXT: @ Parent Loop BB17_3 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 ; CHECK-NEXT: ldrh r7, [r1], #4 -; CHECK-NEXT: vmov r3, s0 +; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: vfma.f16 q1, q2, r7 ; CHECK-NEXT: ldrh r4, [r1, #-2] ; CHECK-NEXT: vmov.u16 r7, q1[0] @@ -1478,19 +1477,19 @@ define void @arm_biquad_cascade_df2T_f16(%struct.arm_biquad_cascade_df2T_instanc ; CHECK-NEXT: strh r4, [r5, #2] ; CHECK-NEXT: vmov.f32 s4, s5 ; CHECK-NEXT: strh r7, [r5], #4 -; CHECK-NEXT: vmov.16 q1[2], r3 +; CHECK-NEXT: vmov.16 q1[2], r0 ; CHECK-NEXT: le lr, .LBB17_5 ; CHECK-NEXT: .LBB17_6: @ %while.end ; CHECK-NEXT: @ in Loop: Header=BB17_3 Depth=1 -; CHECK-NEXT: cmp.w r8, #0 +; CHECK-NEXT: lsls r0, r3, #31 ; CHECK-NEXT: beq .LBB17_1 ; CHECK-NEXT: @ %bb.7: @ %if.then ; CHECK-NEXT: @ in Loop: Header=BB17_3 Depth=1 -; CHECK-NEXT: ldrh r1, [r1] -; CHECK-NEXT: vfma.f16 q1, q2, r1 -; CHECK-NEXT: vmov.u16 r1, q1[0] -; CHECK-NEXT: vfma.f16 q1, q3, r1 -; CHECK-NEXT: strh r1, [r5] +; CHECK-NEXT: ldrh r0, [r1] +; CHECK-NEXT: vfma.f16 q1, q2, r0 +; CHECK-NEXT: vmov.u16 r0, q1[0] +; CHECK-NEXT: vfma.f16 q1, q3, r0 +; CHECK-NEXT: strh r0, [r5] ; CHECK-NEXT: vmovx.f16 s2, s4 ; CHECK-NEXT: vstr.16 s2, [r12] ; CHECK-NEXT: b .LBB17_2 diff --git a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll index ee91dcce9a7c8a..aff4bb32901f9f 100644 --- a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll @@ -2015,9 +2015,8 @@ define void @arm_biquad_cascade_df2T_f32(%struct.arm_biquad_cascade_df2T_instanc ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: ldrd r12, r6, [r0, #4] -; CHECK-NEXT: and r8, r3, #1 +; CHECK-NEXT: lsr.w r8, r3, #1 ; CHECK-NEXT: ldrb r0, [r0] -; CHECK-NEXT: lsrs r3, r3, #1 ; CHECK-NEXT: vldr s0, .LCPI20_0 ; CHECK-NEXT: b .LBB20_3 ; CHECK-NEXT: .LBB20_1: @ %if.else @@ -2046,7 +2045,7 @@ define void @arm_biquad_cascade_df2T_f32(%struct.arm_biquad_cascade_df2T_instanc ; CHECK-NEXT: vmov.f32 s6, s0 ; CHECK-NEXT: mov r5, r2 ; CHECK-NEXT: vmov.f32 s7, s0 -; CHECK-NEXT: wls lr, r3, .LBB20_6 +; CHECK-NEXT: wls lr, r8, .LBB20_6 ; CHECK-NEXT: @ %bb.4: @ %while.body.preheader ; CHECK-NEXT: @ in Loop: Header=BB20_3 Depth=1 ; CHECK-NEXT: vmov q6, q1 @@ -2073,7 +2072,7 @@ define void @arm_biquad_cascade_df2T_f32(%struct.arm_biquad_cascade_df2T_instanc ; CHECK-NEXT: le lr, .LBB20_5 ; CHECK-NEXT: .LBB20_6: @ %while.end ; CHECK-NEXT: @ in Loop: Header=BB20_3 Depth=1 -; CHECK-NEXT: cmp.w r8, #0 +; CHECK-NEXT: lsls r7, r3, #31 ; CHECK-NEXT: beq .LBB20_1 ; CHECK-NEXT: @ %bb.7: @ %if.then ; CHECK-NEXT: @ in Loop: Header=BB20_3 Depth=1 diff --git a/llvm/test/CodeGen/VE/Scalar/atomic.ll b/llvm/test/CodeGen/VE/Scalar/atomic.ll index f01bf0ff66c3f9..405cdc3f369376 100644 --- a/llvm/test/CodeGen/VE/Scalar/atomic.ll +++ b/llvm/test/CodeGen/VE/Scalar/atomic.ll @@ -184,13 +184,12 @@ define signext i32 @test_atomic_fetch_max_4() { ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lea.sl %s1, i@hi(, %s0) ; CHECK-NEXT: ldl.sx %s0, (, %s1) -; CHECK-NEXT: or %s2, 1, (0)1 ; CHECK-NEXT: .LBB6_1: # %atomicrmw.start ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; 
CHECK-NEXT: or %s3, 0, %s0 -; CHECK-NEXT: maxs.w.sx %s0, %s0, %s2 -; CHECK-NEXT: cas.w %s0, (%s1), %s3 -; CHECK-NEXT: brne.w %s0, %s3, .LBB6_1 +; CHECK-NEXT: or %s2, 0, %s0 +; CHECK-NEXT: maxs.w.sx %s0, 1, %s0 +; CHECK-NEXT: cas.w %s0, (%s1), %s2 +; CHECK-NEXT: brne.w %s0, %s2, .LBB6_1 ; CHECK-NEXT: # %bb.2: # %atomicrmw.end ; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: fencem 3 @@ -207,19 +206,16 @@ define signext i32 @test_atomic_fetch_min_4() { ; CHECK-NEXT: fencem 3 ; CHECK-NEXT: lea %s0, i@lo ; CHECK-NEXT: and %s0, %s0, (32)0 -; CHECK-NEXT: lea.sl %s0, i@hi(, %s0) -; CHECK-NEXT: ldl.sx %s1, (, %s0) -; CHECK-NEXT: or %s2, 2, (0)1 +; CHECK-NEXT: lea.sl %s1, i@hi(, %s0) +; CHECK-NEXT: ldl.sx %s0, (, %s1) ; CHECK-NEXT: .LBB7_1: # %atomicrmw.start ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: or %s3, 0, %s1 -; CHECK-NEXT: cmps.w.sx %s4, %s1, %s2 -; CHECK-NEXT: or %s1, 1, (0)1 -; CHECK-NEXT: cmov.w.lt %s1, %s3, %s4 -; CHECK-NEXT: cas.w %s1, (%s0), %s3 -; CHECK-NEXT: brne.w %s1, %s3, .LBB7_1 +; CHECK-NEXT: or %s2, 0, %s0 +; CHECK-NEXT: mins.w.sx %s0, 1, %s0 +; CHECK-NEXT: cas.w %s0, (%s1), %s2 +; CHECK-NEXT: brne.w %s0, %s2, .LBB7_1 ; CHECK-NEXT: # %bb.2: # %atomicrmw.end -; CHECK-NEXT: adds.w.sx %s0, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: fencem 3 ; CHECK-NEXT: b.l.t (, %s10) entry: diff --git a/llvm/test/CodeGen/VE/Scalar/max.ll b/llvm/test/CodeGen/VE/Scalar/max.ll index 5b2834ef087313..12aa101cb48c4d 100644 --- a/llvm/test/CodeGen/VE/Scalar/max.ll +++ b/llvm/test/CodeGen/VE/Scalar/max.ll @@ -1,11 +1,20 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s +; RUN: llc < %s -mtriple=ve-unknown-unknown -enable-no-signed-zeros-fp-math \ +; RUN: -enable-no-nans-fp-math | FileCheck %s -check-prefix=OPT define double @maxf64(double, double) { ; CHECK-LABEL: maxf64: ; CHECK: # %bb.0: -; CHECK-NEXT: fmax.d %s0, %s0, %s1 +; CHECK-NEXT: fcmp.d %s2, %s0, %s1 +; CHECK-NEXT: cmov.d.gt %s1, %s0, %s2 +; CHECK-NEXT: or %s0, 0, %s1 ; CHECK-NEXT: b.l.t (, %s10) +; +; OPT-LABEL: maxf64: +; OPT: # %bb.0: +; OPT-NEXT: fmax.d %s0, %s0, %s1 +; OPT-NEXT: b.l.t (, %s10) %3 = fcmp ogt double %0, %1 %4 = select i1 %3, double %0, double %1 ret double %4 @@ -14,8 +23,15 @@ define double @maxf64(double, double) { define double @max2f64(double, double) { ; CHECK-LABEL: max2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: fmax.d %s0, %s0, %s1 +; CHECK-NEXT: fcmp.d %s2, %s0, %s1 +; CHECK-NEXT: cmov.d.ge %s1, %s0, %s2 +; CHECK-NEXT: or %s0, 0, %s1 ; CHECK-NEXT: b.l.t (, %s10) +; +; OPT-LABEL: max2f64: +; OPT: # %bb.0: +; OPT-NEXT: fmax.d %s0, %s0, %s1 +; OPT-NEXT: b.l.t (, %s10) %3 = fcmp oge double %0, %1 %4 = select i1 %3, double %0, double %1 ret double %4 @@ -29,6 +45,11 @@ define double @maxuf64(double, double) { ; CHECK-NEXT: cmov.d.gtnan %s1, %s0, %s2 ; CHECK-NEXT: or %s0, 0, %s1 ; CHECK-NEXT: b.l.t (, %s10) +; +; OPT-LABEL: maxuf64: +; OPT: # %bb.0: +; OPT-NEXT: fmax.d %s0, %s0, %s1 +; OPT-NEXT: b.l.t (, %s10) %3 = fcmp ugt double %0, %1 %4 = select i1 %3, double %0, double %1 ret double %4 @@ -42,6 +63,11 @@ define double @max2uf64(double, double) { ; CHECK-NEXT: cmov.d.genan %s1, %s0, %s2 ; CHECK-NEXT: or %s0, 0, %s1 ; CHECK-NEXT: b.l.t (, %s10) +; +; OPT-LABEL: max2uf64: +; OPT: # %bb.0: +; OPT-NEXT: fmax.d %s0, %s0, %s1 +; OPT-NEXT: b.l.t (, %s10) %3 = fcmp uge double %0, %1 %4 = select i1 %3, double %0, double %1 ret double %4 @@ -50,8 +76,15 @@ define double @max2uf64(double, double) { 
define float @maxf32(float, float) { ; CHECK-LABEL: maxf32: ; CHECK: # %bb.0: -; CHECK-NEXT: fmax.s %s0, %s0, %s1 +; CHECK-NEXT: fcmp.s %s2, %s0, %s1 +; CHECK-NEXT: cmov.s.gt %s1, %s0, %s2 +; CHECK-NEXT: or %s0, 0, %s1 ; CHECK-NEXT: b.l.t (, %s10) +; +; OPT-LABEL: maxf32: +; OPT: # %bb.0: +; OPT-NEXT: fmax.s %s0, %s0, %s1 +; OPT-NEXT: b.l.t (, %s10) %3 = fcmp ogt float %0, %1 %4 = select i1 %3, float %0, float %1 ret float %4 @@ -60,8 +93,15 @@ define float @maxf32(float, float) { define float @max2f32(float, float) { ; CHECK-LABEL: max2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: fmax.s %s0, %s0, %s1 +; CHECK-NEXT: fcmp.s %s2, %s0, %s1 +; CHECK-NEXT: cmov.s.ge %s1, %s0, %s2 +; CHECK-NEXT: or %s0, 0, %s1 ; CHECK-NEXT: b.l.t (, %s10) +; +; OPT-LABEL: max2f32: +; OPT: # %bb.0: +; OPT-NEXT: fmax.s %s0, %s0, %s1 +; OPT-NEXT: b.l.t (, %s10) %3 = fcmp oge float %0, %1 %4 = select i1 %3, float %0, float %1 ret float %4 @@ -74,6 +114,11 @@ define float @maxuf32(float, float) { ; CHECK-NEXT: cmov.s.gtnan %s1, %s0, %s2 ; CHECK-NEXT: or %s0, 0, %s1 ; CHECK-NEXT: b.l.t (, %s10) +; +; OPT-LABEL: maxuf32: +; OPT: # %bb.0: +; OPT-NEXT: fmax.s %s0, %s0, %s1 +; OPT-NEXT: b.l.t (, %s10) %3 = fcmp ugt float %0, %1 %4 = select i1 %3, float %0, float %1 ret float %4 @@ -86,6 +131,11 @@ define float @max2uf32(float, float) { ; CHECK-NEXT: cmov.s.genan %s1, %s0, %s2 ; CHECK-NEXT: or %s0, 0, %s1 ; CHECK-NEXT: b.l.t (, %s10) +; +; OPT-LABEL: max2uf32: +; OPT: # %bb.0: +; OPT-NEXT: fmax.s %s0, %s0, %s1 +; OPT-NEXT: b.l.t (, %s10) %3 = fcmp uge float %0, %1 %4 = select i1 %3, float %0, float %1 ret float %4 @@ -96,6 +146,11 @@ define i64 @maxi64(i64, i64) { ; CHECK: # %bb.0: ; CHECK-NEXT: maxs.l %s0, %s0, %s1 ; CHECK-NEXT: b.l.t (, %s10) +; +; OPT-LABEL: maxi64: +; OPT: # %bb.0: +; OPT-NEXT: maxs.l %s0, %s0, %s1 +; OPT-NEXT: b.l.t (, %s10) %3 = icmp sgt i64 %0, %1 %4 = select i1 %3, i64 %0, i64 %1 ret i64 %4 @@ -106,6 +161,11 @@ define i64 @max2i64(i64, i64) { ; CHECK: # %bb.0: ; CHECK-NEXT: maxs.l %s0, %s0, %s1 ; CHECK-NEXT: b.l.t (, %s10) +; +; OPT-LABEL: max2i64: +; OPT: # %bb.0: +; OPT-NEXT: maxs.l %s0, %s0, %s1 +; OPT-NEXT: b.l.t (, %s10) %3 = icmp sge i64 %0, %1 %4 = select i1 %3, i64 %0, i64 %1 ret i64 %4 @@ -118,6 +178,13 @@ define i64 @maxu64(i64, i64) { ; CHECK-NEXT: cmov.l.gt %s1, %s0, %s2 ; CHECK-NEXT: or %s0, 0, %s1 ; CHECK-NEXT: b.l.t (, %s10) +; +; OPT-LABEL: maxu64: +; OPT: # %bb.0: +; OPT-NEXT: cmpu.l %s2, %s0, %s1 +; OPT-NEXT: cmov.l.gt %s1, %s0, %s2 +; OPT-NEXT: or %s0, 0, %s1 +; OPT-NEXT: b.l.t (, %s10) %3 = icmp ugt i64 %0, %1 %4 = select i1 %3, i64 %0, i64 %1 ret i64 %4 @@ -130,6 +197,13 @@ define i64 @max2u64(i64, i64) { ; CHECK-NEXT: cmov.l.ge %s1, %s0, %s2 ; CHECK-NEXT: or %s0, 0, %s1 ; CHECK-NEXT: b.l.t (, %s10) +; +; OPT-LABEL: max2u64: +; OPT: # %bb.0: +; OPT-NEXT: cmpu.l %s2, %s0, %s1 +; OPT-NEXT: cmov.l.ge %s1, %s0, %s2 +; OPT-NEXT: or %s0, 0, %s1 +; OPT-NEXT: b.l.t (, %s10) %3 = icmp uge i64 %0, %1 %4 = select i1 %3, i64 %0, i64 %1 ret i64 %4 @@ -140,6 +214,11 @@ define i32 @maxi32(i32, i32) { ; CHECK: # %bb.0: ; CHECK-NEXT: maxs.w.sx %s0, %s0, %s1 ; CHECK-NEXT: b.l.t (, %s10) +; +; OPT-LABEL: maxi32: +; OPT: # %bb.0: +; OPT-NEXT: maxs.w.sx %s0, %s0, %s1 +; OPT-NEXT: b.l.t (, %s10) %3 = icmp sgt i32 %0, %1 %4 = select i1 %3, i32 %0, i32 %1 ret i32 %4 @@ -150,6 +229,11 @@ define i32 @max2i32(i32, i32) { ; CHECK: # %bb.0: ; CHECK-NEXT: maxs.w.sx %s0, %s0, %s1 ; CHECK-NEXT: b.l.t (, %s10) +; +; OPT-LABEL: max2i32: +; OPT: # %bb.0: +; OPT-NEXT: maxs.w.sx %s0, %s0, %s1 +; OPT-NEXT: b.l.t (, %s10) 
%3 = icmp sge i32 %0, %1 %4 = select i1 %3, i32 %0, i32 %1 ret i32 %4 @@ -162,6 +246,13 @@ define i32 @maxu32(i32, i32) { ; CHECK-NEXT: cmov.w.gt %s1, %s0, %s2 ; CHECK-NEXT: or %s0, 0, %s1 ; CHECK-NEXT: b.l.t (, %s10) +; +; OPT-LABEL: maxu32: +; OPT: # %bb.0: +; OPT-NEXT: cmpu.w %s2, %s0, %s1 +; OPT-NEXT: cmov.w.gt %s1, %s0, %s2 +; OPT-NEXT: or %s0, 0, %s1 +; OPT-NEXT: b.l.t (, %s10) %3 = icmp ugt i32 %0, %1 %4 = select i1 %3, i32 %0, i32 %1 ret i32 %4 @@ -174,6 +265,13 @@ define i32 @max2u32(i32, i32) { ; CHECK-NEXT: cmov.w.ge %s1, %s0, %s2 ; CHECK-NEXT: or %s0, 0, %s1 ; CHECK-NEXT: b.l.t (, %s10) +; +; OPT-LABEL: max2u32: +; OPT: # %bb.0: +; OPT-NEXT: cmpu.w %s2, %s0, %s1 +; OPT-NEXT: cmov.w.ge %s1, %s0, %s2 +; OPT-NEXT: or %s0, 0, %s1 +; OPT-NEXT: b.l.t (, %s10) %3 = icmp uge i32 %0, %1 %4 = select i1 %3, i32 %0, i32 %1 ret i32 %4 @@ -184,6 +282,11 @@ define zeroext i1 @maxi1(i1 zeroext, i1 zeroext) { ; CHECK: # %bb.0: ; CHECK-NEXT: or %s0, %s0, %s1 ; CHECK-NEXT: b.l.t (, %s10) +; +; OPT-LABEL: maxi1: +; OPT: # %bb.0: +; OPT-NEXT: or %s0, %s0, %s1 +; OPT-NEXT: b.l.t (, %s10) %3 = xor i1 %1, true %4 = and i1 %3, %0 %5 = select i1 %4, i1 %0, i1 %1 diff --git a/llvm/test/CodeGen/VE/Scalar/min.ll b/llvm/test/CodeGen/VE/Scalar/min.ll index 866a5d6c2b914a..da92ebafd05903 100644 --- a/llvm/test/CodeGen/VE/Scalar/min.ll +++ b/llvm/test/CodeGen/VE/Scalar/min.ll @@ -1,10 +1,20 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s +; RUN: llc < %s -mtriple=ve-unknown-unknown -enable-no-signed-zeros-fp-math \ +; RUN: -enable-no-nans-fp-math | FileCheck %s -check-prefix=OPT define double @minf64(double, double) { ; CHECK-LABEL: minf64: ; CHECK: # %bb.0: -; CHECK-NEXT: fmin.d %s0, %s0, %s1 +; CHECK-NEXT: fcmp.d %s2, %s0, %s1 +; CHECK-NEXT: cmov.d.lt %s1, %s0, %s2 +; CHECK-NEXT: or %s0, 0, %s1 ; CHECK-NEXT: b.l.t (, %s10) +; +; OPT-LABEL: minf64: +; OPT: # %bb.0: +; OPT-NEXT: fmin.d %s0, %s0, %s1 +; OPT-NEXT: b.l.t (, %s10) %3 = fcmp olt double %0, %1 %4 = select i1 %3, double %0, double %1 ret double %4 @@ -13,8 +23,15 @@ define double @minf64(double, double) { define double @min2f64(double, double) { ; CHECK-LABEL: min2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: fmin.d %s0, %s0, %s1 +; CHECK-NEXT: fcmp.d %s2, %s0, %s1 +; CHECK-NEXT: cmov.d.le %s1, %s0, %s2 +; CHECK-NEXT: or %s0, 0, %s1 ; CHECK-NEXT: b.l.t (, %s10) +; +; OPT-LABEL: min2f64: +; OPT: # %bb.0: +; OPT-NEXT: fmin.d %s0, %s0, %s1 +; OPT-NEXT: b.l.t (, %s10) %3 = fcmp ole double %0, %1 %4 = select i1 %3, double %0, double %1 ret double %4 @@ -27,6 +44,11 @@ define double @minuf64(double, double) { ; CHECK-NEXT: cmov.d.ltnan %s1, %s0, %s2 ; CHECK-NEXT: or %s0, 0, %s1 ; CHECK-NEXT: b.l.t (, %s10) +; +; OPT-LABEL: minuf64: +; OPT: # %bb.0: +; OPT-NEXT: fmin.d %s0, %s0, %s1 +; OPT-NEXT: b.l.t (, %s10) %3 = fcmp ult double %0, %1 %4 = select i1 %3, double %0, double %1 ret double %4 @@ -39,6 +61,11 @@ define double @min2uf64(double, double) { ; CHECK-NEXT: cmov.d.lenan %s1, %s0, %s2 ; CHECK-NEXT: or %s0, 0, %s1 ; CHECK-NEXT: b.l.t (, %s10) +; +; OPT-LABEL: min2uf64: +; OPT: # %bb.0: +; OPT-NEXT: fmin.d %s0, %s0, %s1 +; OPT-NEXT: b.l.t (, %s10) %3 = fcmp ule double %0, %1 %4 = select i1 %3, double %0, double %1 ret double %4 @@ -47,8 +74,15 @@ define double @min2uf64(double, double) { define float @minf32(float, float) { ; CHECK-LABEL: minf32: ; CHECK: # %bb.0: -; CHECK-NEXT: fmin.s %s0, %s0, %s1 +; CHECK-NEXT: fcmp.s %s2, %s0, %s1 +; CHECK-NEXT: cmov.s.lt %s1, %s0, 
%s2 +; CHECK-NEXT: or %s0, 0, %s1 ; CHECK-NEXT: b.l.t (, %s10) +; +; OPT-LABEL: minf32: +; OPT: # %bb.0: +; OPT-NEXT: fmin.s %s0, %s0, %s1 +; OPT-NEXT: b.l.t (, %s10) %3 = fcmp olt float %0, %1 %4 = select i1 %3, float %0, float %1 ret float %4 @@ -57,8 +91,15 @@ define float @minf32(float, float) { define float @min2f32(float, float) { ; CHECK-LABEL: min2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: fmin.s %s0, %s0, %s1 +; CHECK-NEXT: fcmp.s %s2, %s0, %s1 +; CHECK-NEXT: cmov.s.le %s1, %s0, %s2 +; CHECK-NEXT: or %s0, 0, %s1 ; CHECK-NEXT: b.l.t (, %s10) +; +; OPT-LABEL: min2f32: +; OPT: # %bb.0: +; OPT-NEXT: fmin.s %s0, %s0, %s1 +; OPT-NEXT: b.l.t (, %s10) %3 = fcmp ole float %0, %1 %4 = select i1 %3, float %0, float %1 ret float %4 @@ -71,6 +112,11 @@ define float @minuf32(float, float) { ; CHECK-NEXT: cmov.s.ltnan %s1, %s0, %s2 ; CHECK-NEXT: or %s0, 0, %s1 ; CHECK-NEXT: b.l.t (, %s10) +; +; OPT-LABEL: minuf32: +; OPT: # %bb.0: +; OPT-NEXT: fmin.s %s0, %s0, %s1 +; OPT-NEXT: b.l.t (, %s10) %3 = fcmp ult float %0, %1 %4 = select i1 %3, float %0, float %1 ret float %4 @@ -83,6 +129,11 @@ define float @min2uf32(float, float) { ; CHECK-NEXT: cmov.s.lenan %s1, %s0, %s2 ; CHECK-NEXT: or %s0, 0, %s1 ; CHECK-NEXT: b.l.t (, %s10) +; +; OPT-LABEL: min2uf32: +; OPT: # %bb.0: +; OPT-NEXT: fmin.s %s0, %s0, %s1 +; OPT-NEXT: b.l.t (, %s10) %3 = fcmp ule float %0, %1 %4 = select i1 %3, float %0, float %1 ret float %4 @@ -93,6 +144,11 @@ define i64 @mini64(i64, i64) { ; CHECK: # %bb.0: ; CHECK-NEXT: mins.l %s0, %s0, %s1 ; CHECK-NEXT: b.l.t (, %s10) +; +; OPT-LABEL: mini64: +; OPT: # %bb.0: +; OPT-NEXT: mins.l %s0, %s0, %s1 +; OPT-NEXT: b.l.t (, %s10) %3 = icmp slt i64 %0, %1 %4 = select i1 %3, i64 %0, i64 %1 ret i64 %4 @@ -103,6 +159,11 @@ define i64 @min2i64(i64, i64) { ; CHECK: # %bb.0: ; CHECK-NEXT: mins.l %s0, %s0, %s1 ; CHECK-NEXT: b.l.t (, %s10) +; +; OPT-LABEL: min2i64: +; OPT: # %bb.0: +; OPT-NEXT: mins.l %s0, %s0, %s1 +; OPT-NEXT: b.l.t (, %s10) %3 = icmp sle i64 %0, %1 %4 = select i1 %3, i64 %0, i64 %1 ret i64 %4 @@ -115,6 +176,13 @@ define i64 @minu64(i64, i64) { ; CHECK-NEXT: cmov.l.lt %s1, %s0, %s2 ; CHECK-NEXT: or %s0, 0, %s1 ; CHECK-NEXT: b.l.t (, %s10) +; +; OPT-LABEL: minu64: +; OPT: # %bb.0: +; OPT-NEXT: cmpu.l %s2, %s0, %s1 +; OPT-NEXT: cmov.l.lt %s1, %s0, %s2 +; OPT-NEXT: or %s0, 0, %s1 +; OPT-NEXT: b.l.t (, %s10) %3 = icmp ult i64 %0, %1 %4 = select i1 %3, i64 %0, i64 %1 ret i64 %4 @@ -127,6 +195,13 @@ define i64 @min2u64(i64, i64) { ; CHECK-NEXT: cmov.l.le %s1, %s0, %s2 ; CHECK-NEXT: or %s0, 0, %s1 ; CHECK-NEXT: b.l.t (, %s10) +; +; OPT-LABEL: min2u64: +; OPT: # %bb.0: +; OPT-NEXT: cmpu.l %s2, %s0, %s1 +; OPT-NEXT: cmov.l.le %s1, %s0, %s2 +; OPT-NEXT: or %s0, 0, %s1 +; OPT-NEXT: b.l.t (, %s10) %3 = icmp ule i64 %0, %1 %4 = select i1 %3, i64 %0, i64 %1 ret i64 %4 @@ -137,6 +212,11 @@ define i32 @mini32(i32, i32) { ; CHECK: # %bb.0: ; CHECK-NEXT: mins.w.sx %s0, %s0, %s1 ; CHECK-NEXT: b.l.t (, %s10) +; +; OPT-LABEL: mini32: +; OPT: # %bb.0: +; OPT-NEXT: mins.w.sx %s0, %s0, %s1 +; OPT-NEXT: b.l.t (, %s10) %3 = icmp slt i32 %0, %1 %4 = select i1 %3, i32 %0, i32 %1 ret i32 %4 @@ -147,6 +227,11 @@ define i32 @min2i32(i32, i32) { ; CHECK: # %bb.0: ; CHECK-NEXT: mins.w.sx %s0, %s0, %s1 ; CHECK-NEXT: b.l.t (, %s10) +; +; OPT-LABEL: min2i32: +; OPT: # %bb.0: +; OPT-NEXT: mins.w.sx %s0, %s0, %s1 +; OPT-NEXT: b.l.t (, %s10) %3 = icmp sle i32 %0, %1 %4 = select i1 %3, i32 %0, i32 %1 ret i32 %4 @@ -159,6 +244,13 @@ define i32 @minu32(i32, i32) { ; CHECK-NEXT: cmov.w.lt %s1, %s0, %s2 ; CHECK-NEXT: or %s0, 0, 
%s1 ; CHECK-NEXT: b.l.t (, %s10) +; +; OPT-LABEL: minu32: +; OPT: # %bb.0: +; OPT-NEXT: cmpu.w %s2, %s0, %s1 +; OPT-NEXT: cmov.w.lt %s1, %s0, %s2 +; OPT-NEXT: or %s0, 0, %s1 +; OPT-NEXT: b.l.t (, %s10) %3 = icmp ult i32 %0, %1 %4 = select i1 %3, i32 %0, i32 %1 ret i32 %4 @@ -171,6 +263,13 @@ define i32 @min2u32(i32, i32) { ; CHECK-NEXT: cmov.w.le %s1, %s0, %s2 ; CHECK-NEXT: or %s0, 0, %s1 ; CHECK-NEXT: b.l.t (, %s10) +; +; OPT-LABEL: min2u32: +; OPT: # %bb.0: +; OPT-NEXT: cmpu.w %s2, %s0, %s1 +; OPT-NEXT: cmov.w.le %s1, %s0, %s2 +; OPT-NEXT: or %s0, 0, %s1 +; OPT-NEXT: b.l.t (, %s10) %3 = icmp ule i32 %0, %1 %4 = select i1 %3, i32 %0, i32 %1 ret i32 %4 @@ -183,6 +282,13 @@ define zeroext i1 @mini1(i1 zeroext, i1 zeroext) { ; CHECK-NEXT: cmov.w.ne %s2, %s1, %s0 ; CHECK-NEXT: adds.w.zx %s0, %s2, (0)1 ; CHECK-NEXT: b.l.t (, %s10) +; +; OPT-LABEL: mini1: +; OPT: # %bb.0: +; OPT-NEXT: and %s2, %s1, %s0 +; OPT-NEXT: cmov.w.ne %s2, %s1, %s0 +; OPT-NEXT: adds.w.zx %s0, %s2, (0)1 +; OPT-NEXT: b.l.t (, %s10) %3 = xor i1 %0, true %4 = and i1 %3, %1 %5 = select i1 %4, i1 %0, i1 %1 diff --git a/llvm/test/CodeGen/VE/Scalar/smax.ll b/llvm/test/CodeGen/VE/Scalar/smax.ll new file mode 100644 index 00000000000000..f989e0434b59d7 --- /dev/null +++ b/llvm/test/CodeGen/VE/Scalar/smax.ll @@ -0,0 +1,337 @@ +; RUN: llc < %s -mtriple=ve | FileCheck %s + +;;; Test ‘llvm.smax.*’ intrinsic +;;; +;;; Syntax: +;;; This is an overloaded intrinsic. You can use @llvm.smax on any +;;; integer bit width or any vector of integer elements. +;;; +;;; declare i32 @llvm.smax.i32(i32 %a, i32 %b) +;;; declare <4 x i32> @llvm.smax.v4i32(<4 x i32> %a, <4 x i32> %b) +;;; +;;; Overview: +;;; Return the larger of %a and %b comparing the values as signed +;;; integers. Vector intrinsics operate on a per-element basis. +;;; The larger element of %a and %b at a given index is returned +;;; for that index. +;;; +;;; Arguments: +;;; The arguments (%a and %b) may be of any integer type or a vector +;;; with integer element type. The argument types must match each +;;; other, and the return type must match the argument type. +;;; +;;; Note: +;;; We test only i8/i16/i32/i64/i128. 
+ +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define signext i8 @func_smax_var_i8(i8 noundef signext %0, i8 noundef signext %1) { +; CHECK-LABEL: func_smax_var_i8: +; CHECK: # %bb.0: +; CHECK-NEXT: maxs.w.sx %s0, %s0, %s1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + %3 = tail call i8 @llvm.smax.i8(i8 %0, i8 %1) + ret i8 %3 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define signext i16 @func_smax_var_i16(i16 noundef signext %0, i16 noundef signext %1) { +; CHECK-LABEL: func_smax_var_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: maxs.w.sx %s0, %s0, %s1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + %3 = tail call i16 @llvm.smax.i16(i16 %0, i16 %1) + ret i16 %3 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define signext i32 @func_smax_var_i32(i32 noundef signext %0, i32 noundef signext %1) { +; CHECK-LABEL: func_smax_var_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: maxs.w.sx %s0, %s0, %s1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + %3 = tail call i32 @llvm.smax.i32(i32 %0, i32 %1) + ret i32 %3 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define i64 @func_smax_var_i64(i64 noundef %0, i64 noundef %1) { +; CHECK-LABEL: func_smax_var_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: maxs.l %s0, %s0, %s1 +; CHECK-NEXT: b.l.t (, %s10) + %3 = tail call i64 @llvm.smax.i64(i64 %0, i64 %1) + ret i64 %3 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define i128 @func_smax_var_i128(i128 noundef %0, i128 noundef %1) { +; CHECK-LABEL: func_smax_var_i128: +; CHECK: # %bb.0: +; CHECK-NEXT: cmps.l %s5, %s1, %s3 +; CHECK-NEXT: or %s4, 0, %s2 +; CHECK-NEXT: cmov.l.gt %s4, %s0, %s5 +; CHECK-NEXT: cmpu.l %s6, %s0, %s2 +; CHECK-NEXT: cmov.l.gt %s2, %s0, %s6 +; CHECK-NEXT: cmov.l.eq %s4, %s2, %s5 +; CHECK-NEXT: maxs.l %s1, %s1, %s3 +; CHECK-NEXT: or %s0, 0, %s4 +; CHECK-NEXT: b.l.t (, %s10) + %3 = tail call i128 @llvm.smax.i128(i128 %0, i128 %1) + ret i128 %3 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define signext i8 @func_smax_fore_zero_i8(i8 noundef signext %0) { +; CHECK-LABEL: func_smax_fore_zero_i8: +; CHECK: # %bb.0: +; CHECK-NEXT: maxs.w.sx %s0, 0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i8 @llvm.smax.i8(i8 %0, i8 0) + ret i8 %2 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define signext i16 @func_smax_fore_zero_i16(i16 noundef signext %0) { +; CHECK-LABEL: func_smax_fore_zero_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: maxs.w.sx %s0, 0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i16 @llvm.smax.i16(i16 %0, i16 0) + ret i16 %2 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define signext i32 @func_smax_fore_zero_i32(i32 noundef signext %0) { +; CHECK-LABEL: func_smax_fore_zero_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: maxs.w.sx %s0, 0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i32 @llvm.smax.i32(i32 %0, i32 0) + ret i32 %2 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define i64 @func_smax_fore_zero_i64(i64 noundef %0) { +; CHECK-LABEL: func_smax_fore_zero_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: maxs.l %s0, 0, %s0 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i64 @llvm.smax.i64(i64 %0, i64 0) + ret i64 
%2 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define i128 @func_smax_fore_zero_i128(i128 noundef %0) { +; CHECK-LABEL: func_smax_fore_zero_i128: +; CHECK: # %bb.0: +; CHECK-NEXT: or %s2, 0, (0)1 +; CHECK-NEXT: cmps.l %s3, %s1, (0)1 +; CHECK-NEXT: cmov.l.gt %s2, %s0, %s3 +; CHECK-NEXT: cmov.l.eq %s2, %s0, %s3 +; CHECK-NEXT: maxs.l %s1, 0, %s1 +; CHECK-NEXT: or %s0, 0, %s2 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i128 @llvm.smax.i128(i128 %0, i128 0) + ret i128 %2 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define signext i8 @func_smax_back_zero_i8(i8 noundef signext %0) { +; CHECK-LABEL: func_smax_back_zero_i8: +; CHECK: # %bb.0: +; CHECK-NEXT: maxs.w.sx %s0, 0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i8 @llvm.smax.i8(i8 %0, i8 0) + ret i8 %2 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define signext i16 @func_smax_back_zero_i16(i16 noundef signext %0) { +; CHECK-LABEL: func_smax_back_zero_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: maxs.w.sx %s0, 0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i16 @llvm.smax.i16(i16 %0, i16 0) + ret i16 %2 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define signext i32 @func_smax_back_zero_i32(i32 noundef signext %0) { +; CHECK-LABEL: func_smax_back_zero_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: maxs.w.sx %s0, 0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i32 @llvm.smax.i32(i32 %0, i32 0) + ret i32 %2 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define i64 @func_smax_back_zero_i64(i64 noundef %0) { +; CHECK-LABEL: func_smax_back_zero_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: maxs.l %s0, 0, %s0 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i64 @llvm.smax.i64(i64 %0, i64 0) + ret i64 %2 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define i128 @func_smax_back_zero_i128(i128 noundef %0) { +; CHECK-LABEL: func_smax_back_zero_i128: +; CHECK: # %bb.0: +; CHECK-NEXT: or %s2, 0, (0)1 +; CHECK-NEXT: cmps.l %s3, %s1, (0)1 +; CHECK-NEXT: cmov.l.gt %s2, %s0, %s3 +; CHECK-NEXT: cmov.l.eq %s2, %s0, %s3 +; CHECK-NEXT: maxs.l %s1, 0, %s1 +; CHECK-NEXT: or %s0, 0, %s2 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i128 @llvm.smax.i128(i128 %0, i128 0) + ret i128 %2 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define signext i8 @func_smax_fore_const_i8(i8 noundef signext %0) { +; CHECK-LABEL: func_smax_fore_const_i8: +; CHECK: # %bb.0: +; CHECK-NEXT: maxs.w.sx %s0, -1, %s0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i8 @llvm.smax.i8(i8 %0, i8 -1) + ret i8 %2 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define signext i16 @func_smax_fore_const_i16(i16 noundef signext %0) { +; CHECK-LABEL: func_smax_fore_const_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: maxs.w.sx %s0, %s0, (56)0 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i16 @llvm.smax.i16(i16 %0, i16 255) + ret i16 %2 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define signext i32 @func_smax_fore_const_i32(i32 noundef signext %0) { +; CHECK-LABEL: func_smax_fore_const_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: maxs.w.sx %s0, %s0, (56)0 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 +; CHECK-NEXT: 
b.l.t (, %s10) + %2 = tail call i32 @llvm.smax.i32(i32 %0, i32 255) + ret i32 %2 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define i64 @func_smax_fore_const_i64(i64 noundef %0) { +; CHECK-LABEL: func_smax_fore_const_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: maxs.l %s0, %s0, (56)0 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i64 @llvm.smax.i64(i64 %0, i64 255) + ret i64 %2 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define i128 @func_smax_fore_const_i128(i128 noundef %0) { +; CHECK-LABEL: func_smax_fore_const_i128: +; CHECK: # %bb.0: +; CHECK-NEXT: cmps.l %s3, %s1, (0)1 +; CHECK-NEXT: lea %s4, 255 +; CHECK-NEXT: lea %s2, 255 +; CHECK-NEXT: cmov.l.gt %s2, %s0, %s3 +; CHECK-NEXT: cmpu.l %s5, %s0, (56)0 +; CHECK-NEXT: cmov.l.gt %s4, %s0, %s5 +; CHECK-NEXT: cmov.l.eq %s2, %s4, %s3 +; CHECK-NEXT: maxs.l %s1, 0, %s1 +; CHECK-NEXT: or %s0, 0, %s2 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i128 @llvm.smax.i128(i128 %0, i128 255) + ret i128 %2 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define signext i8 @func_smax_back_const_i8(i8 noundef signext %0) { +; CHECK-LABEL: func_smax_back_const_i8: +; CHECK: # %bb.0: +; CHECK-NEXT: maxs.w.sx %s0, -1, %s0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i8 @llvm.smax.i8(i8 %0, i8 -1) + ret i8 %2 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define signext i16 @func_smax_back_const_i16(i16 noundef signext %0) { +; CHECK-LABEL: func_smax_back_const_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: maxs.w.sx %s0, %s0, (56)0 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i16 @llvm.smax.i16(i16 %0, i16 255) + ret i16 %2 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define signext i32 @func_smax_back_const_i32(i32 noundef signext %0) { +; CHECK-LABEL: func_smax_back_const_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: maxs.w.sx %s0, %s0, (56)0 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i32 @llvm.smax.i32(i32 %0, i32 255) + ret i32 %2 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define i64 @func_smax_back_const_i64(i64 noundef %0) { +; CHECK-LABEL: func_smax_back_const_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: maxs.l %s0, %s0, (56)0 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i64 @llvm.smax.i64(i64 %0, i64 255) + ret i64 %2 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define i128 @func_smax_back_const_i128(i128 noundef %0) { +; CHECK-LABEL: func_smax_back_const_i128: +; CHECK: # %bb.0: +; CHECK-NEXT: cmps.l %s3, %s1, (0)1 +; CHECK-NEXT: lea %s4, 255 +; CHECK-NEXT: lea %s2, 255 +; CHECK-NEXT: cmov.l.gt %s2, %s0, %s3 +; CHECK-NEXT: cmpu.l %s5, %s0, (56)0 +; CHECK-NEXT: cmov.l.gt %s4, %s0, %s5 +; CHECK-NEXT: cmov.l.eq %s2, %s4, %s3 +; CHECK-NEXT: maxs.l %s1, 0, %s1 +; CHECK-NEXT: or %s0, 0, %s2 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i128 @llvm.smax.i128(i128 %0, i128 255) + ret i128 %2 +} + +; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn +declare i32 @llvm.smax.i32(i32, i32) + +; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn +declare i8 @llvm.smax.i8(i8, i8) + +; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn +declare i16 @llvm.smax.i16(i16, i16) + +; Function Attrs: nocallback nofree nosync 
nounwind readnone speculatable willreturn +declare i64 @llvm.smax.i64(i64, i64) + +; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn +declare i128 @llvm.smax.i128(i128, i128) diff --git a/llvm/test/CodeGen/VE/Scalar/smin.ll b/llvm/test/CodeGen/VE/Scalar/smin.ll new file mode 100644 index 00000000000000..a46c4e19a4ea82 --- /dev/null +++ b/llvm/test/CodeGen/VE/Scalar/smin.ll @@ -0,0 +1,337 @@ +; RUN: llc < %s -mtriple=ve | FileCheck %s + +;;; Test ‘llvm.smin.*’ intrinsic +;;; +;;; Syntax: +;;; This is an overloaded intrinsic. You can use @llvm.smin on any +;;; integer bit width or any vector of integer elements. +;;; +;;; declare i32 @llvm.smin.i32(i32 %a, i32 %b) +;;; declare <4 x i32> @llvm.smin.v4i32(<4 x i32> %a, <4 x i32> %b) +;;; +;;; Overview: +;;; Return the smaller of %a and %b comparing the values as signed +;;; integers. Vector intrinsics operate on a per-element basis. +;;; The smaller element of %a and %b at a given index is returned +;;; for that index. +;;; +;;; Arguments: +;;; The arguments (%a and %b) may be of any integer type or a vector +;;; with integer element type. The argument types must match each +;;; other, and the return type must match the argument type. +;;; +;;; Note: +;;; We test only i8/i16/i32/i64/i128. + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define signext i8 @func_smin_var_i8(i8 noundef signext %0, i8 noundef signext %1) { +; CHECK-LABEL: func_smin_var_i8: +; CHECK: # %bb.0: +; CHECK-NEXT: mins.w.sx %s0, %s0, %s1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + %3 = tail call i8 @llvm.smin.i8(i8 %0, i8 %1) + ret i8 %3 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define signext i16 @func_smin_var_i16(i16 noundef signext %0, i16 noundef signext %1) { +; CHECK-LABEL: func_smin_var_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: mins.w.sx %s0, %s0, %s1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + %3 = tail call i16 @llvm.smin.i16(i16 %0, i16 %1) + ret i16 %3 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define signext i32 @func_smin_var_i32(i32 noundef signext %0, i32 noundef signext %1) { +; CHECK-LABEL: func_smin_var_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: mins.w.sx %s0, %s0, %s1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + %3 = tail call i32 @llvm.smin.i32(i32 %0, i32 %1) + ret i32 %3 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define i64 @func_smin_var_i64(i64 noundef %0, i64 noundef %1) { +; CHECK-LABEL: func_smin_var_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: mins.l %s0, %s0, %s1 +; CHECK-NEXT: b.l.t (, %s10) + %3 = tail call i64 @llvm.smin.i64(i64 %0, i64 %1) + ret i64 %3 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define i128 @func_smin_var_i128(i128 noundef %0, i128 noundef %1) { +; CHECK-LABEL: func_smin_var_i128: +; CHECK: # %bb.0: +; CHECK-NEXT: cmps.l %s5, %s1, %s3 +; CHECK-NEXT: or %s4, 0, %s2 +; CHECK-NEXT: cmov.l.lt %s4, %s0, %s5 +; CHECK-NEXT: cmpu.l %s6, %s0, %s2 +; CHECK-NEXT: cmov.l.lt %s2, %s0, %s6 +; CHECK-NEXT: cmov.l.eq %s4, %s2, %s5 +; CHECK-NEXT: mins.l %s1, %s1, %s3 +; CHECK-NEXT: or %s0, 0, %s4 +; CHECK-NEXT: b.l.t (, %s10) + %3 = tail call i128 @llvm.smin.i128(i128 %0, i128 %1) + ret i128 %3 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define signext i8 @func_smin_fore_zero_i8(i8 noundef signext %0) { +; CHECK-LABEL: 
func_smin_fore_zero_i8: +; CHECK: # %bb.0: +; CHECK-NEXT: mins.w.sx %s0, 0, %s0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i8 @llvm.smin.i8(i8 %0, i8 0) + ret i8 %2 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define signext i16 @func_smin_fore_zero_i16(i16 noundef signext %0) { +; CHECK-LABEL: func_smin_fore_zero_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: mins.w.sx %s0, 0, %s0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i16 @llvm.smin.i16(i16 %0, i16 0) + ret i16 %2 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define signext i32 @func_smin_fore_zero_i32(i32 noundef signext %0) { +; CHECK-LABEL: func_smin_fore_zero_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: mins.w.sx %s0, 0, %s0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i32 @llvm.smin.i32(i32 %0, i32 0) + ret i32 %2 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define i64 @func_smin_fore_zero_i64(i64 noundef %0) { +; CHECK-LABEL: func_smin_fore_zero_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: mins.l %s0, 0, %s0 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i64 @llvm.smin.i64(i64 %0, i64 0) + ret i64 %2 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define i128 @func_smin_fore_zero_i128(i128 noundef %0) { +; CHECK-LABEL: func_smin_fore_zero_i128: +; CHECK: # %bb.0: +; CHECK-NEXT: or %s2, 0, (0)1 +; CHECK-NEXT: cmps.l %s3, %s1, (0)1 +; CHECK-NEXT: sra.l %s4, %s1, 63 +; CHECK-NEXT: and %s0, %s4, %s0 +; CHECK-NEXT: cmov.l.eq %s0, %s2, %s3 +; CHECK-NEXT: mins.l %s1, 0, %s1 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i128 @llvm.smin.i128(i128 %0, i128 0) + ret i128 %2 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define signext i8 @func_smin_back_zero_i8(i8 noundef signext %0) { +; CHECK-LABEL: func_smin_back_zero_i8: +; CHECK: # %bb.0: +; CHECK-NEXT: mins.w.sx %s0, 0, %s0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i8 @llvm.smin.i8(i8 %0, i8 0) + ret i8 %2 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define signext i16 @func_smin_back_zero_i16(i16 noundef signext %0) { +; CHECK-LABEL: func_smin_back_zero_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: mins.w.sx %s0, 0, %s0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i16 @llvm.smin.i16(i16 %0, i16 0) + ret i16 %2 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define signext i32 @func_smin_back_zero_i32(i32 noundef signext %0) { +; CHECK-LABEL: func_smin_back_zero_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: mins.w.sx %s0, 0, %s0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i32 @llvm.smin.i32(i32 %0, i32 0) + ret i32 %2 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define i64 @func_smin_back_zero_i64(i64 noundef %0) { +; CHECK-LABEL: func_smin_back_zero_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: mins.l %s0, 0, %s0 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i64 @llvm.smin.i64(i64 %0, i64 0) + ret i64 %2 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define i128 @func_smin_back_zero_i128(i128 noundef %0) { +; CHECK-LABEL: func_smin_back_zero_i128: +; CHECK: # %bb.0: +; CHECK-NEXT: or %s2, 0, (0)1 +; CHECK-NEXT: cmps.l %s3, %s1, (0)1 +; CHECK-NEXT: sra.l %s4, 
%s1, 63 +; CHECK-NEXT: and %s0, %s4, %s0 +; CHECK-NEXT: cmov.l.eq %s0, %s2, %s3 +; CHECK-NEXT: mins.l %s1, 0, %s1 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i128 @llvm.smin.i128(i128 %0, i128 0) + ret i128 %2 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define signext i8 @func_smin_fore_const_i8(i8 noundef signext %0) { +; CHECK-LABEL: func_smin_fore_const_i8: +; CHECK: # %bb.0: +; CHECK-NEXT: mins.w.sx %s0, -1, %s0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i8 @llvm.smin.i8(i8 %0, i8 -1) + ret i8 %2 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define signext i16 @func_smin_fore_const_i16(i16 noundef signext %0) { +; CHECK-LABEL: func_smin_fore_const_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: mins.w.sx %s0, %s0, (56)0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i16 @llvm.smin.i16(i16 %0, i16 255) + ret i16 %2 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define signext i32 @func_smin_fore_const_i32(i32 noundef signext %0) { +; CHECK-LABEL: func_smin_fore_const_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: mins.w.sx %s0, %s0, (56)0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i32 @llvm.smin.i32(i32 %0, i32 255) + ret i32 %2 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define i64 @func_smin_fore_const_i64(i64 noundef %0) { +; CHECK-LABEL: func_smin_fore_const_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: mins.l %s0, %s0, (56)0 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i64 @llvm.smin.i64(i64 %0, i64 255) + ret i64 %2 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define i128 @func_smin_fore_const_i128(i128 noundef %0) { +; CHECK-LABEL: func_smin_fore_const_i128: +; CHECK: # %bb.0: +; CHECK-NEXT: cmps.l %s3, %s1, (0)1 +; CHECK-NEXT: lea %s4, 255 +; CHECK-NEXT: lea %s2, 255 +; CHECK-NEXT: cmov.l.lt %s2, %s0, %s3 +; CHECK-NEXT: cmpu.l %s5, %s0, (56)0 +; CHECK-NEXT: cmov.l.lt %s4, %s0, %s5 +; CHECK-NEXT: cmov.l.eq %s2, %s4, %s3 +; CHECK-NEXT: mins.l %s1, 0, %s1 +; CHECK-NEXT: or %s0, 0, %s2 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i128 @llvm.smin.i128(i128 %0, i128 255) + ret i128 %2 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define signext i8 @func_smin_back_const_i8(i8 noundef signext %0) { +; CHECK-LABEL: func_smin_back_const_i8: +; CHECK: # %bb.0: +; CHECK-NEXT: mins.w.sx %s0, -1, %s0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i8 @llvm.smin.i8(i8 %0, i8 -1) + ret i8 %2 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define signext i16 @func_smin_back_const_i16(i16 noundef signext %0) { +; CHECK-LABEL: func_smin_back_const_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: mins.w.sx %s0, %s0, (56)0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i16 @llvm.smin.i16(i16 %0, i16 255) + ret i16 %2 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define signext i32 @func_smin_back_const_i32(i32 noundef signext %0) { +; CHECK-LABEL: func_smin_back_const_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: mins.w.sx %s0, %s0, (56)0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i32 @llvm.smin.i32(i32 %0, i32 255) + ret i32 %2 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn 
+define i64 @func_smin_back_const_i64(i64 noundef %0) { +; CHECK-LABEL: func_smin_back_const_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: mins.l %s0, %s0, (56)0 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i64 @llvm.smin.i64(i64 %0, i64 255) + ret i64 %2 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define i128 @func_smin_back_const_i128(i128 noundef %0) { +; CHECK-LABEL: func_smin_back_const_i128: +; CHECK: # %bb.0: +; CHECK-NEXT: cmps.l %s3, %s1, (0)1 +; CHECK-NEXT: lea %s4, 255 +; CHECK-NEXT: lea %s2, 255 +; CHECK-NEXT: cmov.l.lt %s2, %s0, %s3 +; CHECK-NEXT: cmpu.l %s5, %s0, (56)0 +; CHECK-NEXT: cmov.l.lt %s4, %s0, %s5 +; CHECK-NEXT: cmov.l.eq %s2, %s4, %s3 +; CHECK-NEXT: mins.l %s1, 0, %s1 +; CHECK-NEXT: or %s0, 0, %s2 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i128 @llvm.smin.i128(i128 %0, i128 255) + ret i128 %2 +} + +; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn +declare i32 @llvm.smin.i32(i32, i32) + +; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn +declare i8 @llvm.smin.i8(i8, i8) + +; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn +declare i16 @llvm.smin.i16(i16, i16) + +; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn +declare i64 @llvm.smin.i64(i64, i64) + +; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn +declare i128 @llvm.smin.i128(i128, i128) diff --git a/llvm/test/CodeGen/VE/Scalar/umax.ll b/llvm/test/CodeGen/VE/Scalar/umax.ll new file mode 100644 index 00000000000000..3df721fc789a64 --- /dev/null +++ b/llvm/test/CodeGen/VE/Scalar/umax.ll @@ -0,0 +1,358 @@ +; RUN: llc < %s -mtriple=ve | FileCheck %s + +;;; Test ‘llvm.umax.*’ intrinsic +;;; +;;; Syntax: +;;; This is an overloaded intrinsic. You can use @llvm.umax on any +;;; integer bit width or any vector of integer elements. +;;; +;;; declare i32 @llvm.umax.i32(i32 %a, i32 %b) +;;; declare <4 x i32> @llvm.umax.v4i32(<4 x i32> %a, <4 x i32> %b) +;;; +;;; Overview: +;;; Return the larger of %a and %b comparing the values as unsigned +;;; integers. Vector intrinsics operate on a per-element basis. The +;;; larger element of %a and %b at a given index is returned for +;;; that index. +;;; +;;; Arguments: +;;; The arguments (%a and %b) may be of any integer type or a vector +;;; with integer element type. The argument types must match each +;;; other, and the return type must match the argument type. +;;; +;;; Note: +;;; We test only i1/u8/u16/u32/u64/u128. 
+ +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define zeroext i1 @func_umax_var_i1(i1 noundef zeroext %0, i1 noundef zeroext %1) { +; CHECK-LABEL: func_umax_var_i1: +; CHECK: # %bb.0: +; CHECK-NEXT: maxs.w.sx %s0, %s0, %s1 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + %3 = tail call i1 @llvm.umax.i1(i1 %0, i1 %1) + ret i1 %3 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define zeroext i8 @func_umax_var_u8(i8 noundef zeroext %0, i8 noundef zeroext %1) { +; CHECK-LABEL: func_umax_var_u8: +; CHECK: # %bb.0: +; CHECK-NEXT: maxs.w.sx %s0, %s0, %s1 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + %3 = tail call i8 @llvm.umax.i8(i8 %0, i8 %1) + ret i8 %3 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define zeroext i16 @func_umax_var_u16(i16 noundef zeroext %0, i16 noundef zeroext %1) { +; CHECK-LABEL: func_umax_var_u16: +; CHECK: # %bb.0: +; CHECK-NEXT: maxs.w.sx %s0, %s0, %s1 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + %3 = tail call i16 @llvm.umax.i16(i16 %0, i16 %1) + ret i16 %3 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define zeroext i32 @func_umax_var_u32(i32 noundef zeroext %0, i32 noundef zeroext %1) { +; CHECK-LABEL: func_umax_var_u32: +; CHECK: # %bb.0: +; CHECK-NEXT: cmpu.w %s2, %s0, %s1 +; CHECK-NEXT: cmov.w.gt %s1, %s0, %s2 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + %3 = tail call i32 @llvm.umax.i32(i32 %0, i32 %1) + ret i32 %3 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define i64 @func_umax_var_u64(i64 noundef %0, i64 noundef %1) { +; CHECK-LABEL: func_umax_var_u64: +; CHECK: # %bb.0: +; CHECK-NEXT: cmpu.l %s2, %s0, %s1 +; CHECK-NEXT: cmov.l.gt %s1, %s0, %s2 +; CHECK-NEXT: or %s0, 0, %s1 +; CHECK-NEXT: b.l.t (, %s10) + %3 = tail call i64 @llvm.umax.i64(i64 %0, i64 %1) + ret i64 %3 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define i128 @func_umax_var_u128(i128 noundef %0, i128 noundef %1) { +; CHECK-LABEL: func_umax_var_u128: +; CHECK: # %bb.0: +; CHECK-NEXT: cmpu.l %s5, %s1, %s3 +; CHECK-NEXT: or %s4, 0, %s2 +; CHECK-NEXT: cmov.l.gt %s4, %s0, %s5 +; CHECK-NEXT: cmpu.l %s6, %s0, %s2 +; CHECK-NEXT: cmov.l.gt %s2, %s0, %s6 +; CHECK-NEXT: cmps.l %s0, %s1, %s3 +; CHECK-NEXT: cmov.l.eq %s4, %s2, %s0 +; CHECK-NEXT: cmov.l.gt %s3, %s1, %s5 +; CHECK-NEXT: or %s0, 0, %s4 +; CHECK-NEXT: or %s1, 0, %s3 +; CHECK-NEXT: b.l.t (, %s10) + %3 = tail call i128 @llvm.umax.i128(i128 %0, i128 %1) + ret i128 %3 +} + +; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +define zeroext i1 @func_umax_fore_zero_i1(i1 noundef returned zeroext %0) { +; CHECK-LABEL: func_umax_fore_zero_i1: +; CHECK: # %bb.0: +; CHECK-NEXT: b.l.t (, %s10) + ret i1 %0 +} + +; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +define zeroext i8 @func_umax_fore_zero_u8(i8 noundef returned zeroext %0) { +; CHECK-LABEL: func_umax_fore_zero_u8: +; CHECK: # %bb.0: +; CHECK-NEXT: b.l.t (, %s10) + ret i8 %0 +} + +; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +define zeroext i16 @func_umax_fore_zero_u16(i16 noundef returned zeroext %0) { +; CHECK-LABEL: func_umax_fore_zero_u16: +; CHECK: # %bb.0: +; CHECK-NEXT: b.l.t (, %s10) + ret i16 %0 +} + +; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone 
willreturn +define zeroext i32 @func_umax_fore_zero_u32(i32 noundef returned zeroext %0) { +; CHECK-LABEL: func_umax_fore_zero_u32: +; CHECK: # %bb.0: +; CHECK-NEXT: b.l.t (, %s10) + ret i32 %0 +} + +; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +define i64 @func_umax_fore_zero_u64(i64 noundef returned %0) { +; CHECK-LABEL: func_umax_fore_zero_u64: +; CHECK: # %bb.0: +; CHECK-NEXT: b.l.t (, %s10) + ret i64 %0 +} + +; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +define i128 @func_umax_fore_zero_u128(i128 noundef returned %0) { +; CHECK-LABEL: func_umax_fore_zero_u128: +; CHECK: # %bb.0: +; CHECK-NEXT: b.l.t (, %s10) + ret i128 %0 +} + +; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +define zeroext i1 @func_umax_back_zero_i1(i1 noundef returned zeroext %0) { +; CHECK-LABEL: func_umax_back_zero_i1: +; CHECK: # %bb.0: +; CHECK-NEXT: b.l.t (, %s10) + ret i1 %0 +} + +; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +define zeroext i8 @func_umax_back_zero_u8(i8 noundef returned zeroext %0) { +; CHECK-LABEL: func_umax_back_zero_u8: +; CHECK: # %bb.0: +; CHECK-NEXT: b.l.t (, %s10) + ret i8 %0 +} + +; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +define zeroext i16 @func_umax_back_zero_u16(i16 noundef returned zeroext %0) { +; CHECK-LABEL: func_umax_back_zero_u16: +; CHECK: # %bb.0: +; CHECK-NEXT: b.l.t (, %s10) + ret i16 %0 +} + +; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +define zeroext i32 @func_umax_back_zero_u32(i32 noundef returned zeroext %0) { +; CHECK-LABEL: func_umax_back_zero_u32: +; CHECK: # %bb.0: +; CHECK-NEXT: b.l.t (, %s10) + ret i32 %0 +} + +; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +define i64 @func_umax_back_zero_u64(i64 noundef returned %0) { +; CHECK-LABEL: func_umax_back_zero_u64: +; CHECK: # %bb.0: +; CHECK-NEXT: b.l.t (, %s10) + ret i64 %0 +} + +; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +define i128 @func_umax_back_zero_u128(i128 noundef returned %0) { +; CHECK-LABEL: func_umax_back_zero_u128: +; CHECK: # %bb.0: +; CHECK-NEXT: b.l.t (, %s10) + ret i128 %0 +} + +; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +define zeroext i1 @func_umax_fore_const_i1(i1 noundef zeroext %0) { +; CHECK-LABEL: func_umax_fore_const_i1: +; CHECK: # %bb.0: +; CHECK-NEXT: or %s0, 1, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + ret i1 true +} + +; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +define zeroext i8 @func_umax_fore_const_u8(i8 noundef zeroext %0) { +; CHECK-LABEL: func_umax_fore_const_u8: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s0, 255 +; CHECK-NEXT: b.l.t (, %s10) + ret i8 -1 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define zeroext i16 @func_umax_fore_const_u16(i16 noundef zeroext %0) { +; CHECK-LABEL: func_umax_fore_const_u16: +; CHECK: # %bb.0: +; CHECK-NEXT: maxs.w.sx %s0, %s0, (56)0 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i16 @llvm.umax.i16(i16 %0, i16 255) + ret i16 %2 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define zeroext i32 @func_umax_fore_const_u32(i32 noundef zeroext %0) { +; CHECK-LABEL: func_umax_fore_const_u32: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s1, 255 +; CHECK-NEXT: cmpu.w %s2, 
%s0, %s1 +; CHECK-NEXT: cmov.w.gt %s1, %s0, %s2 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i32 @llvm.umax.i32(i32 %0, i32 255) + ret i32 %2 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define i64 @func_umax_fore_const_u64(i64 noundef %0) { +; CHECK-LABEL: func_umax_fore_const_u64: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s1, 255 +; CHECK-NEXT: cmpu.l %s2, %s0, (56)0 +; CHECK-NEXT: cmov.l.gt %s1, %s0, %s2 +; CHECK-NEXT: or %s0, 0, %s1 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i64 @llvm.umax.i64(i64 %0, i64 255) + ret i64 %2 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define i128 @func_umax_fore_const_u128(i128 noundef %0) { +; CHECK-LABEL: func_umax_fore_const_u128: +; CHECK: # %bb.0: +; CHECK-NEXT: cmps.l %s3, %s1, (0)1 +; CHECK-NEXT: lea %s4, 255 +; CHECK-NEXT: lea %s2, 255 +; CHECK-NEXT: cmov.l.ne %s2, %s0, %s3 +; CHECK-NEXT: cmpu.l %s5, %s0, (56)0 +; CHECK-NEXT: cmov.l.gt %s4, %s0, %s5 +; CHECK-NEXT: cmov.l.eq %s2, %s4, %s3 +; CHECK-NEXT: or %s0, 0, %s2 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i128 @llvm.umax.i128(i128 %0, i128 255) + ret i128 %2 +} + +; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +define zeroext i1 @func_umax_back_const_i1(i1 noundef zeroext %0) { +; CHECK-LABEL: func_umax_back_const_i1: +; CHECK: # %bb.0: +; CHECK-NEXT: or %s0, 1, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + ret i1 true +} + +; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +define zeroext i8 @func_umax_back_const_u8(i8 noundef zeroext %0) { +; CHECK-LABEL: func_umax_back_const_u8: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s0, 255 +; CHECK-NEXT: b.l.t (, %s10) + ret i8 -1 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define zeroext i16 @func_umax_back_const_u16(i16 noundef zeroext %0) { +; CHECK-LABEL: func_umax_back_const_u16: +; CHECK: # %bb.0: +; CHECK-NEXT: maxs.w.sx %s0, %s0, (56)0 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i16 @llvm.umax.i16(i16 %0, i16 255) + ret i16 %2 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define zeroext i32 @func_umax_back_const_u32(i32 noundef zeroext %0) { +; CHECK-LABEL: func_umax_back_const_u32: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s1, 255 +; CHECK-NEXT: cmpu.w %s2, %s0, %s1 +; CHECK-NEXT: cmov.w.gt %s1, %s0, %s2 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i32 @llvm.umax.i32(i32 %0, i32 255) + ret i32 %2 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define i64 @func_umax_back_const_u64(i64 noundef %0) { +; CHECK-LABEL: func_umax_back_const_u64: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s1, 255 +; CHECK-NEXT: cmpu.l %s2, %s0, (56)0 +; CHECK-NEXT: cmov.l.gt %s1, %s0, %s2 +; CHECK-NEXT: or %s0, 0, %s1 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i64 @llvm.umax.i64(i64 %0, i64 255) + ret i64 %2 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define i128 @func_umax_back_const_u128(i128 noundef %0) { +; CHECK-LABEL: func_umax_back_const_u128: +; CHECK: # %bb.0: +; CHECK-NEXT: cmps.l %s3, %s1, (0)1 +; CHECK-NEXT: lea %s4, 255 +; CHECK-NEXT: lea %s2, 255 +; CHECK-NEXT: cmov.l.ne %s2, %s0, %s3 +; CHECK-NEXT: cmpu.l %s5, %s0, (56)0 +; CHECK-NEXT: cmov.l.gt %s4, %s0, %s5 +; CHECK-NEXT: cmov.l.eq %s2, %s4, %s3 +; CHECK-NEXT: or %s0, 0, %s2 +; CHECK-NEXT: b.l.t (, %s10) 
+ %2 = tail call i128 @llvm.umax.i128(i128 %0, i128 255) + ret i128 %2 +} + +; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn +declare i32 @llvm.umax.i32(i32, i32) + +; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn +declare i1 @llvm.umax.i1(i1, i1) + +; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn +declare i8 @llvm.umax.i8(i8, i8) + +; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn +declare i16 @llvm.umax.i16(i16, i16) + +; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn +declare i64 @llvm.umax.i64(i64, i64) + +; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn +declare i128 @llvm.umax.i128(i128, i128) diff --git a/llvm/test/CodeGen/VE/Scalar/umin.ll b/llvm/test/CodeGen/VE/Scalar/umin.ll new file mode 100644 index 00000000000000..937fa420c8a346 --- /dev/null +++ b/llvm/test/CodeGen/VE/Scalar/umin.ll @@ -0,0 +1,368 @@ +; RUN: llc < %s -mtriple=ve | FileCheck %s + +;;; Test ‘llvm.umin.*’ intrinsic +;;; +;;; Syntax: +;;; This is an overloaded intrinsic. You can use @llvm.umin on any +;;; integer bit width or any vector of integer elements. +;;; +;;; declare i32 @llvm.umin.i32(i32 %a, i32 %b) +;;; declare <4 x i32> @llvm.umin.v4i32(<4 x i32> %a, <4 x i32> %b) +;;; +;;; Overview: +;;; Return the smaller of %a and %b comparing the values as unsigned +;;; integers. Vector intrinsics operate on a per-element basis. The +;;; smaller element of %a and %b at a given index is returned for +;;; that index. +;;; +;;; Arguments: +;;; The arguments (%a and %b) may be of any integer type or a vector +;;; with integer element type. The argument types must match each +;;; other, and the return type must match the argument type. +;;; +;;; Note: +;;; We test only i1/u8/u16/u32/u64/u128. 
+ +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define zeroext i1 @func_umin_var_i1(i1 noundef zeroext %0, i1 noundef zeroext %1) { +; CHECK-LABEL: func_umin_var_i1: +; CHECK: # %bb.0: +; CHECK-NEXT: mins.w.sx %s0, %s0, %s1 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + %3 = tail call i1 @llvm.umin.i1(i1 %0, i1 %1) + ret i1 %3 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define zeroext i8 @func_umin_var_u8(i8 noundef zeroext %0, i8 noundef zeroext %1) { +; CHECK-LABEL: func_umin_var_u8: +; CHECK: # %bb.0: +; CHECK-NEXT: mins.w.sx %s0, %s0, %s1 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + %3 = tail call i8 @llvm.umin.i8(i8 %0, i8 %1) + ret i8 %3 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define zeroext i16 @func_umin_var_u16(i16 noundef zeroext %0, i16 noundef zeroext %1) { +; CHECK-LABEL: func_umin_var_u16: +; CHECK: # %bb.0: +; CHECK-NEXT: mins.w.sx %s0, %s0, %s1 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + %3 = tail call i16 @llvm.umin.i16(i16 %0, i16 %1) + ret i16 %3 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define zeroext i32 @func_umin_var_u32(i32 noundef zeroext %0, i32 noundef zeroext %1) { +; CHECK-LABEL: func_umin_var_u32: +; CHECK: # %bb.0: +; CHECK-NEXT: cmpu.w %s2, %s0, %s1 +; CHECK-NEXT: cmov.w.lt %s1, %s0, %s2 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + %3 = tail call i32 @llvm.umin.i32(i32 %0, i32 %1) + ret i32 %3 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define i64 @func_umin_var_u64(i64 noundef %0, i64 noundef %1) { +; CHECK-LABEL: func_umin_var_u64: +; CHECK: # %bb.0: +; CHECK-NEXT: cmpu.l %s2, %s0, %s1 +; CHECK-NEXT: cmov.l.lt %s1, %s0, %s2 +; CHECK-NEXT: or %s0, 0, %s1 +; CHECK-NEXT: b.l.t (, %s10) + %3 = tail call i64 @llvm.umin.i64(i64 %0, i64 %1) + ret i64 %3 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define i128 @func_umin_var_u128(i128 noundef %0, i128 noundef %1) { +; CHECK-LABEL: func_umin_var_u128: +; CHECK: # %bb.0: +; CHECK-NEXT: cmpu.l %s5, %s1, %s3 +; CHECK-NEXT: or %s4, 0, %s2 +; CHECK-NEXT: cmov.l.lt %s4, %s0, %s5 +; CHECK-NEXT: cmpu.l %s6, %s0, %s2 +; CHECK-NEXT: cmov.l.lt %s2, %s0, %s6 +; CHECK-NEXT: cmps.l %s0, %s1, %s3 +; CHECK-NEXT: cmov.l.eq %s4, %s2, %s0 +; CHECK-NEXT: cmov.l.lt %s3, %s1, %s5 +; CHECK-NEXT: or %s0, 0, %s4 +; CHECK-NEXT: or %s1, 0, %s3 +; CHECK-NEXT: b.l.t (, %s10) + %3 = tail call i128 @llvm.umin.i128(i128 %0, i128 %1) + ret i128 %3 +} + +; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +define zeroext i1 @func_umin_fore_zero_i1(i1 noundef zeroext %0) { +; CHECK-LABEL: func_umin_fore_zero_i1: +; CHECK: # %bb.0: +; CHECK-NEXT: or %s0, 0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + ret i1 false +} + +; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +define zeroext i8 @func_umin_fore_zero_u8(i8 noundef zeroext %0) { +; CHECK-LABEL: func_umin_fore_zero_u8: +; CHECK: # %bb.0: +; CHECK-NEXT: or %s0, 0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + ret i8 0 +} + +; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +define zeroext i16 @func_umin_fore_zero_u16(i16 noundef zeroext %0) { +; CHECK-LABEL: func_umin_fore_zero_u16: +; CHECK: # %bb.0: +; CHECK-NEXT: or %s0, 0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + ret i16 0 +} + +; Function 
Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +define zeroext i32 @func_umin_fore_zero_u32(i32 noundef zeroext %0) { +; CHECK-LABEL: func_umin_fore_zero_u32: +; CHECK: # %bb.0: +; CHECK-NEXT: or %s0, 0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + ret i32 0 +} + +; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +define i64 @func_umin_fore_zero_u64(i64 noundef %0) { +; CHECK-LABEL: func_umin_fore_zero_u64: +; CHECK: # %bb.0: +; CHECK-NEXT: or %s0, 0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + ret i64 0 +} + +; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +define i128 @func_umin_fore_zero_u128(i128 noundef %0) { +; CHECK-LABEL: func_umin_fore_zero_u128: +; CHECK: # %bb.0: +; CHECK-NEXT: or %s0, 0, (0)1 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + ret i128 0 +} + +; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +define zeroext i1 @func_umin_back_zero_i1(i1 noundef zeroext %0) { +; CHECK-LABEL: func_umin_back_zero_i1: +; CHECK: # %bb.0: +; CHECK-NEXT: or %s0, 0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + ret i1 false +} + +; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +define zeroext i8 @func_umin_back_zero_u8(i8 noundef zeroext %0) { +; CHECK-LABEL: func_umin_back_zero_u8: +; CHECK: # %bb.0: +; CHECK-NEXT: or %s0, 0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + ret i8 0 +} + +; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +define zeroext i16 @func_umin_back_zero_u16(i16 noundef zeroext %0) { +; CHECK-LABEL: func_umin_back_zero_u16: +; CHECK: # %bb.0: +; CHECK-NEXT: or %s0, 0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + ret i16 0 +} + +; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +define zeroext i32 @func_umin_back_zero_u32(i32 noundef zeroext %0) { +; CHECK-LABEL: func_umin_back_zero_u32: +; CHECK: # %bb.0: +; CHECK-NEXT: or %s0, 0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + ret i32 0 +} + +; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +define i64 @func_umin_back_zero_u64(i64 noundef %0) { +; CHECK-LABEL: func_umin_back_zero_u64: +; CHECK: # %bb.0: +; CHECK-NEXT: or %s0, 0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + ret i64 0 +} + +; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +define i128 @func_umin_back_zero_u128(i128 noundef %0) { +; CHECK-LABEL: func_umin_back_zero_u128: +; CHECK: # %bb.0: +; CHECK-NEXT: or %s0, 0, (0)1 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + ret i128 0 +} + +; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +define zeroext i1 @func_umin_fore_const_i1(i1 noundef returned zeroext %0) { +; CHECK-LABEL: func_umin_fore_const_i1: +; CHECK: # %bb.0: +; CHECK-NEXT: b.l.t (, %s10) + ret i1 %0 +} + +; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +define zeroext i8 @func_umin_fore_const_u8(i8 noundef returned zeroext %0) { +; CHECK-LABEL: func_umin_fore_const_u8: +; CHECK: # %bb.0: +; CHECK-NEXT: b.l.t (, %s10) + ret i8 %0 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define zeroext i16 @func_umin_fore_const_u16(i16 noundef zeroext %0) { +; CHECK-LABEL: func_umin_fore_const_u16: +; CHECK: # %bb.0: +; CHECK-NEXT: mins.w.sx %s0, %s0, (56)0 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i16 @llvm.umin.i16(i16 %0, i16 255) + ret 
i16 %2 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define zeroext i32 @func_umin_fore_const_u32(i32 noundef zeroext %0) { +; CHECK-LABEL: func_umin_fore_const_u32: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s1, 255 +; CHECK-NEXT: cmpu.w %s2, %s0, %s1 +; CHECK-NEXT: cmov.w.lt %s1, %s0, %s2 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i32 @llvm.umin.i32(i32 %0, i32 255) + ret i32 %2 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define i64 @func_umin_fore_const_u64(i64 noundef %0) { +; CHECK-LABEL: func_umin_fore_const_u64: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s1, 255 +; CHECK-NEXT: cmpu.l %s2, %s0, (56)0 +; CHECK-NEXT: cmov.l.lt %s1, %s0, %s2 +; CHECK-NEXT: or %s0, 0, %s1 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i64 @llvm.umin.i64(i64 %0, i64 255) + ret i64 %2 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define i128 @func_umin_fore_const_u128(i128 noundef %0) { +; CHECK-LABEL: func_umin_fore_const_u128: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s2, 255 +; CHECK-NEXT: cmpu.l %s3, %s0, (56)0 +; CHECK-NEXT: lea %s4, 255 +; CHECK-NEXT: cmov.l.lt %s4, %s0, %s3 +; CHECK-NEXT: cmps.l %s0, %s1, (0)1 +; CHECK-NEXT: cmov.l.eq %s2, %s4, %s0 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: or %s0, 0, %s2 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i128 @llvm.umin.i128(i128 %0, i128 255) + ret i128 %2 +} + +; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +define zeroext i1 @func_umin_back_const_i1(i1 noundef returned zeroext %0) { +; CHECK-LABEL: func_umin_back_const_i1: +; CHECK: # %bb.0: +; CHECK-NEXT: b.l.t (, %s10) + ret i1 %0 +} + +; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +define zeroext i8 @func_umin_back_const_u8(i8 noundef returned zeroext %0) { +; CHECK-LABEL: func_umin_back_const_u8: +; CHECK: # %bb.0: +; CHECK-NEXT: b.l.t (, %s10) + ret i8 %0 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define zeroext i16 @func_umin_back_const_u16(i16 noundef zeroext %0) { +; CHECK-LABEL: func_umin_back_const_u16: +; CHECK: # %bb.0: +; CHECK-NEXT: mins.w.sx %s0, %s0, (56)0 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i16 @llvm.umin.i16(i16 %0, i16 255) + ret i16 %2 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define zeroext i32 @func_umin_back_const_u32(i32 noundef zeroext %0) { +; CHECK-LABEL: func_umin_back_const_u32: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s1, 255 +; CHECK-NEXT: cmpu.w %s2, %s0, %s1 +; CHECK-NEXT: cmov.w.lt %s1, %s0, %s2 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i32 @llvm.umin.i32(i32 %0, i32 255) + ret i32 %2 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define i64 @func_umin_back_const_u64(i64 noundef %0) { +; CHECK-LABEL: func_umin_back_const_u64: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s1, 255 +; CHECK-NEXT: cmpu.l %s2, %s0, (56)0 +; CHECK-NEXT: cmov.l.lt %s1, %s0, %s2 +; CHECK-NEXT: or %s0, 0, %s1 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i64 @llvm.umin.i64(i64 %0, i64 255) + ret i64 %2 +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define i128 @func_umin_back_const_u128(i128 noundef %0) { +; CHECK-LABEL: func_umin_back_const_u128: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s2, 255 +; CHECK-NEXT: cmpu.l %s3, %s0, (56)0 +; CHECK-NEXT: lea 
%s4, 255 +; CHECK-NEXT: cmov.l.lt %s4, %s0, %s3 +; CHECK-NEXT: cmps.l %s0, %s1, (0)1 +; CHECK-NEXT: cmov.l.eq %s2, %s4, %s0 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: or %s0, 0, %s2 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i128 @llvm.umin.i128(i128 %0, i128 255) + ret i128 %2 +} + +; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn +declare i32 @llvm.umin.i32(i32, i32) + +; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn +declare i1 @llvm.umin.i1(i1, i1) + +; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn +declare i8 @llvm.umin.i8(i8, i8) + +; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn +declare i16 @llvm.umin.i16(i16, i16) + +; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn +declare i64 @llvm.umin.i64(i64, i64) + +; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn +declare i128 @llvm.umin.i128(i128, i128) diff --git a/llvm/test/Transforms/LoopDistribute/laa-invalidation.ll b/llvm/test/Transforms/LoopDistribute/laa-invalidation.ll new file mode 100644 index 00000000000000..2bf507bfe5d892 --- /dev/null +++ b/llvm/test/Transforms/LoopDistribute/laa-invalidation.ll @@ -0,0 +1,34 @@ +; RUN: opt -passes='loop-load-elim,indvars,loop-distribute' -enable-loop-distribute %s + +; REQUIRES: asserts +; XFAIL: * + +define void @test_pr50940(ptr %A, ptr %B) { +entry: + %gep.A.1 = getelementptr inbounds i16, ptr %A, i64 1 + br label %outer.header + +outer.header: + %gep.A.2 = getelementptr inbounds i16, ptr %gep.A.1, i64 1 + br i1 false, label %outer.latch, label %inner.ph + +inner.ph: ; preds = %for.body5 + %lcssa.gep = phi ptr [ %gep.A.2, %outer.header ] + %gep.A.3 = getelementptr inbounds i16, ptr %A, i64 3 + br label %inner + +inner: + %iv = phi i16 [ 0, %inner.ph ], [ %iv.next, %inner ] + %l = load <2 x i16>, ptr %lcssa.gep, align 1 + store i16 0, ptr %gep.A.3, align 1 + store i16 1, ptr %B, align 1 + %iv.next = add nuw nsw i16 %iv, 1 + %c.1 = icmp ult i16 %iv, 38 + br i1 %c.1, label %inner, label %exit + +outer.latch: + br label %outer.header + +exit: + ret void +} diff --git a/llvm/test/tools/llvm-readobj/COFF/exports-forwarder.yaml b/llvm/test/tools/llvm-readobj/COFF/exports-forwarder.yaml new file mode 100644 index 00000000000000..61f8ab1e595d4d --- /dev/null +++ b/llvm/test/tools/llvm-readobj/COFF/exports-forwarder.yaml @@ -0,0 +1,52 @@ +# RUN: yaml2obj %s -o %t +# RUN: llvm-readobj --coff-exports %t | FileCheck %s + +# CHECK: Export { +# CHECK-NEXT: Ordinal: 1 +# CHECK-NEXT: Name: LoadLibrary +# CHECK-NEXT: ForwardedTo: kernel32.LoadLibrary +# CHECK-NEXT: } + +# Test file generated with: +# clang -O2 --target=x86_64-windows-msvc test.c -nostdlib -c -o test.obj +# lld-link -dll -out:test.dll -entry:entry -export:LoadLibrary=kernel32.LoadLibrary test.obj +# test.c: +# void entry(void) {} + +--- !COFF +OptionalHeader: + AddressOfEntryPoint: 4096 + ImageBase: 6442450944 + SectionAlignment: 4096 + FileAlignment: 512 + MajorOperatingSystemVersion: 6 + MinorOperatingSystemVersion: 0 + MajorImageVersion: 0 + MinorImageVersion: 0 + MajorSubsystemVersion: 6 + MinorSubsystemVersion: 0 + Subsystem: IMAGE_SUBSYSTEM_WINDOWS_GUI + DLLCharacteristics: [ IMAGE_DLL_CHARACTERISTICS_HIGH_ENTROPY_VA, IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE, IMAGE_DLL_CHARACTERISTICS_NX_COMPAT ] + SizeOfStackReserve: 1048576 + SizeOfStackCommit: 4096 + SizeOfHeapReserve: 1048576 + SizeOfHeapCommit: 4096 + 
ExportTable: + RelativeVirtualAddress: 8192 + Size: 110 +header: + Machine: IMAGE_FILE_MACHINE_AMD64 + Characteristics: [ IMAGE_FILE_EXECUTABLE_IMAGE, IMAGE_FILE_LARGE_ADDRESS_AWARE, IMAGE_FILE_DLL ] +sections: + - Name: .text + Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ] + VirtualAddress: 4096 + VirtualSize: 1 + SectionData: C3 + - Name: .rdata + Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ ] + VirtualAddress: 8192 + VirtualSize: 110 + SectionData: 0000000000000000000000002820000001000000010000000100000043200000472000004B2000006578706F72742D666F727761726465722E632E746D702E646C6C00592000004D20000000004C6F61644C696272617279006B65726E656C33322E4C6F61644C69627261727900 +symbols: [] +... diff --git a/llvm/tools/llvm-readobj/COFFDumper.cpp b/llvm/tools/llvm-readobj/COFFDumper.cpp index 56c5d9c0ae5387..4ae9d131535695 100644 --- a/llvm/tools/llvm-readobj/COFFDumper.cpp +++ b/llvm/tools/llvm-readobj/COFFDumper.cpp @@ -1789,18 +1789,29 @@ void COFFDumper::printCOFFExports() { DictScope Export(W, "Export"); StringRef Name; - uint32_t Ordinal, RVA; + uint32_t Ordinal; + bool IsForwarder; if (Error E = Exp.getSymbolName(Name)) reportError(std::move(E), Obj->getFileName()); if (Error E = Exp.getOrdinal(Ordinal)) reportError(std::move(E), Obj->getFileName()); - if (Error E = Exp.getExportRVA(RVA)) + if (Error E = Exp.isForwarder(IsForwarder)) reportError(std::move(E), Obj->getFileName()); W.printNumber("Ordinal", Ordinal); W.printString("Name", Name); - W.printHex("RVA", RVA); + StringRef ForwardTo; + if (IsForwarder) { + if (Error E = Exp.getForwardTo(ForwardTo)) + reportError(std::move(E), Obj->getFileName()); + W.printString("ForwardedTo", ForwardTo); + } else { + uint32_t RVA; + if (Error E = Exp.getExportRVA(RVA)) + reportError(std::move(E), Obj->getFileName()); + W.printHex("RVA", RVA); + } } } diff --git a/llvm/tools/llvm-readobj/llvm-readobj.h b/llvm/tools/llvm-readobj/llvm-readobj.h index 989cd0aba6c01c..5a9fe28d883e57 100644 --- a/llvm/tools/llvm-readobj/llvm-readobj.h +++ b/llvm/tools/llvm-readobj/llvm-readobj.h @@ -50,6 +50,6 @@ extern OutputStyleTy Output; { #enum, ns::enum } #define LLVM_READOBJ_ENUM_CLASS_ENT(enum_class, enum) \ - { #enum, std::underlying_type<enum_class>::type(enum_class::enum) } + { #enum, std::underlying_type_t<enum_class>(enum_class::enum) } #endif diff --git a/llvm/unittests/ExecutionEngine/JITLink/LinkGraphTests.cpp b/llvm/unittests/ExecutionEngine/JITLink/LinkGraphTests.cpp index 9e54c7745e4dd4..63b161ed73e0b9 100644 --- a/llvm/unittests/ExecutionEngine/JITLink/LinkGraphTests.cpp +++ b/llvm/unittests/ExecutionEngine/JITLink/LinkGraphTests.cpp @@ -307,7 +307,7 @@ TEST(LinkGraphTest, MakeAbsolute) { << "Unexpected number of external symbols"; // Add an external symbol. - auto &S2 = G.addExternalSymbol("S2", 0, Linkage::Strong); + auto &S2 = G.addExternalSymbol("S2", 0, true); EXPECT_TRUE(S2.isExternal()) << "Symbol should be external"; EXPECT_EQ( @@ -356,7 +356,7 @@ TEST(LinkGraphTest, MakeDefined) { auto &B1 = G.createContentBlock(Sec, BlockContent, B1Addr, 8, 0); // Add an external symbol.
-  auto &S1 = G.addExternalSymbol("S1", 4, Linkage::Strong);
+  auto &S1 = G.addExternalSymbol("S1", 4, true);

   EXPECT_FALSE(S1.isDefined()) << "Symbol should not be defined";
   EXPECT_TRUE(S1.isExternal()) << "Symbol should be external";
diff --git a/llvm/utils/TableGen/CodeGenDAGPatterns.h b/llvm/utils/TableGen/CodeGenDAGPatterns.h
index dbdc72f0873a1d..83ac7173441e61 100644
--- a/llvm/utils/TableGen/CodeGenDAGPatterns.h
+++ b/llvm/utils/TableGen/CodeGenDAGPatterns.h
@@ -50,7 +50,7 @@ using TreePatternNodePtr = std::shared_ptr<TreePatternNode>;
 /// To reduce the allocations even further, make MachineValueTypeSet own
 /// the storage and use std::array as the bit container.
 struct MachineValueTypeSet {
-  static_assert(std::is_same<std::underlying_type<MVT::SimpleValueType>::type,
+  static_assert(std::is_same<std::underlying_type_t<MVT::SimpleValueType>,
                              uint8_t>::value,
                 "Change uint8_t here to the SimpleValueType's type");
   static unsigned constexpr Capacity = std::numeric_limits<uint8_t>::max()+1;
diff --git a/mlir/include/mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h b/mlir/include/mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h
index 906c1152bd2d03..34d01392e2606d 100644
--- a/mlir/include/mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h
+++ b/mlir/include/mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h
@@ -16,15 +16,13 @@
 class Pass;
 class LLVMTypeConverter;
 class RewritePatternSet;

-#define GEN_PASS_DECL_CONVERTMEMREFTOLLVM
+#define GEN_PASS_DECL_MEMREFTOLLVMCONVERSIONPASS
 #include "mlir/Conversion/Passes.h.inc"

 /// Collect a set of patterns to convert memory-related operations from the
 /// MemRef dialect to the LLVM dialect.
 void populateMemRefToLLVMConversionPatterns(LLVMTypeConverter &converter,
                                             RewritePatternSet &patterns);
-
-std::unique_ptr<Pass> createMemRefToLLVMPass();
 } // namespace mlir

 #endif // MLIR_CONVERSION_MEMREFTOLLVM_MEMREFTOLLVM_H
diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td
index 6163c6ae5d0cb9..5f48481647935f 100644
--- a/mlir/include/mlir/Conversion/Passes.td
+++ b/mlir/include/mlir/Conversion/Passes.td
@@ -541,10 +541,9 @@ def ConvertMathToFuncs : Pass<"convert-math-to-funcs", "ModuleOp"> {
 // MemRefToLLVM
 //===----------------------------------------------------------------------===//

-def ConvertMemRefToLLVM : Pass<"convert-memref-to-llvm", "ModuleOp"> {
+def MemRefToLLVMConversionPass : Pass<"convert-memref-to-llvm", "ModuleOp"> {
   let summary = "Convert operations from the MemRef dialect to the LLVM "
                 "dialect";
-  let constructor = "mlir::createMemRefToLLVMPass()";
   let dependentDialects = ["LLVM::LLVMDialect"];
   let options = [
     Option<"useAlignedAlloc", "use-aligned-alloc", "bool", /*default=*/"false",
diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h b/mlir/include/mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h
index 422a101040e156..4fb212024b1c1b 100644
--- a/mlir/include/mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h
+++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h
@@ -130,7 +130,7 @@ class OneShotAnalysisState : public AnalysisState {

   OneShotAnalysisState(const OneShotAnalysisState &) = delete;

-  virtual ~OneShotAnalysisState() = default;
+  ~OneShotAnalysisState() override = default;

   /// Return a reference to the BufferizationAliasInfo.
   BufferizationAliasInfo &getAliasInfo() { return aliasInfo; }
diff --git a/mlir/include/mlir/Dialect/CMakeLists.txt b/mlir/include/mlir/Dialect/CMakeLists.txt
index 270cd54cca96f3..22505ba06e521f 100644
--- a/mlir/include/mlir/Dialect/CMakeLists.txt
+++ b/mlir/include/mlir/Dialect/CMakeLists.txt
@@ -1,10 +1,10 @@
-add_subdirectory(Affine)
 add_subdirectory(AMDGPU)
+add_subdirectory(AMX)
+add_subdirectory(Affine)
 add_subdirectory(Arithmetic)
-add_subdirectory(Async)
 add_subdirectory(ArmNeon)
 add_subdirectory(ArmSVE)
-add_subdirectory(AMX)
+add_subdirectory(Async)
 add_subdirectory(Bufferization)
 add_subdirectory(Complex)
 add_subdirectory(ControlFlow)
@@ -12,11 +12,11 @@ add_subdirectory(DLTI)
 add_subdirectory(EmitC)
 add_subdirectory(Func)
 add_subdirectory(GPU)
-add_subdirectory(Math)
-add_subdirectory(Linalg)
 add_subdirectory(LLVMIR)
-add_subdirectory(MemRef)
+add_subdirectory(Linalg)
 add_subdirectory(MLProgram)
+add_subdirectory(Math)
+add_subdirectory(MemRef)
 add_subdirectory(NVGPU)
 add_subdirectory(OpenACC)
 add_subdirectory(OpenMP)
@@ -24,11 +24,12 @@ add_subdirectory(PDL)
 add_subdirectory(PDLInterp)
 add_subdirectory(Quant)
 add_subdirectory(SCF)
+add_subdirectory(SPIRV)
 add_subdirectory(Shape)
 add_subdirectory(SparseTensor)
-add_subdirectory(SPIRV)
 add_subdirectory(Tensor)
 add_subdirectory(Tosa)
 add_subdirectory(Transform)
+add_subdirectory(Utils)
 add_subdirectory(Vector)
 add_subdirectory(X86Vector)
diff --git a/mlir/include/mlir/Dialect/Linalg/IR/CMakeLists.txt b/mlir/include/mlir/Dialect/Linalg/IR/CMakeLists.txt
index 35840bdce8d2d6..f5d48b2ebcefe5 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/CMakeLists.txt
+++ b/mlir/include/mlir/Dialect/Linalg/IR/CMakeLists.txt
@@ -44,7 +44,7 @@ add_dependencies(mlir-headers LinalgOdsGen)

 add_mlir_dialect(LinalgOps linalg)

-set(LLVM_TARGET_DEFINITIONS LinalgOps.td)
+set(LLVM_TARGET_DEFINITIONS LinalgEnums.td)
 mlir_tablegen(LinalgOpsEnums.h.inc -gen-enum-decls)
 mlir_tablegen(LinalgOpsEnums.cpp.inc -gen-enum-defs)
 add_public_tablegen_target(MLIRLinalgOpsEnumsIncGen)
diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgBase.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgBase.td
index 87108a9ef8e3aa..f55f093e7b1fc9 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgBase.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgBase.td
@@ -13,6 +13,7 @@
 #ifndef LINALG_BASE
 #define LINALG_BASE

+include "mlir/Dialect/Linalg/IR/LinalgEnums.td"
 include "mlir/IR/EnumAttr.td"
 include "mlir/IR/OpBase.td"

@@ -62,37 +63,6 @@ def Linalg_Dialect : Dialect {
 }

 // Define the function attribute enums matching the OpDSL functions.
-def UnaryFn : I32EnumAttr<"UnaryFn", "", [
-  I32EnumAttrCase<"exp", 0>,
-  I32EnumAttrCase<"log", 1>,
-  I32EnumAttrCase<"abs", 2>,
-  I32EnumAttrCase<"ceil", 3>,
-  I32EnumAttrCase<"floor", 4>,
-  I32EnumAttrCase<"negf", 5>
-]> {
-  let genSpecializedAttr = 0;
-  let cppNamespace = "::mlir::linalg";
-}
-def BinaryFn : I32EnumAttr<"BinaryFn", "", [
-  I32EnumAttrCase<"add", 0>,
-  I32EnumAttrCase<"sub", 1>,
-  I32EnumAttrCase<"mul", 2>,
-  I32EnumAttrCase<"max_signed", 3>,
-  I32EnumAttrCase<"min_signed", 4>,
-  I32EnumAttrCase<"max_unsigned", 5>,
-  I32EnumAttrCase<"min_unsigned", 6>
-]> {
-  let genSpecializedAttr = 0;
-  let cppNamespace = "::mlir::linalg";
-}
-def TypeFn : I32EnumAttr<"TypeFn", "", [
-  I32EnumAttrCase<"cast_signed", 0>,
-  I32EnumAttrCase<"cast_unsigned", 1>
-]> {
-  let genSpecializedAttr = 0;
-  let cppNamespace = "::mlir::linalg";
-}
-
 def UnaryFnAttr : EnumAttr<Linalg_Dialect, UnaryFn, "unary_fn"> {
   let assemblyFormat = "`<` $value `>`";
 }
diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgEnums.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgEnums.td
new file mode 100644
index 00000000000000..6d50cda9718625
--- /dev/null
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgEnums.td
@@ -0,0 +1,50 @@
+//===- LinalgEnums.td - Linalg dialect enums ----------------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the definition file for enums used in linear algebra operations.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LINALG_ENUMS
+#define LINALG_ENUMS
+
+include "mlir/IR/EnumAttr.td"
+
+// Define the function attribute enums matching the OpDSL functions.
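+// Editorial sketch (not part of the original change): combined with the
+// EnumAttr wrappers that remain in LinalgBase.td, these enums surface in IR
+// as attributes such as #linalg.unary_fn<exp> or #linalg.binary_fn<mul>,
+// e.g. on a named op: linalg.elemwise_unary {fun = #linalg.unary_fn<exp>}.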
+def UnaryFn : I32EnumAttr<"UnaryFn", "", [
+  I32EnumAttrCase<"exp", 0>,
+  I32EnumAttrCase<"log", 1>,
+  I32EnumAttrCase<"abs", 2>,
+  I32EnumAttrCase<"ceil", 3>,
+  I32EnumAttrCase<"floor", 4>,
+  I32EnumAttrCase<"negf", 5>
+]> {
+  let genSpecializedAttr = 0;
+  let cppNamespace = "::mlir::linalg";
+}
+def BinaryFn : I32EnumAttr<"BinaryFn", "", [
+  I32EnumAttrCase<"add", 0>,
+  I32EnumAttrCase<"sub", 1>,
+  I32EnumAttrCase<"mul", 2>,
+  I32EnumAttrCase<"max_signed", 3>,
+  I32EnumAttrCase<"min_signed", 4>,
+  I32EnumAttrCase<"max_unsigned", 5>,
+  I32EnumAttrCase<"min_unsigned", 6>
+]> {
+  let genSpecializedAttr = 0;
+  let cppNamespace = "::mlir::linalg";
+}
+def TypeFn : I32EnumAttr<"TypeFn", "", [
+  I32EnumAttrCase<"cast_signed", 0>,
+  I32EnumAttrCase<"cast_unsigned", 1>
+]> {
+  let genSpecializedAttr = 0;
+  let cppNamespace = "::mlir::linalg";
+}
+
+#endif // LINALG_ENUMS
diff --git a/mlir/include/mlir/Dialect/Utils/CMakeLists.txt b/mlir/include/mlir/Dialect/Utils/CMakeLists.txt
new file mode 100644
index 00000000000000..edfb1ca873abe6
--- /dev/null
+++ b/mlir/include/mlir/Dialect/Utils/CMakeLists.txt
@@ -0,0 +1,5 @@
+set(LLVM_TARGET_DEFINITIONS StructuredOpsUtils.td)
+mlir_tablegen(DialectUtilsEnums.h.inc -gen-enum-decls)
+mlir_tablegen(DialectUtilsEnums.cpp.inc -gen-enum-defs)
+add_public_tablegen_target(MLIRDialectUtilsIncGen)
+add_dependencies(mlir-headers MLIRDialectUtilsIncGen)
diff --git a/mlir/include/mlir/Dialect/Utils/StructuredOpsUtils.h b/mlir/include/mlir/Dialect/Utils/StructuredOpsUtils.h
index fd470c1330841d..8f7ac8cc2cee60 100644
--- a/mlir/include/mlir/Dialect/Utils/StructuredOpsUtils.h
+++ b/mlir/include/mlir/Dialect/Utils/StructuredOpsUtils.h
@@ -23,6 +23,9 @@
 #include "mlir/Support/LLVM.h"
 #include "llvm/ADT/StringRef.h"

+// Pull in all enum type definitions and utility function declarations.
+#include "mlir/Dialect/Utils/DialectUtilsEnums.h.inc"
+
 namespace mlir {

 class OpBuilder;
diff --git a/mlir/include/mlir/Dialect/Utils/StructuredOpsUtils.td b/mlir/include/mlir/Dialect/Utils/StructuredOpsUtils.td
new file mode 100644
index 00000000000000..4200343ce3e132
--- /dev/null
+++ b/mlir/include/mlir/Dialect/Utils/StructuredOpsUtils.td
@@ -0,0 +1,23 @@
+//===- StructuredOpsUtils.td - structured ops enums --------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef STRUCTURED_OPS_UTILS
+#define STRUCTURED_OPS_UTILS
+
+include "mlir/IR/OpBase.td"
+include "mlir/IR/EnumAttr.td"
+
+def IteratorType : I32EnumAttr<"IteratorType", "Iterator type", [
+  I32EnumAttrCase<"parallel", 0>,
+  I32EnumAttrCase<"reduction", 1>
+]> {
+  let genSpecializedAttr = 0;
+  let cppNamespace = "::mlir::utils";
+}
+
+#endif // STRUCTURED_OPS_UTILS
diff --git a/mlir/include/mlir/Interfaces/TilingInterface.h b/mlir/include/mlir/Interfaces/TilingInterface.h
index 51f09307074cae..99cbe21b178ca3 100644
--- a/mlir/include/mlir/Interfaces/TilingInterface.h
+++ b/mlir/include/mlir/Interfaces/TilingInterface.h
@@ -14,6 +14,7 @@
 #ifndef MLIR_INTERFACES_TILINGINTERFACE_H_
 #define MLIR_INTERFACES_TILINGINTERFACE_H_

+#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/BuiltinTypes.h"
 #include "mlir/IR/Operation.h"
diff --git a/mlir/include/mlir/Interfaces/TilingInterface.td b/mlir/include/mlir/Interfaces/TilingInterface.td
index 222c075e4eebf8..b7af9765393654 100644
--- a/mlir/include/mlir/Interfaces/TilingInterface.td
+++ b/mlir/include/mlir/Interfaces/TilingInterface.td
@@ -41,13 +41,9 @@ def TilingInterface : OpInterface<"TilingInterface"> {
     >,
     InterfaceMethod<
       /*desc=*/[{
-        Returns a list of `StringRef`s that describe the number of
-        loops and the iterator types of the operation. The list is
-        expected to use
-        `getParallelIteratorTypeName()`/`getReductionIteratorTypeName()`
-        from MLIR Structured Op Utils.
+        Returns a list of iterator types that describe the number of loops.
       }],
-      /*retType=*/"SmallVector<StringRef>",
+      /*retType=*/"SmallVector<utils::IteratorType>",
       /*methodName=*/"getLoopIteratorTypes",
       /*args=*/(ins),
       /*methodBody=*/"",
diff --git a/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp b/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp
index 8fe631b25bad40..c691c07f9bc4d6 100644
--- a/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp
+++ b/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp
@@ -24,7 +24,7 @@
 #include "llvm/ADT/SmallBitVector.h"

 namespace mlir {
-#define GEN_PASS_DEF_CONVERTMEMREFTOLLVM
+#define GEN_PASS_DEF_MEMREFTOLLVMCONVERSIONPASS
 #include "mlir/Conversion/Passes.h.inc"
 } // namespace mlir

@@ -2108,9 +2108,9 @@ void mlir::populateMemRefToLLVMConversionPatterns(LLVMTypeConverter &converter,
 }

 namespace {
-struct MemRefToLLVMPass
-    : public impl::ConvertMemRefToLLVMBase<MemRefToLLVMPass> {
-  MemRefToLLVMPass() = default;
+struct MemRefToLLVMConversionPass
+    : public impl::MemRefToLLVMConversionPassBase<MemRefToLLVMConversionPass> {
+  using MemRefToLLVMConversionPassBase::MemRefToLLVMConversionPassBase;

   void runOnOperation() override {
     Operation *op = getOperation();
@@ -2137,7 +2137,3 @@ struct MemRefToLLVMPass
   }
 };
 } // namespace
-
-std::unique_ptr<Pass> mlir::createMemRefToLLVMPass() {
-  return std::make_unique<MemRefToLLVMPass>();
-}
diff --git a/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp b/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp
index bd693478a35333..1891ce813919a8 100644
--- a/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp
+++ b/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp
@@ -627,9 +627,21 @@ OpFoldResult arith::XOrIOp::fold(ArrayRef<Attribute> operands) {
   if (getLhs() == getRhs())
     return Builder(getContext()).getZeroAttr(getType());
   /// xor(xor(x, a), a) -> x
-  if (arith::XOrIOp prev = getLhs().getDefiningOp<arith::XOrIOp>())
+  /// xor(xor(a, x), a) -> x
+  if (arith::XOrIOp prev = getLhs().getDefiningOp<arith::XOrIOp>()) {
     if (prev.getRhs() == getRhs())
       return prev.getLhs();
+    if (prev.getLhs() == getRhs())
+      return prev.getRhs();
+  }
+  /// xor(a, xor(x, a)) -> x
+  /// xor(a, xor(a, x)) -> x
+  if (arith::XOrIOp prev = getRhs().getDefiningOp<arith::XOrIOp>()) {
+    if (prev.getRhs() == getLhs())
+      return prev.getLhs();
+    if (prev.getLhs() == getLhs())
+      return prev.getRhs();
+  }

   return constFoldBinaryOp<IntegerAttr>(
       operands, [](APInt a, const APInt &b) { return std::move(a) ^ b; });
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp
index 317240089924f9..6e4c2fc9d7393a 100644
--- a/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp
@@ -638,7 +638,8 @@ LogicalResult mlir::linalg::detail::verifyStructuredOpInterface(Operation *op) {
   auto iteratorTypesRange =
       linalgOp.iterator_types().getAsValueRange<StringAttr>();
   for (StringRef iteratorType : iteratorTypesRange) {
-    if (!llvm::is_contained(getAllIteratorTypeNames(), iteratorType))
+    if (!llvm::is_contained(getAllIteratorTypeNames(), iteratorType) ||
+        !utils::symbolizeIteratorType(iteratorType).has_value())
       return op->emitOpError("unexpected iterator_type (")
              << iteratorType << ")";
   }
diff --git a/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp b/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp
index 870f96282edb2e..9d2a105f5ed687 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp
@@ -90,11 +90,13 @@ struct LinalgOpTilingInterface
   }

   /// Return the loop iterator type.
-  SmallVector<StringRef> getLoopIteratorTypes(Operation *op) const {
+  SmallVector<utils::IteratorType> getLoopIteratorTypes(Operation *op) const {
     LinalgOpTy concreteOp = cast<LinalgOpTy>(op);
     return llvm::to_vector(
         llvm::map_range(concreteOp.iterator_types(), [](Attribute strAttr) {
-          return strAttr.cast<StringAttr>().getValue();
+          return utils::symbolizeIteratorType(
+                     strAttr.cast<StringAttr>().getValue())
+              .getValue();
         }));
   }
diff --git a/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp b/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp
index fa8209ee94f7e7..4b961a76bf4960 100644
--- a/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp
@@ -76,7 +76,7 @@ void mlir::sparse_tensor::buildSparseCompiler(
   pm.addNestedPass<func::FuncOp>(createConvertSCFToCFPass());
   pm.addPass(createLowerAffinePass());
   pm.addPass(createConvertVectorToLLVMPass(options.lowerVectorToLLVMOptions()));
-  pm.addPass(createMemRefToLLVMPass());
+  pm.addPass(createMemRefToLLVMConversionPass());
   pm.addNestedPass<func::FuncOp>(createConvertComplexToStandardPass());
   pm.addNestedPass<func::FuncOp>(
       mlir::arith::createArithmeticExpandOpsPass());
diff --git a/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp
index 1897cde0d425e8..fab2adc4633c7b 100644
--- a/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp
@@ -36,10 +36,10 @@ struct PadOpTiling : public TilingInterface::ExternalModel<PadOpTiling, PadOp> {
     return {initTensor};
   }

-  SmallVector<StringRef> getLoopIteratorTypes(Operation *op) const {
+  SmallVector<utils::IteratorType> getLoopIteratorTypes(Operation *op) const {
     auto padOp = cast<PadOp>(op);
-    SmallVector<StringRef> iteratorTypes(padOp.getResultType().getRank(),
-                                         getParallelIteratorTypeName());
+    SmallVector<utils::IteratorType> iteratorTypes(
+        padOp.getResultType().getRank(), utils::IteratorType::parallel);
     return iteratorTypes;
   }
diff --git a/mlir/lib/Dialect/Utils/StructuredOpsUtils.cpp b/mlir/lib/Dialect/Utils/StructuredOpsUtils.cpp
index 27b570108011ed..d6c33126edef98 100644
--- a/mlir/lib/Dialect/Utils/StructuredOpsUtils.cpp
+++ b/mlir/lib/Dialect/Utils/StructuredOpsUtils.cpp
@@ -10,6 +10,8 @@
 #include "mlir/IR/AffineMap.h"
 #include "mlir/IR/BuiltinAttributes.h"

+#include "mlir/Dialect/Utils/DialectUtilsEnums.cpp.inc"
+
 using namespace mlir;

 bool mlir::isRowMajorMatmul(ArrayAttr indexingMaps) {
diff --git a/mlir/test/Dialect/Arithmetic/canonicalize.mlir b/mlir/test/Dialect/Arithmetic/canonicalize.mlir
index 649da010cd3596..632e7af4a26a3e 100644
--- a/mlir/test/Dialect/Arithmetic/canonicalize.mlir
+++ b/mlir/test/Dialect/Arithmetic/canonicalize.mlir
@@ -1585,3 +1585,51 @@ func.func @test_andi_not_fold_lhs(%arg0 : index) -> index {
   %2 = arith.andi %1, %arg0 : index
   return %2 : index
 }
+
+// -----
+/// xor(xor(x, a), a) -> x
+
+// CHECK-LABEL: @xorxor0(
+// CHECK-NOT: xori
+// CHECK: return %arg0
+func.func @xorxor0(%a : i32, %b : i32) -> i32 {
+  %c = arith.xori %a, %b : i32
+  %res = arith.xori %c, %b : i32
+  return %res : i32
+}
+
+// -----
+/// xor(xor(a, x), a) -> x
+
+// CHECK-LABEL: @xorxor1(
+// CHECK-NOT: xori
+// CHECK: return %arg0
+func.func @xorxor1(%a : i32, %b : i32) -> i32 {
+  %c = arith.xori %b, %a : i32
+  %res = arith.xori %c, %b : i32
+  return %res : i32
+}
+
+// -----
+/// xor(a, xor(x, a)) -> x
+
+// CHECK-LABEL: @xorxor2(
+// CHECK-NOT: xori
+// CHECK: return %arg0
+func.func @xorxor2(%a : i32, %b : i32) -> i32 {
+  %c = arith.xori %a, %b : i32
+  %res = arith.xori %b, %c : i32
+  return %res : i32
+}
+
+// -----
+/// xor(a, xor(a, x)) -> x
+
+// CHECK-LABEL: @xorxor3(
+// CHECK-NOT: xori
+// CHECK: return %arg0
+func.func @xorxor3(%a : i32, %b : i32) -> i32 {
+  %c = arith.xori %b, %a : i32
+  %res = arith.xori %b, %c : i32
+  return %res : i32
+}
diff --git a/mlir/tools/mlir-tblgen/OpFormatGen.cpp b/mlir/tools/mlir-tblgen/OpFormatGen.cpp
index 262125e65e5b2d..83d6794e730ef5 100644
--- a/mlir/tools/mlir-tblgen/OpFormatGen.cpp
+++ b/mlir/tools/mlir-tblgen/OpFormatGen.cpp
@@ -1187,7 +1187,7 @@ void OperationFormat::genElementParser(FormatElement *element, MethodBody &body,
     if (!elseElements.empty()) {
       body << " else {\n";
       ArrayRef<FormatElement *> elseElements =
-          optional->getElseElements(/*parsable=*/true);
+          optional->getElseElements(/*parseable=*/true);
       genElementParsers(elseElements.front(), elseElements,
                         /*thenGroup=*/false);
       body << " }";
diff --git a/mlir/tools/mlir-vulkan-runner/mlir-vulkan-runner.cpp b/mlir/tools/mlir-vulkan-runner/mlir-vulkan-runner.cpp
index 8dbbd17ba0a08e..69d77fc7336b76 100644
--- a/mlir/tools/mlir-vulkan-runner/mlir-vulkan-runner.cpp
+++ b/mlir/tools/mlir-vulkan-runner/mlir-vulkan-runner.cpp
@@ -55,7 +55,7 @@ static LogicalResult runMLIRPasses(ModuleOp module) {
   passManager.addPass(createConvertGpuLaunchFuncToVulkanLaunchFuncPass());
   LowerToLLVMOptions llvmOptions(module.getContext(), DataLayout(module));

-  passManager.addPass(createMemRefToLLVMPass());
+  passManager.addPass(createMemRefToLLVMConversionPass());
   passManager.nest<func::FuncOp>().addPass(LLVM::createRequestCWrappersPass());
   passManager.addPass(createConvertFuncToLLVMPass(llvmOptions));
   passManager.addPass(createReconcileUnrealizedCastsPass());
diff --git a/mlir/unittests/ExecutionEngine/Invoke.cpp b/mlir/unittests/ExecutionEngine/Invoke.cpp
index 60b5be6198ee61..676293a002eea4 100644
--- a/mlir/unittests/ExecutionEngine/Invoke.cpp
+++ b/mlir/unittests/ExecutionEngine/Invoke.cpp
@@ -53,7 +53,7 @@ static struct LLVMInitializer {
 /// dialects lowering to LLVM Dialect.
 static LogicalResult lowerToLLVMDialect(ModuleOp module) {
   PassManager pm(module.getContext());
-  pm.addPass(mlir::createMemRefToLLVMPass());
+  pm.addPass(mlir::createMemRefToLLVMConversionPass());
   pm.addNestedPass<func::FuncOp>(
       mlir::arith::createConvertArithmeticToLLVMPass());
   pm.addPass(mlir::createConvertFuncToLLVMPass());
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index d57eb3df274d55..90aea2c75cc2e6 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -980,6 +980,7 @@ cc_library(
     ],
     textual_hdrs = [
         "src/string/memory_utils/bcmp_implementations.h",
+        "src/string/memory_utils/bzero_implementations.h",
         "src/string/memory_utils/memcmp_implementations.h",
         "src/string/memory_utils/memcpy_implementations.h",
         "src/string/memory_utils/memset_implementations.h",
diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index c400e4529b12f8..8fade09a5448e0 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -2160,6 +2160,14 @@ gentbl_cc_library(
             ["-gen-op-doc"],
             "g3doc/Dialects/NVGPU/NVGPU.md",
         ),
+        (
+            ["-gen-typedef-decls"],
+            "include/mlir/Dialect/NVGPU/IR/NVGPUTypes.h.inc",
+        ),
+        (
+            ["-gen-typedef-defs"],
+            "include/mlir/Dialect/NVGPU/IR/NVGPUTypes.cpp.inc",
+        ),
     ],
     tblgen = ":mlir-tblgen",
     td_file = "include/mlir/Dialect/NVGPU/IR/NVGPU.td",
@@ -2298,6 +2306,33 @@ cc_library(
     ],
 )

+td_library(
+    name = "DialectUtilsTdFiles",
+    srcs = [
+        "include/mlir/Dialect/Utils/StructuredOpsUtils.td",
+    ],
+    includes = ["include"],
+    deps = [":OpBaseTdFiles"],
+)
+
+gentbl_cc_library(
+    name = "DialectUtilsIncGen",
+    strip_include_prefix = "include",
+    tbl_outs = [
+        (
+            ["-gen-enum-decls"],
+            "include/mlir/Dialect/Utils/DialectUtilsEnums.h.inc",
+        ),
+        (
+            ["-gen-enum-defs"],
+            "include/mlir/Dialect/Utils/DialectUtilsEnums.cpp.inc",
+        ),
+    ],
+    tblgen = ":mlir-tblgen",
+    td_file = "include/mlir/Dialect/Utils/StructuredOpsUtils.td",
+    deps = [":DialectUtilsTdFiles"],
+)
+
 cc_library(
     name = "DialectUtils",
     srcs = glob([
@@ -2309,6 +2344,7 @@ cc_library(
     ]),
     includes = ["include"],
     deps = [
+        ":DialectUtilsIncGen",
         ":IR",
         ":Support",
         "//llvm:Support",
     ],
 )
@@ -7190,11 +7226,13 @@ td_library(
     name = "LinalgOpsTdFiles",
     srcs = [
         "include/mlir/Dialect/Linalg/IR/LinalgBase.td",
+        "include/mlir/Dialect/Linalg/IR/LinalgEnums.td",
         "include/mlir/Dialect/Linalg/IR/LinalgOps.td",
     ],
     includes = ["include"],
     deps = [
         ":ControlFlowInterfacesTdFiles",
+        ":DialectUtilsTdFiles",
         ":InferTypeOpInterfaceTdFiles",
         ":LoopLikeInterfaceTdFiles",
         ":OpBaseTdFiles",
@@ -7242,14 +7280,6 @@ gentbl_cc_library(
             ],
             "include/mlir/Dialect/Linalg/IR/LinalgOpsDialect.cpp.inc",
         ),
-        (
-            ["-gen-enum-decls"],
-            "include/mlir/Dialect/Linalg/IR/LinalgOpsEnums.h.inc",
-        ),
-        (
-            ["-gen-enum-defs"],
-            "include/mlir/Dialect/Linalg/IR/LinalgOpsEnums.cpp.inc",
-        ),
         (
             ["-gen-attrdef-decls"],
             "include/mlir/Dialect/Linalg/IR/LinalgOpsAttrDefs.h.inc",
         ),
@@ -7264,6 +7294,24 @@ gentbl_cc_library(
     deps = [":LinalgOpsTdFiles"],
 )

+gentbl_cc_library(
+    name = "LinalgEnumsIncGen",
+    strip_include_prefix = "include",
+    tbl_outs = [
+        (
+            ["-gen-enum-decls"],
+            "include/mlir/Dialect/Linalg/IR/LinalgOpsEnums.h.inc",
+        ),
+        (
+            ["-gen-enum-defs"],
+            "include/mlir/Dialect/Linalg/IR/LinalgOpsEnums.cpp.inc",
+        ),
+    ],
+    tblgen = ":mlir-tblgen",
+    td_file = "include/mlir/Dialect/Linalg/IR/LinalgEnums.td",
+    deps = [":LinalgOpsTdFiles"],
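+    # Editorial note (assumption about usage): consumers include the generated
+    # headers as "mlir/Dialect/Linalg/IR/LinalgOpsEnums.h.inc", which is why
+    # strip_include_prefix = "include" is set on this rule.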
[":LinalgOpsTdFiles"], +) + gentbl_cc_library( name = "LinalgTransformOpsIncGen", strip_include_prefix = "include", @@ -7519,6 +7567,7 @@ cc_library( ":FuncDialect", ":IR", ":InferTypeOpInterface", + ":LinalgEnumsIncGen", ":LinalgInterfacesIncGen", ":LinalgNamedStructuredOpsYamlIncGen", ":LinalgOpsIncGen", @@ -7709,6 +7758,7 @@ cc_library( hdrs = ["include/mlir/Interfaces/TilingInterface.h"], includes = ["include"], deps = [ + ":DialectUtils", ":IR", ":Support", ":TilingInterfaceIncGen",