Skip to content

Commit

Permalink
merge main into amd-staging
Browse files Browse the repository at this point in the history
Change-Id: Id4b2c0bcecfe3bca2ce95e78a1cbb93020db0141
  • Loading branch information
Jenkins committed Dec 9, 2024
2 parents 2feaf93 + f6c51ea commit 465e637
Show file tree
Hide file tree
Showing 42 changed files with 2,675 additions and 87 deletions.
3 changes: 2 additions & 1 deletion clang/docs/LanguageExtensions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -648,7 +648,8 @@ elementwise to the input.
Unless specified otherwise operation(±0) = ±0 and operation(±infinity) = ±infinity

The integer elementwise intrinsics, including ``__builtin_elementwise_popcount``,
``__builtin_elementwise_bitreverse``, can be called in a ``constexpr`` context.
``__builtin_elementwise_bitreverse``, ``__builtin_elementwise_add_sat``,
``__builtin_elementwise_sub_sat`` can be called in a ``constexpr`` context.

============================================== ====================================================================== =========================================
Name Operation Supported element types
Expand Down
12 changes: 6 additions & 6 deletions clang/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -416,12 +416,11 @@ Non-comprehensive list of changes in this release
The flexible array member (FAM) can now be accessed immediately without causing
issues with the sanitizer because the counter is automatically set.

- ``__builtin_reduce_add`` function can now be used in constant expressions.
- ``__builtin_reduce_mul`` function can now be used in constant expressions.
- ``__builtin_reduce_and`` function can now be used in constant expressions.
- ``__builtin_reduce_or`` and ``__builtin_reduce_xor`` functions can now be used in constant expressions.
- ``__builtin_elementwise_popcount`` function can now be used in constant expressions.
- ``__builtin_elementwise_bitreverse`` function can now be used in constant expressions.
- The following builtins can now be used in constant expressions: ``__builtin_reduce_add``,
``__builtin_reduce_mul``, ``__builtin_reduce_and``, ``__builtin_reduce_or``,
``__builtin_reduce_xor``, ``__builtin_elementwise_popcount``,
``__builtin_elementwise_bitreverse``, ``__builtin_elementwise_add_sat``,
``__builtin_elementwise_sub_sat``.

New Compiler Flags
------------------
Expand Down Expand Up @@ -828,6 +827,7 @@ Bug Fixes to C++ Support
- Fixed a bug where bounds of partially expanded pack indexing expressions were checked too early. (#GH116105)
- Fixed an assertion failure caused by using ``consteval`` in condition in consumed analyses. (#GH117385)
- Fix a crash caused by incorrect argument position in merging deduced template arguments. (#GH113659)
- Fixed a parser crash when using pack indexing as a nested name specifier. (#GH119072)
- Fixed an assertion failure caused by mangled names with invalid identifiers. (#GH112205)
- Fixed an incorrect lambda scope of generic lambdas that caused Clang to crash when computing potential lambda
captures at the end of a full expression. (#GH115931)
Expand Down
4 changes: 2 additions & 2 deletions clang/include/clang/Basic/Builtins.td
Original file line number Diff line number Diff line change
Expand Up @@ -1450,13 +1450,13 @@ def ElementwiseFma : Builtin {

def ElementwiseAddSat : Builtin {
let Spellings = ["__builtin_elementwise_add_sat"];
let Attributes = [NoThrow, Const, CustomTypeChecking];
let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr];
let Prototype = "void(...)";
}

def ElementwiseSubSat : Builtin {
let Spellings = ["__builtin_elementwise_sub_sat"];
let Attributes = [NoThrow, Const, CustomTypeChecking];
let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr];
let Prototype = "void(...)";
}

Expand Down
50 changes: 50 additions & 0 deletions clang/lib/AST/ExprConstant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11339,6 +11339,37 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {

return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
case Builtin::BI__builtin_elementwise_add_sat:
case Builtin::BI__builtin_elementwise_sub_sat: {
APValue SourceLHS, SourceRHS;
if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
!EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
return false;

QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType();
unsigned SourceLen = SourceLHS.getVectorLength();
SmallVector<APValue, 4> ResultElements;
ResultElements.reserve(SourceLen);

for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) {
APSInt LHS = SourceLHS.getVectorElt(EltNum).getInt();
APSInt RHS = SourceRHS.getVectorElt(EltNum).getInt();
switch (E->getBuiltinCallee()) {
case Builtin::BI__builtin_elementwise_add_sat:
ResultElements.push_back(APValue(
APSInt(LHS.isSigned() ? LHS.sadd_sat(RHS) : RHS.uadd_sat(RHS),
DestEltTy->isUnsignedIntegerOrEnumerationType())));
break;
case Builtin::BI__builtin_elementwise_sub_sat:
ResultElements.push_back(APValue(
APSInt(LHS.isSigned() ? LHS.ssub_sat(RHS) : RHS.usub_sat(RHS),
DestEltTy->isUnsignedIntegerOrEnumerationType())));
break;
}
}

return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
}
}

Expand Down Expand Up @@ -13204,6 +13235,25 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
return Success(Val.rotr(Amt.urem(Val.getBitWidth())), E);
}

case Builtin::BI__builtin_elementwise_add_sat: {
APSInt LHS, RHS;
if (!EvaluateInteger(E->getArg(0), LHS, Info) ||
!EvaluateInteger(E->getArg(1), RHS, Info))
return false;

APInt Result = LHS.isSigned() ? LHS.sadd_sat(RHS) : LHS.uadd_sat(RHS);
return Success(APSInt(Result, !LHS.isSigned()), E);
}
case Builtin::BI__builtin_elementwise_sub_sat: {
APSInt LHS, RHS;
if (!EvaluateInteger(E->getArg(0), LHS, Info) ||
!EvaluateInteger(E->getArg(1), RHS, Info))
return false;

APInt Result = LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS);
return Success(APSInt(Result, !LHS.isSigned()), E);
}

case Builtin::BIstrlen:
case Builtin::BIwcslen:
// A call to strlen is not a constant expression.
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/Parse/ParseExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1199,7 +1199,7 @@ ExprResult Parser::ParseCastExpression(CastParseKind ParseKind,
// If the token is not annotated, then it might be an expression pack
// indexing
if (!TryAnnotateTypeOrScopeToken() &&
Tok.is(tok::annot_pack_indexing_type))
Tok.isOneOf(tok::annot_pack_indexing_type, tok::annot_cxxscope))
return ParseCastExpression(ParseKind, isAddressOfOperand, isTypeCast,
isVectorLiteral, NotPrimaryExpression);
}
Expand Down
4 changes: 2 additions & 2 deletions clang/test/CodeGen/builtins-elementwise-math.c
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ void test_builtin_elementwise_add_sat(float f1, float f2, double d1, double d2,
// CHECK-NEXT: call i32 @llvm.sadd.sat.i32(i32 [[IAS1]], i32 [[B]])
int_as_one = __builtin_elementwise_add_sat(int_as_one, b);

// CHECK: call i32 @llvm.sadd.sat.i32(i32 1, i32 97)
// CHECK: store i64 98, ptr %i1.addr, align 8
i1 = __builtin_elementwise_add_sat(1, 'a');
}

Expand Down Expand Up @@ -165,7 +165,7 @@ void test_builtin_elementwise_sub_sat(float f1, float f2, double d1, double d2,
// CHECK-NEXT: call i32 @llvm.ssub.sat.i32(i32 [[IAS1]], i32 [[B]])
int_as_one = __builtin_elementwise_sub_sat(int_as_one, b);

// CHECK: call i32 @llvm.ssub.sat.i32(i32 1, i32 97)
// CHECK: store i64 -96, ptr %i1.addr, align 8
i1 = __builtin_elementwise_sub_sat(1, 'a');
}

Expand Down
13 changes: 13 additions & 0 deletions clang/test/Driver/aarch64-fujitsu-monaka.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// RUN: %clang --target=aarch64 -mcpu=fujitsu-monaka -### -c %s 2>&1 | FileCheck -check-prefix=fujitsu-monaka %s
// RUN: %clang --target=aarch64 -mlittle-endian -mcpu=fujitsu-monaka -### -c %s 2>&1 | FileCheck -check-prefix=fujitsu-monaka %s
// RUN: %clang --target=aarch64 -mtune=fujitsu-monaka -### -c %s 2>&1 | FileCheck -check-prefix=fujitsu-monaka-TUNE %s
// RUN: %clang --target=aarch64 -mlittle-endian -mtune=fujitsu-monaka -### -c %s 2>&1 | FileCheck -check-prefix=fujitsu-monaka-TUNE %s
// fujitsu-monaka: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "fujitsu-monaka"
// fujitsu-monaka-TUNE: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic"

// RUN: %clang --target=arm64 -mcpu=fujitsu-monaka -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-fujitsu-monaka %s
// RUN: %clang --target=arm64 -mlittle-endian -mcpu=fujitsu-monaka -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-fujitsu-monaka %s
// RUN: %clang --target=arm64 -mtune=fujitsu-monaka -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-fujitsu-monaka-TUNE %s
// RUN: %clang --target=arm64 -mlittle-endian -mtune=fujitsu-monaka -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-fujitsu-monaka-TUNE %s
// ARM64-fujitsu-monaka: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "fujitsu-monaka"
// ARM64-fujitsu-monaka-TUNE: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "generic"
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
// REQUIRES: aarch64-registered-target
// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=fujitsu-monaka | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s

// CHECK: Extensions enabled for the given AArch64 target
// CHECK-EMPTY:
// CHECK-NEXT: Architecture Feature(s) Description
// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support
// CHECK-NEXT: FEAT_AMUv1 Enable Armv8.4-A Activity Monitors extension
// CHECK-NEXT: FEAT_AMUv1p1 Enable Armv8.6-A Activity Monitors Virtualization support
// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions
// CHECK-NEXT: FEAT_BF16 Enable BFloat16 Extension
// CHECK-NEXT: FEAT_BTI Enable Branch Target Identification
// CHECK-NEXT: FEAT_CCIDX Enable Armv8.3-A Extend of the CCSIDR number of sets
// CHECK-NEXT: FEAT_CLRBHB Enable Clear BHB instruction
// CHECK-NEXT: FEAT_CRC32 Enable Armv8.0-A CRC-32 checksum instructions
// CHECK-NEXT: FEAT_CSV2_2 Enable architectural speculation restriction
// CHECK-NEXT: FEAT_DIT Enable Armv8.4-A Data Independent Timing instructions
// CHECK-NEXT: FEAT_DPB Enable Armv8.2-A data Cache Clean to Point of Persistence
// CHECK-NEXT: FEAT_DPB2 Enable Armv8.5-A Cache Clean to Point of Deep Persistence
// CHECK-NEXT: FEAT_DotProd Enable dot product support
// CHECK-NEXT: FEAT_ECV Enable enhanced counter virtualization extension
// CHECK-NEXT: FEAT_ETE Enable Embedded Trace Extension
// CHECK-NEXT: FEAT_FAMINMAX Enable FAMIN and FAMAX instructions
// CHECK-NEXT: FEAT_FCMA Enable Armv8.3-A Floating-point complex number support
// CHECK-NEXT: FEAT_FGT Enable fine grained virtualization traps extension
// CHECK-NEXT: FEAT_FHM Enable FP16 FML instructions
// CHECK-NEXT: FEAT_FP Enable Armv8.0-A Floating Point Extensions
// CHECK-NEXT: FEAT_FP16 Enable half-precision floating-point data processing
// CHECK-NEXT: FEAT_FP8 Enable FP8 instructions
// CHECK-NEXT: FEAT_FP8DOT2 Enable FP8 2-way dot instructions
// CHECK-NEXT: FEAT_FP8DOT4 Enable FP8 4-way dot instructions
// CHECK-NEXT: FEAT_FP8FMA Enable Armv9.5-A FP8 multiply-add instructions
// CHECK-NEXT: FEAT_FPAC Enable Armv8.3-A Pointer Authentication Faulting enhancement
// CHECK-NEXT: FEAT_FRINTTS Enable FRInt[32|64][Z|X] instructions that round a floating-point number to an integer (in FP format) forcing it to fit into a 32- or 64-bit int
// CHECK-NEXT: FEAT_FlagM Enable Armv8.4-A Flag Manipulation instructions
// CHECK-NEXT: FEAT_FlagM2 Enable alternative NZCV format for floating point comparisons
// CHECK-NEXT: FEAT_HBC Enable Armv8.8-A Hinted Conditional Branches Extension
// CHECK-NEXT: FEAT_HCX Enable Armv8.7-A HCRX_EL2 system register
// CHECK-NEXT: FEAT_I8MM Enable Matrix Multiply Int8 Extension
// CHECK-NEXT: FEAT_JSCVT Enable Armv8.3-A JavaScript FP conversion instructions
// CHECK-NEXT: FEAT_LOR Enable Armv8.1-A Limited Ordering Regions extension
// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension
// CHECK-NEXT: FEAT_LRCPC2 Enable Armv8.4-A RCPC instructions with Immediate Offsets
// CHECK-NEXT: FEAT_LS64, FEAT_LS64_V, FEAT_LS64_ACCDATA Enable Armv8.7-A LD64B/ST64B Accelerator Extension
// CHECK-NEXT: FEAT_LSE Enable Armv8.1-A Large System Extension (LSE) atomic instructions
// CHECK-NEXT: FEAT_LSE2 Enable Armv8.4-A Large System Extension 2 (LSE2) atomicity rules
// CHECK-NEXT: FEAT_LUT Enable Lookup Table instructions
// CHECK-NEXT: FEAT_MEC Enable Memory Encryption Contexts Extension
// CHECK-NEXT: FEAT_MOPS Enable Armv8.8-A memcpy and memset acceleration instructions
// CHECK-NEXT: FEAT_MPAM Enable Armv8.4-A Memory system Partitioning and Monitoring extension
// CHECK-NEXT: FEAT_NMI, FEAT_GICv3_NMI Enable Armv8.8-A Non-maskable Interrupts
// CHECK-NEXT: FEAT_NV, FEAT_NV2 Enable Armv8.4-A Nested Virtualization Enchancement
// CHECK-NEXT: FEAT_PAN Enable Armv8.1-A Privileged Access-Never extension
// CHECK-NEXT: FEAT_PAN2 Enable Armv8.2-A PAN s1e1R and s1e1W Variants
// CHECK-NEXT: FEAT_PAuth Enable Armv8.3-A Pointer Authentication extension
// CHECK-NEXT: FEAT_PMUv3 Enable Armv8.0-A PMUv3 Performance Monitors extension
// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable Armv8.0-A Reliability, Availability and Serviceability Extensions
// CHECK-NEXT: FEAT_RDM Enable Armv8.1-A Rounding Double Multiply Add/Subtract instructions
// CHECK-NEXT: FEAT_RME Enable Realm Management Extension
// CHECK-NEXT: FEAT_RNG Enable Random Number generation instructions
// CHECK-NEXT: FEAT_SB Enable Armv8.5-A Speculation Barrier
// CHECK-NEXT: FEAT_SEL2 Enable Armv8.4-A Secure Exception Level 2 extension
// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support
// CHECK-NEXT: FEAT_SHA3, FEAT_SHA512 Enable SHA512 and SHA3 support
// CHECK-NEXT: FEAT_SM4, FEAT_SM3 Enable SM3 and SM4 support
// CHECK-NEXT: FEAT_SPECRES Enable Armv8.5-A execution and data prediction invalidation instructions
// CHECK-NEXT: FEAT_SPECRES2 Enable Speculation Restriction Instruction
// CHECK-NEXT: FEAT_SPEv1p2 Enable extra register in the Statistical Profiling Extension
// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit
// CHECK-NEXT: FEAT_SVE Enable Scalable Vector Extension (SVE) instructions
// CHECK-NEXT: FEAT_SVE2 Enable Scalable Vector Extension 2 (SVE2) instructions
// CHECK-NEXT: FEAT_SVE_AES, FEAT_SVE_PMULL128 Enable SVE AES and quadword SVE polynomial multiply instructions
// CHECK-NEXT: FEAT_SVE_BitPerm Enable bit permutation SVE2 instructions
// CHECK-NEXT: FEAT_SVE_SHA3 Enable SHA3 SVE2 instructions
// CHECK-NEXT: FEAT_SVE_SM4 Enable SM4 SVE2 instructions
// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable Armv8.4-A TLB Range and Maintenance instructions
// CHECK-NEXT: FEAT_TRBE Enable Trace Buffer Extension
// CHECK-NEXT: FEAT_TRF Enable Armv8.4-A Trace extension
// CHECK-NEXT: FEAT_UAO Enable Armv8.2-A UAO PState
// CHECK-NEXT: FEAT_VHE Enable Armv8.1-A Virtual Host extension
// CHECK-NEXT: FEAT_WFxT Enable Armv8.7-A WFET and WFIT instruction
// CHECK-NEXT: FEAT_XS Enable Armv8.7-A limited-TLB-maintenance instruction
1 change: 1 addition & 0 deletions clang/test/Misc/target-invalid-cpu-note/aarch64.c
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
// CHECK-SAME: {{^}}, exynos-m4
// CHECK-SAME: {{^}}, exynos-m5
// CHECK-SAME: {{^}}, falkor
// CHECK-SAME: {{^}}, fujitsu-monaka
// CHECK-SAME: {{^}}, generic
// CHECK-SAME: {{^}}, grace
// CHECK-SAME: {{^}}, kryo
Expand Down
9 changes: 9 additions & 0 deletions clang/test/Parser/cxx2c-pack-indexing.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,3 +74,12 @@ struct SS {
}
};
}

namespace GH119072 {

template<typename... Ts>
void foo() {
decltype(Ts...[0]::t) value;
}

}
Loading

0 comments on commit 465e637

Please sign in to comment.