merge main into amd-staging

Change-Id: Id4b2c0bcecfe3bca2ce95e78a1cbb93020db0141
ROCm · Dec 9, 2024 · 465e637 · 465e637
2 parents 2feaf93 + f6c51ea
commit 465e637
Show file tree

Hide file tree

Showing 42 changed files with 2,675 additions and 87 deletions.
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
@@ -648,7 +648,8 @@ elementwise to the input.
 Unless specified otherwise operation(±0) = ±0 and operation(±infinity) = ±infinity
 
 The integer elementwise intrinsics, including ``__builtin_elementwise_popcount``,
-``__builtin_elementwise_bitreverse``, can be called in a ``constexpr`` context.
+``__builtin_elementwise_bitreverse``, ``__builtin_elementwise_add_sat``,
+``__builtin_elementwise_sub_sat`` can be called in a ``constexpr`` context.
 
 ============================================== ====================================================================== =========================================
          Name                                   Operation                                                             Supported element types

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
@@ -416,12 +416,11 @@ Non-comprehensive list of changes in this release
   The flexible array member (FAM) can now be accessed immediately without causing
   issues with the sanitizer because the counter is automatically set.
 
-- ``__builtin_reduce_add`` function can now be used in constant expressions.
-- ``__builtin_reduce_mul`` function can now be used in constant expressions.
-- ``__builtin_reduce_and`` function can now be used in constant expressions.
-- ``__builtin_reduce_or`` and ``__builtin_reduce_xor`` functions can now be used in constant expressions.
-- ``__builtin_elementwise_popcount`` function can now be used in constant expressions.
-- ``__builtin_elementwise_bitreverse`` function can now be used in constant expressions.
+- The following builtins can now be used in constant expressions: ``__builtin_reduce_add``,
+  ``__builtin_reduce_mul``, ``__builtin_reduce_and``, ``__builtin_reduce_or``,
+  ``__builtin_reduce_xor``, ``__builtin_elementwise_popcount``,
+  ``__builtin_elementwise_bitreverse``, ``__builtin_elementwise_add_sat``,
+  ``__builtin_elementwise_sub_sat``.
 
 New Compiler Flags
 ------------------
@@ -828,6 +827,7 @@ Bug Fixes to C++ Support
 - Fixed a bug where bounds of partially expanded pack indexing expressions were checked too early. (#GH116105)
 - Fixed an assertion failure caused by using ``consteval`` in condition in consumed analyses. (#GH117385)
 - Fix a crash caused by incorrect argument position in merging deduced template arguments. (#GH113659)
+- Fixed a parser crash when using pack indexing as a nested name specifier. (#GH119072) 
 - Fixed an assertion failure caused by mangled names with invalid identifiers. (#GH112205)
 - Fixed an incorrect lambda scope of generic lambdas that caused Clang to crash when computing potential lambda
   captures at the end of a full expression. (#GH115931)

diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
@@ -1450,13 +1450,13 @@ def ElementwiseFma : Builtin {
 
 def ElementwiseAddSat : Builtin {
   let Spellings = ["__builtin_elementwise_add_sat"];
-  let Attributes = [NoThrow, Const, CustomTypeChecking];
+  let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr];
   let Prototype = "void(...)";
 }
 
 def ElementwiseSubSat : Builtin {
   let Spellings = ["__builtin_elementwise_sub_sat"];
-  let Attributes = [NoThrow, Const, CustomTypeChecking];
+  let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr];
   let Prototype = "void(...)";
 }
 

diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
@@ -11339,6 +11339,37 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
 
     return Success(APValue(ResultElements.data(), ResultElements.size()), E);
   }
+  case Builtin::BI__builtin_elementwise_add_sat:
+  case Builtin::BI__builtin_elementwise_sub_sat: {
+    APValue SourceLHS, SourceRHS;
+    if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
+        !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
+      return false;
+
+    QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType();
+    unsigned SourceLen = SourceLHS.getVectorLength();
+    SmallVector<APValue, 4> ResultElements;
+    ResultElements.reserve(SourceLen);
+
+    for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) {
+      APSInt LHS = SourceLHS.getVectorElt(EltNum).getInt();
+      APSInt RHS = SourceRHS.getVectorElt(EltNum).getInt();
+      switch (E->getBuiltinCallee()) {
+      case Builtin::BI__builtin_elementwise_add_sat:
+        ResultElements.push_back(APValue(
+            APSInt(LHS.isSigned() ? LHS.sadd_sat(RHS) : RHS.uadd_sat(RHS),
+                   DestEltTy->isUnsignedIntegerOrEnumerationType())));
+        break;
+      case Builtin::BI__builtin_elementwise_sub_sat:
+        ResultElements.push_back(APValue(
+            APSInt(LHS.isSigned() ? LHS.ssub_sat(RHS) : RHS.usub_sat(RHS),
+                   DestEltTy->isUnsignedIntegerOrEnumerationType())));
+        break;
+      }
+    }
+
+    return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+  }
   }
 }
 
@@ -13204,6 +13235,25 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
     return Success(Val.rotr(Amt.urem(Val.getBitWidth())), E);
   }
 
+  case Builtin::BI__builtin_elementwise_add_sat: {
+    APSInt LHS, RHS;
+    if (!EvaluateInteger(E->getArg(0), LHS, Info) ||
+        !EvaluateInteger(E->getArg(1), RHS, Info))
+      return false;
+
+    APInt Result = LHS.isSigned() ? LHS.sadd_sat(RHS) : LHS.uadd_sat(RHS);
+    return Success(APSInt(Result, !LHS.isSigned()), E);
+  }
+  case Builtin::BI__builtin_elementwise_sub_sat: {
+    APSInt LHS, RHS;
+    if (!EvaluateInteger(E->getArg(0), LHS, Info) ||
+        !EvaluateInteger(E->getArg(1), RHS, Info))
+      return false;
+
+    APInt Result = LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS);
+    return Success(APSInt(Result, !LHS.isSigned()), E);
+  }
+
   case Builtin::BIstrlen:
   case Builtin::BIwcslen:
     // A call to strlen is not a constant expression.

diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp
@@ -1199,7 +1199,7 @@ ExprResult Parser::ParseCastExpression(CastParseKind ParseKind,
         // If the token is not annotated, then it might be an expression pack
         // indexing
         if (!TryAnnotateTypeOrScopeToken() &&
-            Tok.is(tok::annot_pack_indexing_type))
+            Tok.isOneOf(tok::annot_pack_indexing_type, tok::annot_cxxscope))
           return ParseCastExpression(ParseKind, isAddressOfOperand, isTypeCast,
                                      isVectorLiteral, NotPrimaryExpression);
       }

diff --git a/clang/test/CodeGen/builtins-elementwise-math.c b/clang/test/CodeGen/builtins-elementwise-math.c
@@ -112,7 +112,7 @@ void test_builtin_elementwise_add_sat(float f1, float f2, double d1, double d2,
   // CHECK-NEXT: call i32 @llvm.sadd.sat.i32(i32 [[IAS1]], i32 [[B]])
   int_as_one = __builtin_elementwise_add_sat(int_as_one, b);
 
-  // CHECK: call i32 @llvm.sadd.sat.i32(i32 1, i32 97)
+  // CHECK: store i64 98, ptr %i1.addr, align 8
   i1 = __builtin_elementwise_add_sat(1, 'a');
 }
 
@@ -165,7 +165,7 @@ void test_builtin_elementwise_sub_sat(float f1, float f2, double d1, double d2,
   // CHECK-NEXT: call i32 @llvm.ssub.sat.i32(i32 [[IAS1]], i32 [[B]])
   int_as_one = __builtin_elementwise_sub_sat(int_as_one, b);
 
-  // CHECK: call i32 @llvm.ssub.sat.i32(i32 1, i32 97)
+  // CHECK: store i64 -96, ptr %i1.addr, align 8
   i1 = __builtin_elementwise_sub_sat(1, 'a');
 }
 

diff --git a/clang/test/Driver/aarch64-fujitsu-monaka.c b/clang/test/Driver/aarch64-fujitsu-monaka.c
@@ -0,0 +1,13 @@
+// RUN: %clang --target=aarch64 -mcpu=fujitsu-monaka -### -c %s 2>&1 | FileCheck -check-prefix=fujitsu-monaka %s
+// RUN: %clang --target=aarch64 -mlittle-endian -mcpu=fujitsu-monaka -### -c %s 2>&1 | FileCheck -check-prefix=fujitsu-monaka %s
+// RUN: %clang --target=aarch64 -mtune=fujitsu-monaka -### -c %s 2>&1 | FileCheck -check-prefix=fujitsu-monaka-TUNE %s
+// RUN: %clang --target=aarch64 -mlittle-endian -mtune=fujitsu-monaka -### -c %s 2>&1 | FileCheck -check-prefix=fujitsu-monaka-TUNE %s
+// fujitsu-monaka: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "fujitsu-monaka"
+// fujitsu-monaka-TUNE: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic"
+
+// RUN: %clang --target=arm64 -mcpu=fujitsu-monaka -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-fujitsu-monaka %s
+// RUN: %clang --target=arm64 -mlittle-endian -mcpu=fujitsu-monaka -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-fujitsu-monaka %s
+// RUN: %clang --target=arm64 -mtune=fujitsu-monaka -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-fujitsu-monaka-TUNE %s
+// RUN: %clang --target=arm64 -mlittle-endian -mtune=fujitsu-monaka -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-fujitsu-monaka-TUNE %s
+// ARM64-fujitsu-monaka: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "fujitsu-monaka"
+// ARM64-fujitsu-monaka-TUNE: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "generic"
diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-fujitsu-monaka.c b/clang/test/Driver/print-enabled-extensions/aarch64-fujitsu-monaka.c
@@ -0,0 +1,82 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=fujitsu-monaka | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s
+
+// CHECK: Extensions enabled for the given AArch64 target
+// CHECK-EMPTY:
+// CHECK-NEXT:     Architecture Feature(s)                                Description
+// CHECK-NEXT:     FEAT_AES, FEAT_PMULL                                   Enable AES support
+// CHECK-NEXT:     FEAT_AMUv1                                             Enable Armv8.4-A Activity Monitors extension
+// CHECK-NEXT:     FEAT_AMUv1p1                                           Enable Armv8.6-A Activity Monitors Virtualization support
+// CHECK-NEXT:     FEAT_AdvSIMD                                           Enable Advanced SIMD instructions
+// CHECK-NEXT:     FEAT_BF16                                              Enable BFloat16 Extension
+// CHECK-NEXT:     FEAT_BTI                                               Enable Branch Target Identification
+// CHECK-NEXT:     FEAT_CCIDX                                             Enable Armv8.3-A Extend of the CCSIDR number of sets
+// CHECK-NEXT:     FEAT_CLRBHB                                            Enable Clear BHB instruction
+// CHECK-NEXT:     FEAT_CRC32                                             Enable Armv8.0-A CRC-32 checksum instructions
+// CHECK-NEXT:     FEAT_CSV2_2                                            Enable architectural speculation restriction
+// CHECK-NEXT:     FEAT_DIT                                               Enable Armv8.4-A Data Independent Timing instructions
+// CHECK-NEXT:     FEAT_DPB                                               Enable Armv8.2-A data Cache Clean to Point of Persistence
+// CHECK-NEXT:     FEAT_DPB2                                              Enable Armv8.5-A Cache Clean to Point of Deep Persistence
+// CHECK-NEXT:     FEAT_DotProd                                           Enable dot product support
+// CHECK-NEXT:     FEAT_ECV                                               Enable enhanced counter virtualization extension
+// CHECK-NEXT:     FEAT_ETE                                               Enable Embedded Trace Extension
+// CHECK-NEXT:     FEAT_FAMINMAX                                          Enable FAMIN and FAMAX instructions
+// CHECK-NEXT:     FEAT_FCMA                                              Enable Armv8.3-A Floating-point complex number support
+// CHECK-NEXT:     FEAT_FGT                                               Enable fine grained virtualization traps extension
+// CHECK-NEXT:     FEAT_FHM                                               Enable FP16 FML instructions
+// CHECK-NEXT:     FEAT_FP                                                Enable Armv8.0-A Floating Point Extensions
+// CHECK-NEXT:     FEAT_FP16                                              Enable half-precision floating-point data processing
+// CHECK-NEXT:     FEAT_FP8                                               Enable FP8 instructions
+// CHECK-NEXT:     FEAT_FP8DOT2                                           Enable FP8 2-way dot instructions
+// CHECK-NEXT:     FEAT_FP8DOT4                                           Enable FP8 4-way dot instructions
+// CHECK-NEXT:     FEAT_FP8FMA                                            Enable Armv9.5-A FP8 multiply-add instructions
+// CHECK-NEXT:     FEAT_FPAC                                              Enable Armv8.3-A Pointer Authentication Faulting enhancement
+// CHECK-NEXT:     FEAT_FRINTTS                                           Enable FRInt[32|64][Z|X] instructions that round a floating-point number to an integer (in FP format) forcing it to fit into a 32- or 64-bit int
+// CHECK-NEXT:     FEAT_FlagM                                             Enable Armv8.4-A Flag Manipulation instructions
+// CHECK-NEXT:     FEAT_FlagM2                                            Enable alternative NZCV format for floating point comparisons
+// CHECK-NEXT:     FEAT_HBC                                               Enable Armv8.8-A Hinted Conditional Branches Extension
+// CHECK-NEXT:     FEAT_HCX                                               Enable Armv8.7-A HCRX_EL2 system register
+// CHECK-NEXT:     FEAT_I8MM                                              Enable Matrix Multiply Int8 Extension
+// CHECK-NEXT:     FEAT_JSCVT                                             Enable Armv8.3-A JavaScript FP conversion instructions
+// CHECK-NEXT:     FEAT_LOR                                               Enable Armv8.1-A Limited Ordering Regions extension
+// CHECK-NEXT:     FEAT_LRCPC                                             Enable support for RCPC extension
+// CHECK-NEXT:     FEAT_LRCPC2                                            Enable Armv8.4-A RCPC instructions with Immediate Offsets
+// CHECK-NEXT:     FEAT_LS64, FEAT_LS64_V, FEAT_LS64_ACCDATA              Enable Armv8.7-A LD64B/ST64B Accelerator Extension
+// CHECK-NEXT:     FEAT_LSE                                               Enable Armv8.1-A Large System Extension (LSE) atomic instructions
+// CHECK-NEXT:     FEAT_LSE2                                              Enable Armv8.4-A Large System Extension 2 (LSE2) atomicity rules
+// CHECK-NEXT:     FEAT_LUT                                               Enable Lookup Table instructions
+// CHECK-NEXT:     FEAT_MEC                                               Enable Memory Encryption Contexts Extension
+// CHECK-NEXT:     FEAT_MOPS                                              Enable Armv8.8-A memcpy and memset acceleration instructions
+// CHECK-NEXT:     FEAT_MPAM                                              Enable Armv8.4-A Memory system Partitioning and Monitoring extension
+// CHECK-NEXT:     FEAT_NMI, FEAT_GICv3_NMI                               Enable Armv8.8-A Non-maskable Interrupts
+// CHECK-NEXT:     FEAT_NV, FEAT_NV2                                      Enable Armv8.4-A Nested Virtualization Enchancement
+// CHECK-NEXT:     FEAT_PAN                                               Enable Armv8.1-A Privileged Access-Never extension
+// CHECK-NEXT:     FEAT_PAN2                                              Enable Armv8.2-A PAN s1e1R and s1e1W Variants
+// CHECK-NEXT:     FEAT_PAuth                                             Enable Armv8.3-A Pointer Authentication extension
+// CHECK-NEXT:     FEAT_PMUv3                                             Enable Armv8.0-A PMUv3 Performance Monitors extension
+// CHECK-NEXT:     FEAT_RAS, FEAT_RASv1p1                                 Enable Armv8.0-A Reliability, Availability and Serviceability Extensions
+// CHECK-NEXT:     FEAT_RDM                                               Enable Armv8.1-A Rounding Double Multiply Add/Subtract instructions
+// CHECK-NEXT:     FEAT_RME                                               Enable Realm Management Extension
+// CHECK-NEXT:     FEAT_RNG                                               Enable Random Number generation instructions
+// CHECK-NEXT:     FEAT_SB                                                Enable Armv8.5-A Speculation Barrier
+// CHECK-NEXT:     FEAT_SEL2                                              Enable Armv8.4-A Secure Exception Level 2 extension
+// CHECK-NEXT:     FEAT_SHA1, FEAT_SHA256                                 Enable SHA1 and SHA256 support
+// CHECK-NEXT:     FEAT_SHA3, FEAT_SHA512                                 Enable SHA512 and SHA3 support
+// CHECK-NEXT:     FEAT_SM4, FEAT_SM3                                     Enable SM3 and SM4 support
+// CHECK-NEXT:     FEAT_SPECRES                                           Enable Armv8.5-A execution and data prediction invalidation instructions
+// CHECK-NEXT:     FEAT_SPECRES2                                          Enable Speculation Restriction Instruction
+// CHECK-NEXT:     FEAT_SPEv1p2                                           Enable extra register in the Statistical Profiling Extension
+// CHECK-NEXT:     FEAT_SSBS, FEAT_SSBS2                                  Enable Speculative Store Bypass Safe bit
+// CHECK-NEXT:     FEAT_SVE                                               Enable Scalable Vector Extension (SVE) instructions
+// CHECK-NEXT:     FEAT_SVE2                                              Enable Scalable Vector Extension 2 (SVE2) instructions
+// CHECK-NEXT:     FEAT_SVE_AES, FEAT_SVE_PMULL128                        Enable SVE AES and quadword SVE polynomial multiply instructions
+// CHECK-NEXT:     FEAT_SVE_BitPerm                                       Enable bit permutation SVE2 instructions
+// CHECK-NEXT:     FEAT_SVE_SHA3                                          Enable SHA3 SVE2 instructions
+// CHECK-NEXT:     FEAT_SVE_SM4                                           Enable SM4 SVE2 instructions
+// CHECK-NEXT:     FEAT_TLBIOS, FEAT_TLBIRANGE                            Enable Armv8.4-A TLB Range and Maintenance instructions
+// CHECK-NEXT:     FEAT_TRBE                                              Enable Trace Buffer Extension
+// CHECK-NEXT:     FEAT_TRF                                               Enable Armv8.4-A Trace extension
+// CHECK-NEXT:     FEAT_UAO                                               Enable Armv8.2-A UAO PState
+// CHECK-NEXT:     FEAT_VHE                                               Enable Armv8.1-A Virtual Host extension
+// CHECK-NEXT:     FEAT_WFxT                                              Enable Armv8.7-A WFET and WFIT instruction
+// CHECK-NEXT:     FEAT_XS                                                Enable Armv8.7-A limited-TLB-maintenance instruction
diff --git a/clang/test/Misc/target-invalid-cpu-note/aarch64.c b/clang/test/Misc/target-invalid-cpu-note/aarch64.c
@@ -67,6 +67,7 @@
 // CHECK-SAME: {{^}}, exynos-m4
 // CHECK-SAME: {{^}}, exynos-m5
 // CHECK-SAME: {{^}}, falkor
+// CHECK-SAME: {{^}}, fujitsu-monaka
 // CHECK-SAME: {{^}}, generic
 // CHECK-SAME: {{^}}, grace
 // CHECK-SAME: {{^}}, kryo

diff --git a/clang/test/Parser/cxx2c-pack-indexing.cpp b/clang/test/Parser/cxx2c-pack-indexing.cpp
@@ -74,3 +74,12 @@ struct SS {
     }
 };
 }
+
+namespace GH119072 {
+
+template<typename... Ts>
+void foo() {
+  decltype(Ts...[0]::t) value;
+}
+
+}