From 62591c4d8fee3a89797778a58f726c2cbc345172 Mon Sep 17 00:00:00 2001 From: Steve Suzuki Date: Sun, 23 Nov 2025 11:10:42 +0000 Subject: [PATCH 1/2] Handle ARMFp16 and ARMDotProd features in SVE2 In SVE2, enable ARMFp16 and ARMDotProd features even if they are not explicitly enabled. --- src/CodeGen_ARM.cpp | 14 +++++++++----- test/correctness/simd_op_check_sve2.cpp | 2 +- test/warning/emulated_float16.cpp | 2 +- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/CodeGen_ARM.cpp b/src/CodeGen_ARM.cpp index b4784db6a8a1..bc6b13f95cc9 100644 --- a/src/CodeGen_ARM.cpp +++ b/src/CodeGen_ARM.cpp @@ -212,9 +212,12 @@ class CodeGen_ARM : public CodeGen_Posix { !target.has_feature(Target::SVE2); } + bool has_feature_fp16() const { + return target.features_any_of({Target::ARMFp16, Target::SVE, Target::SVE2}); + } bool is_float16_and_has_feature(const Type &t) const { // NOTE : t.is_float() returns true even in case of BFloat16. We don't include it for now. - return t.code() == Type::Float && t.bits() == 16 && target.has_feature(Target::ARMFp16); + return t.code() == Type::Float && t.bits() == 16 && has_feature_fp16(); } bool supports_call_as_float16(const Call *op) const override; @@ -1004,7 +1007,7 @@ void CodeGen_ARM::init_module() { } for (const ArmIntrinsic &intrin : intrinsic_defs) { - if (intrin.flags & ArmIntrinsic::RequireFp16 && !target.has_feature(Target::ARMFp16)) { + if ((intrin.flags & ArmIntrinsic::RequireFp16) && !has_feature_fp16()) { continue; } @@ -1259,7 +1262,8 @@ void CodeGen_ARM::visit(const Add *op) { } // SDOT, UDOT - if (op->type.is_vector() && target.has_feature(Target::ARMDotProd) && op->type.is_int_or_uint() && op->type.bits() == 32) { + if (op->type.is_vector() && op->type.is_int_or_uint() && op->type.bits() == 32 && + target.features_any_of({Target::ARMDotProd, Target::SVE2})) { // Initial values. Expr init_i32 = Variable::make(Int(32, 0), "init"); Expr init_u32 = Variable::make(UInt(32, 0), "init"); @@ -2056,7 +2060,7 @@ void CodeGen_ARM::visit(const Call *op) { } } - if (target.has_feature(Target::ARMFp16)) { + if (has_feature_fp16()) { auto it = float16_transcendental_remapping.find(op->name); if (it != float16_transcendental_remapping.end()) { // This op doesn't have float16 native function. @@ -2489,7 +2493,7 @@ int CodeGen_ARM::target_vscale() const { bool CodeGen_ARM::supports_call_as_float16(const Call *op) const { bool is_fp16_native = float16_native_funcs.find(op->name) != float16_native_funcs.end(); bool is_fp16_transcendental = float16_transcendental_remapping.find(op->name) != float16_transcendental_remapping.end(); - return target.has_feature(Target::ARMFp16) && (is_fp16_native || is_fp16_transcendental); + return has_feature_fp16() && (is_fp16_native || is_fp16_transcendental); } } // namespace diff --git a/test/correctness/simd_op_check_sve2.cpp b/test/correctness/simd_op_check_sve2.cpp index 112434fd75ab..8a45a2325851 100644 --- a/test/correctness/simd_op_check_sve2.cpp +++ b/test/correctness/simd_op_check_sve2.cpp @@ -1365,7 +1365,7 @@ class SimdOpCheckArmSve : public SimdOpCheckTest { }; bool is_float16_supported() const { - return (target.bits == 64) && target.has_feature(Target::ARMFp16); + return (target.bits == 64) && target.features_any_of({Target::ARMFp16, Target::SVE, Target::SVE2}); } bool can_run_the_code; diff --git a/test/warning/emulated_float16.cpp b/test/warning/emulated_float16.cpp index adc29d45f823..84c3655fdc52 100644 --- a/test/warning/emulated_float16.cpp +++ b/test/warning/emulated_float16.cpp @@ -12,7 +12,7 @@ int main(int argc, char **argv) { // Make sure target has no float16 native support Target t = get_host_target(); - for (auto &feature : {Target::F16C, Target::ARMFp16}) { + for (auto &feature : {Target::F16C, Target::ARMFp16, Target::SVE, Target::SVE2}) { t = t.without_feature(feature); } From 6a58e29dad331dcaa7cb7f636eb6d754c753e4fb Mon Sep 17 00:00:00 2001 From: Steve Suzuki Date: Mon, 8 Dec 2025 10:31:39 +0000 Subject: [PATCH 2/2] Complete implied features of ARMFp16 and ARMDotProd --- src/CodeGen_ARM.cpp | 43 +++++++++++++++++++++++++++++++------------ 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/src/CodeGen_ARM.cpp b/src/CodeGen_ARM.cpp index bc6b13f95cc9..0640554833cd 100644 --- a/src/CodeGen_ARM.cpp +++ b/src/CodeGen_ARM.cpp @@ -50,6 +50,12 @@ Target complete_arm_target(Target t) { t.set_feature(Target::ARMv84a); } + auto add_implied_feature_if_supported = [](Target &t, Target::Feature super, Target::Feature implied) { + if (t.has_feature(super)) { + t.set_feature(implied); + } + }; + constexpr int num_arm_v8_features = 10; static const Target::Feature arm_v8_features[num_arm_v8_features] = { Target::ARMv89a, @@ -65,9 +71,26 @@ Target complete_arm_target(Target t) { }; for (int i = 0; i < num_arm_v8_features - 1; i++) { - if (t.has_feature(arm_v8_features[i])) { - t.set_feature(arm_v8_features[i + 1]); - } + add_implied_feature_if_supported(t, + arm_v8_features[i], + arm_v8_features[i + 1]); + } + + static const Target::Feature features_with_fp16[] = { + Target::SVE, + Target::SVE2, + }; + + for (const auto &f : features_with_fp16) { + add_implied_feature_if_supported(t, f, Target::ARMFp16); + } + + static const Target::Feature features_with_dotprod[] = { + Target::SVE2, + }; + + for (const auto &f : features_with_dotprod) { + add_implied_feature_if_supported(t, f, Target::ARMDotProd); } return t; @@ -212,12 +235,9 @@ class CodeGen_ARM : public CodeGen_Posix { !target.has_feature(Target::SVE2); } - bool has_feature_fp16() const { - return target.features_any_of({Target::ARMFp16, Target::SVE, Target::SVE2}); - } bool is_float16_and_has_feature(const Type &t) const { // NOTE : t.is_float() returns true even in case of BFloat16. We don't include it for now. - return t.code() == Type::Float && t.bits() == 16 && has_feature_fp16(); + return t.code() == Type::Float && t.bits() == 16 && target.has_feature(Target::ARMFp16); } bool supports_call_as_float16(const Call *op) const override; @@ -1007,7 +1027,7 @@ void CodeGen_ARM::init_module() { } for (const ArmIntrinsic &intrin : intrinsic_defs) { - if ((intrin.flags & ArmIntrinsic::RequireFp16) && !has_feature_fp16()) { + if ((intrin.flags & ArmIntrinsic::RequireFp16) && !target.has_feature(Target::ARMFp16)) { continue; } @@ -1262,8 +1282,7 @@ void CodeGen_ARM::visit(const Add *op) { } // SDOT, UDOT - if (op->type.is_vector() && op->type.is_int_or_uint() && op->type.bits() == 32 && - target.features_any_of({Target::ARMDotProd, Target::SVE2})) { + if (op->type.is_vector() && target.has_feature(Target::ARMDotProd) && op->type.is_int_or_uint() && op->type.bits() == 32) { // Initial values. Expr init_i32 = Variable::make(Int(32, 0), "init"); Expr init_u32 = Variable::make(UInt(32, 0), "init"); @@ -2060,7 +2079,7 @@ void CodeGen_ARM::visit(const Call *op) { } } - if (has_feature_fp16()) { + if (target.has_feature(Target::ARMFp16)) { auto it = float16_transcendental_remapping.find(op->name); if (it != float16_transcendental_remapping.end()) { // This op doesn't have float16 native function. @@ -2493,7 +2512,7 @@ int CodeGen_ARM::target_vscale() const { bool CodeGen_ARM::supports_call_as_float16(const Call *op) const { bool is_fp16_native = float16_native_funcs.find(op->name) != float16_native_funcs.end(); bool is_fp16_transcendental = float16_transcendental_remapping.find(op->name) != float16_transcendental_remapping.end(); - return has_feature_fp16() && (is_fp16_native || is_fp16_transcendental); + return target.has_feature(Target::ARMFp16) && (is_fp16_native || is_fp16_transcendental); } } // namespace