From 3c3cf2552d662f81d82e2a71112864e17043db7b Mon Sep 17 00:00:00 2001 From: Jan Dupej Date: Thu, 27 Apr 2023 12:54:57 +0200 Subject: [PATCH 1/3] ArmBase methods are now intrinsic. --- src/mono/mono/arch/arm64/arm64-codegen.h | 13 +++++++++ src/mono/mono/mini/cpu-arm64.mdesc | 8 ++++++ src/mono/mono/mini/mini-arm64.c | 35 ++++++++++++++++++++++++ src/mono/mono/mini/simd-intrinsics.c | 2 +- 4 files changed, 57 insertions(+), 1 deletion(-) diff --git a/src/mono/mono/arch/arm64/arm64-codegen.h b/src/mono/mono/arch/arm64/arm64-codegen.h index 8729f34841d1d..a559a762d9a76 100644 --- a/src/mono/mono/arch/arm64/arm64-codegen.h +++ b/src/mono/mono/arch/arm64/arm64-codegen.h @@ -703,6 +703,19 @@ arm_encode_arith_imm (int imm, guint32 *shift) #define arm_mulw(p, rd, rn, rm) arm_maddw ((p), (rd), (rn), (rm), ARMREG_RZR) /* FIXME: Missing multiple opcodes */ +#define arm_format_clx(p, sf, op, rd, rn) arm_emit ((p), 0b01011010110000000001000000000000 | (sf) << 31 | (op) << 10 | (rn) << 5 | (rd)) +#define arm_clsw(p, rd, rn) arm_format_clx ((p), 0, 1, (rd), (rn)) +#define arm_clsx(p, rd, rn) arm_format_clx ((p), 1, 1, (rd), (rn)) +#define arm_clzw(p, rd, rn) arm_format_clx ((p), 0, 0, (rd), (rn)) +#define arm_clzx(p, rd, rn) arm_format_clx ((p), 1, 0, (rd), (rn)) + +#define arm_format_mulh(p, u, rd, rn, rm) arm_emit ((p), 0b10011011010000000111110000000000 | (u) << 23 | (rm) << 16 | (rn) << 5 | (rd)) +#define arm_smulh(p, rd, rn, rm) arm_format_mulh ((p), 0, (rd), (rn), (rm)) +#define arm_umulh(p, rd, rn, rm) arm_format_mulh ((p), 1, (rd), (rn), (rm)) + +#define arm_format_rbit(p, sf, rd, rn) arm_emit ((p), 0b01011010110000000000000000000000 | (sf) << 31 | (rn) << 5 | (rd)) +#define arm_rbitw(p, rd, rn) arm_format_rbit ((p), 0, (rd), (rn)) +#define arm_rbitx(p, rd, rn) arm_format_rbit ((p), 1, (rd), (rn)) /* Division */ #define arm_format_div(p, sf, o1, rd, rn, rm) arm_emit ((p), ((sf) << 31) | (0xd6 << 21) | ((rm) << 16) | (0x1 << 11) | ((o1) << 10) | ((rn) << 5) | ((rd) << 0)) diff --git a/src/mono/mono/mini/cpu-arm64.mdesc b/src/mono/mono/mini/cpu-arm64.mdesc index 1480b38aa72b6..147b0d1e0bef0 100644 --- a/src/mono/mono/mini/cpu-arm64.mdesc +++ b/src/mono/mono/mini/cpu-arm64.mdesc @@ -467,6 +467,14 @@ arm64_cbzw: src1:i len:16 arm64_cbzx: src1:i len:16 arm64_cbnzw: src1:i len:16 arm64_cbnzx: src1:i len:16 +lzcnt32: dest:i src1:i len:4 +lzcnt64: dest:i src1:i len:4 +lscnt32: dest:i src1:i len:4 +lscnt64: dest:i src1:i len:4 +xop_i8_i8: dest:i src1:i len:4 +xop_i4_i4: dest:i src1:i len:4 +arm64_smulh: dest:i src1:i src2:i len:4 +arm64_umulh: dest:i src1:i src2:i len:4 atomic_add_i4: dest:i src1:i src2:i len:32 atomic_add_i8: dest:i src1:i src2:i len:32 diff --git a/src/mono/mono/mini/mini-arm64.c b/src/mono/mono/mini/mini-arm64.c index f81099f16221c..c9c6824445720 100644 --- a/src/mono/mono/mini/mini-arm64.c +++ b/src/mono/mono/mini/mini-arm64.c @@ -5328,6 +5328,41 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) arm_strx (code, i, ins->sreg1, MONO_STRUCT_OFFSET (MonoContext, regs) + i * sizeof (target_mgreg_t)); break; + /**** Arm.ArmBase ****/ + case OP_LZCNT32: + arm_clzw (code, dreg, sreg1); + break; + + case OP_LSCNT32: + arm_clsw (code, dreg, sreg1); + break; + + case OP_LZCNT64: + arm_clzx (code, dreg, sreg1); + break; + + case OP_LSCNT64: + arm_clsx (code, dreg, sreg1); + break; + + case OP_ARM64_SMULH: + arm_smulh (code, dreg, sreg1, sreg2); + break; + + case OP_ARM64_UMULH: + arm_umulh (code, dreg, sreg1, sreg2); + break; + + case OP_XOP_I8_I8: + g_assert (ins->inst_c0 == INTRINS_BITREVERSE_I64); + arm_rbitx (code, dreg, sreg1); + break; + + case OP_XOP_I4_I4: + g_assert (ins->inst_c0 == INTRINS_BITREVERSE_I32); + arm_rbitw (code, dreg, sreg1); + break; + default: g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__); g_assert_not_reached (); diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index 29bdf030026df..e44d94bf2af15 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -3560,7 +3560,7 @@ static const SimdIntrinsic dp_methods [] = { static const IntrinGroup supported_arm_intrinsics [] = { { "AdvSimd", MONO_CPU_ARM64_NEON, advsimd_methods, sizeof (advsimd_methods) }, { "Aes", MONO_CPU_ARM64_CRYPTO, crypto_aes_methods, sizeof (crypto_aes_methods) }, - { "ArmBase", MONO_CPU_ARM64_BASE, armbase_methods, sizeof (armbase_methods) }, + { "ArmBase", MONO_CPU_ARM64_BASE, armbase_methods, sizeof (armbase_methods), TRUE }, { "Crc32", MONO_CPU_ARM64_CRC, crc32_methods, sizeof (crc32_methods) }, { "Dp", MONO_CPU_ARM64_DP, dp_methods, sizeof (dp_methods) }, { "Rdm", MONO_CPU_ARM64_RDM, rdm_methods, sizeof (rdm_methods) }, From d6f6849d529944398c16ee625fcc8c52453e19bf Mon Sep 17 00:00:00 2001 From: Jan Dupej Date: Thu, 27 Apr 2023 16:08:06 +0200 Subject: [PATCH 2/3] Arm.Yield maps to a NOP. --- src/mono/mono/mini/simd-intrinsics.c | 8 ++++++++ src/mono/mono/mini/simd-methods.h | 1 + 2 files changed, 9 insertions(+) diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index e44d94bf2af15..d8850facb9dfc 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -3117,6 +3117,7 @@ static SimdIntrinsic armbase_methods [] = { {SN_LeadingZeroCount}, {SN_MultiplyHigh}, {SN_ReverseElementBits}, + {SN_Yield}, {SN_get_IsSupported}, }; @@ -3596,6 +3597,13 @@ emit_arm64_intrinsics ( (is_64bit ? OP_XOP_I8_I8 : OP_XOP_I4_I4), (is_64bit ? INTRINS_BITREVERSE_I64 : INTRINS_BITREVERSE_I32), arg0_type, fsig, args); + case SN_Yield: { + MonoInst* ins; + MONO_INST_NEW (cfg, ins, OP_NOP); + MONO_ADD_INS (cfg->cbb, ins); + return ins; + } + default: g_assert_not_reached (); // if a new API is added we need to either implement it or change IsSupported to false } diff --git a/src/mono/mono/mini/simd-methods.h b/src/mono/mono/mini/simd-methods.h index 5addf3d1bf700..20db4e837c5f5 100644 --- a/src/mono/mono/mini/simd-methods.h +++ b/src/mono/mono/mini/simd-methods.h @@ -286,6 +286,7 @@ METHOD(CarrylessMultiply) // ArmBase METHOD(LeadingSignCount) METHOD(ReverseElementBits) +METHOD(Yield) // Crc32 METHOD(ComputeCrc32) METHOD(ComputeCrc32C) From 50b4d235c983e08bfd5fee348a4af2d3cd825f56 Mon Sep 17 00:00:00 2001 From: Jan Dupej Date: Fri, 28 Apr 2023 11:49:13 +0200 Subject: [PATCH 3/3] Adding hint operation for arm64's yield. --- src/mono/mono/arch/arm64/arm64-codegen.h | 9 +++++++++ src/mono/mono/mini/cpu-arm64.mdesc | 1 + src/mono/mono/mini/llvm-intrinsics.h | 1 + src/mono/mono/mini/mini-arm64.c | 5 +++++ src/mono/mono/mini/mini-llvm.c | 7 +++++++ src/mono/mono/mini/mini-ops.h | 1 + src/mono/mono/mini/simd-intrinsics.c | 3 ++- 7 files changed, 26 insertions(+), 1 deletion(-) diff --git a/src/mono/mono/arch/arm64/arm64-codegen.h b/src/mono/mono/arch/arm64/arm64-codegen.h index a559a762d9a76..0a69031867854 100644 --- a/src/mono/mono/arch/arm64/arm64-codegen.h +++ b/src/mono/mono/arch/arm64/arm64-codegen.h @@ -123,6 +123,15 @@ typedef enum { ARMSIZE_X = 0x3 } ARMSize; +typedef enum { + ARMHINT_NOP = 0x0, + ARMHINT_YIELD = 0x1, + ARMHINT_WFE = 0x2, + ARMHINT_WFI = 0x3, + ARMHINT_SEV = 0x4, + ARMHINT_SEVL = 0x5 +} ARMHint; + #define arm_emit(p, ins) do { *(guint32*)(p) = (ins); (p) += 4; } while (0) /* Overwrite bits [offset,offset+nbits] with value */ diff --git a/src/mono/mono/mini/cpu-arm64.mdesc b/src/mono/mono/mini/cpu-arm64.mdesc index 147b0d1e0bef0..03a0dcae94aff 100644 --- a/src/mono/mono/mini/cpu-arm64.mdesc +++ b/src/mono/mono/mini/cpu-arm64.mdesc @@ -475,6 +475,7 @@ xop_i8_i8: dest:i src1:i len:4 xop_i4_i4: dest:i src1:i len:4 arm64_smulh: dest:i src1:i src2:i len:4 arm64_umulh: dest:i src1:i src2:i len:4 +arm64_hint: len:4 atomic_add_i4: dest:i src1:i src2:i len:32 atomic_add_i8: dest:i src1:i src2:i len:32 diff --git a/src/mono/mono/mini/llvm-intrinsics.h b/src/mono/mono/mini/llvm-intrinsics.h index fab7aeda9d04f..10932f78bc4cc 100644 --- a/src/mono/mono/mini/llvm-intrinsics.h +++ b/src/mono/mono/mini/llvm-intrinsics.h @@ -318,6 +318,7 @@ INTRINS(AARCH64_SHA256SU1, aarch64_crypto_sha256su1, Arm64) INTRINS(AARCH64_SHA256H, aarch64_crypto_sha256h, Arm64) INTRINS(AARCH64_SHA256H2, aarch64_crypto_sha256h2, Arm64) INTRINS(AARCH64_PMULL64, aarch64_neon_pmull64, Arm64) +INTRINS(AARCH64_HINT, aarch64_hint, Arm64) INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_FACGE, aarch64_neon_facge, Arm64, Ftoi, Scalar | V64 | V128 | I4 | I8) INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_FACGT, aarch64_neon_facgt, Arm64, Ftoi, Scalar | V64 | V128 | I4 | I8) diff --git a/src/mono/mono/mini/mini-arm64.c b/src/mono/mono/mini/mini-arm64.c index c9c6824445720..eab2756509189 100644 --- a/src/mono/mono/mini/mini-arm64.c +++ b/src/mono/mono/mini/mini-arm64.c @@ -5363,6 +5363,11 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) arm_rbitw (code, dreg, sreg1); break; + case OP_ARM64_HINT: + g_assert (ins->inst_c0 <= ARMHINT_SEVL); + arm_hint (code, ins->inst_c0); + break; + default: g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__); g_assert_not_reached (); diff --git a/src/mono/mono/mini/mini-llvm.c b/src/mono/mono/mini/mini-llvm.c index 27ebf535b0ba7..6e2dfc3f45b60 100644 --- a/src/mono/mono/mini/mini-llvm.c +++ b/src/mono/mono/mini/mini-llvm.c @@ -10276,6 +10276,13 @@ MONO_RESTORE_WARNING values [ins->dreg] = LLVMBuildSExt (builder, result, ret_t, ""); break; } + case OP_ARM64_HINT: { + g_assert (ins->inst_c0 <= ARMHINT_SEVL); + LLVMValueRef hintid = LLVMConstInt (LLVMInt32Type (), ins->inst_c0, FALSE); + LLVMValueRef args [] = { hintid }; + call_intrins (ctx, INTRINS_AARCH64_HINT, args, ""); + break; + } case OP_ARM64_EXT: { LLVMTypeRef ret_t = LLVMTypeOf (lhs); unsigned int elems = LLVMGetVectorSize (ret_t); diff --git a/src/mono/mono/mini/mini-ops.h b/src/mono/mono/mini/mini-ops.h index 29c6809e24335..00f382b4148d9 100644 --- a/src/mono/mono/mini/mini-ops.h +++ b/src/mono/mono/mini/mini-ops.h @@ -1465,6 +1465,7 @@ MINI_OP(OP_ARM64_CBZX, "arm64_cbzx", NONE, IREG, NONE) /* Branch if sreg1 != 0 */ MINI_OP(OP_ARM64_CBNZW, "arm64_cbnzw", NONE, IREG, NONE) MINI_OP(OP_ARM64_CBNZX, "arm64_cbnzx", NONE, IREG, NONE) +MINI_OP(OP_ARM64_HINT, "arm64_hint", NONE, NONE, NONE) #endif /* Same as OUTARG_VT, but has a dreg */ diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index d8850facb9dfc..406404b0ee7c1 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -3599,7 +3599,8 @@ emit_arm64_intrinsics ( arg0_type, fsig, args); case SN_Yield: { MonoInst* ins; - MONO_INST_NEW (cfg, ins, OP_NOP); + MONO_INST_NEW (cfg, ins, OP_ARM64_HINT); + ins->inst_c0 = ARMHINT_YIELD; MONO_ADD_INS (cfg->cbb, ins); return ins; }