Skip to content

[mono][jit] Enable all X86Base intrinsics #91393

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Sep 8, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/mono/mono/arch/amd64/amd64-codegen.h
Original file line number Diff line number Diff line change
@@ -1206,6 +1206,9 @@ typedef union {
#define amd64_movsb_size(inst,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,0); x86_movsb(inst); amd64_codegen_post(inst); } while (0)
#define amd64_movsl_size(inst,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,0); x86_movsl(inst); amd64_codegen_post(inst); } while (0)
#define amd64_movsd_size(inst,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,0); x86_movsd(inst); amd64_codegen_post(inst); } while (0)
/*
 * amd64_bsf_size / amd64_bsr_size: emit a BSF / BSR (bit scan forward / reverse)
 * instruction with destination register `dreg` and source register `reg`.
 *
 * A REX prefix is emitted first via amd64_emit_rex from (size, dreg, reg), and
 * the x86 emitter (x86_bsf / x86_bsr) is then given only the low three bits of
 * each register number ((dreg)&0x7, (reg)&0x7).
 * `inst` is expanded several times inside the macro body, so pass a plain
 * code-pointer lvalue rather than an expression with side effects.
 */
#define amd64_bsf_size(inst,dreg,reg,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),(dreg),0,(reg)); x86_bsf ((inst),(dreg)&0x7,(reg)&0x7); amd64_codegen_post (inst); } while (0)
#define amd64_bsr_size(inst,dreg,reg,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),(dreg),0,(reg)); x86_bsr ((inst),(dreg)&0x7,(reg)&0x7); amd64_codegen_post (inst); } while (0)

#define amd64_prefix_size(inst,p,size) do { x86_prefix((inst), p); } while (0)
#define amd64_rdtsc_size(inst,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),0,0,0); x86_rdtsc(inst); amd64_codegen_post(inst); } while (0)
#define amd64_cmpxchg_reg_reg_size(inst,dreg,reg,size) do { amd64_codegen_pre(inst); amd64_emit_rex ((inst),(size),(dreg),0,(reg)); x86_cmpxchg_reg_reg((inst),((dreg)&0x7),((reg)&0x7)); amd64_codegen_post(inst); } while (0)
3 changes: 3 additions & 0 deletions src/mono/mono/arch/x86/x86-codegen.h
Original file line number Diff line number Diff line change
@@ -1963,6 +1963,9 @@ mono_x86_patch_inline (guchar* code, gpointer target)
#define x86_leave(inst) do { x86_byte (inst, 0xc9); } while (0)
#define x86_sahf(inst) do { x86_byte (inst, 0x9e); } while (0)

/*
 * x86_bsf / x86_bsr: emit a register-to-register BSF / BSR (bit scan forward /
 * reverse) instruction.
 *
 * Each macro writes the two opcode bytes (0x0f 0xbc for BSF, 0x0f 0xbd for BSR)
 * and then calls x86_reg_emit with (dreg, reg) to encode the operands.
 * `inst` is expanded several times, so pass a plain code-pointer lvalue.
 */
#define x86_bsf(inst,dreg,reg) do { x86_byte (inst, 0x0f); x86_byte (inst, 0xbc); x86_reg_emit ((inst), (dreg), (reg)); } while (0)
#define x86_bsr(inst,dreg,reg) do { x86_byte (inst, 0x0f); x86_byte (inst, 0xbd); x86_reg_emit ((inst), (dreg), (reg)); } while (0)

#define x86_fsin(inst) do { x86_codegen_pre(&(inst), 2); x86_byte (inst, 0xd9); x86_byte (inst, 0xfe); } while (0)
#define x86_fcos(inst) do { x86_codegen_pre(&(inst), 2); x86_byte (inst, 0xd9); x86_byte (inst, 0xff); } while (0)
#define x86_fabs(inst) do { x86_codegen_pre(&(inst), 2); x86_byte (inst, 0xd9); x86_byte (inst, 0xe1); } while (0)
10 changes: 10 additions & 0 deletions src/mono/mono/mini/cpu-amd64.mdesc
Original file line number Diff line number Diff line change
@@ -98,6 +98,9 @@ long_div: dest:a src1:a src2:i len:16 clob:d
long_div_un: dest:a src1:a src2:i len:16 clob:d
long_rem: dest:d src1:a src2:i len:16 clob:a
long_rem_un: dest:d src1:a src2:i len:16 clob:a
long_divrem: dest:a src1:a src2:d src3:i len:16 clob:x
long_divrem_un: dest:a src1:a src2:d src3:i len:16 clob:x
long_divrem2: dest:d len:3
long_and: dest:i src1:i src2:i len:3 clob:1
long_or: dest:i src1:i src2:i len:3 clob:1
long_xor: dest:i src1:i src2:i len:3 clob:1
@@ -337,6 +340,10 @@ amd64_lea_membase: dest:i src1:i len:11
x86_xchg: src1:i src2:i clob:x len:2
x86_fpop: src1:f len:3
x86_seteq_membase: src1:b len:9
x86_bsf32: dest:i src1:i len:4
x86_bsf64: dest:l src1:l len:4
x86_bsr32: dest:i src1:i len:4
x86_bsr64: dest:l src1:l len:4

x86_add_reg_membase: dest:i src1:i src2:b clob:1 len:13
x86_sub_reg_membase: dest:i src1:i src2:b clob:1 len:13
@@ -411,6 +418,9 @@ int_div: dest:a src1:a src2:i clob:d len:32
int_div_un: dest:a src1:a src2:i clob:d len:32
int_rem: dest:d src1:a src2:i clob:a len:32
int_rem_un: dest:d src1:a src2:i clob:a len:32
int_divrem: dest:a src1:a src2:d src3:i clob:x len:15
int_divrem_un: dest:a src1:a src2:d src3:i clob:x len:15
int_divrem2: dest:d len:3
int_and: dest:i src1:i src2:i clob:1 len:4
int_or: dest:i src1:i src2:i clob:1 len:4
int_xor: dest:i src1:i src2:i clob:1 len:4
7 changes: 7 additions & 0 deletions src/mono/mono/mini/cpu-x86.mdesc
Original file line number Diff line number Diff line change
@@ -111,6 +111,9 @@ int_div: dest:a src1:a src2:i len:15 clob:d
int_div_un: dest:a src1:a src2:i len:15 clob:d
int_rem: dest:d src1:a src2:i len:15 clob:a
int_rem_un: dest:d src1:a src2:i len:15 clob:a
int_divrem: dest:a src1:a src2:d src3:i clob:x len:15
int_divrem_un: dest:a src1:a src2:d src3:i clob:x len:15
int_divrem2: dest:d len:3
int_and: dest:i src1:i src2:i clob:1 len:2
int_or: dest:i src1:i src2:i clob:1 len:2
int_xor: dest:i src1:i src2:i clob:1 len:2
@@ -303,6 +306,10 @@ x86_fp_load_i8: dest:f src1:b len:7
x86_fp_load_i4: dest:f src1:b len:7
x86_seteq_membase: src1:b len:7
x86_setne_membase: src1:b len:7
x86_bsf32: dest:i src1:i len:4
x86_bsf64: dest:l src1:l len:4
x86_bsr32: dest:i src1:i len:4
x86_bsr64: dest:l src1:l len:4

x86_add_reg_membase: dest:i src1:i src2:b clob:1 len:11
x86_sub_reg_membase: dest:i src1:i src2:b clob:1 len:11
34 changes: 34 additions & 0 deletions src/mono/mono/mini/mini-amd64.c
Original file line number Diff line number Diff line change
@@ -5126,6 +5126,27 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
amd64_div_reg_size (code, ins->sreg2, FALSE, 4);
}
break;
case OP_X86_LDIVREM:
amd64_div_reg (code, ins->sreg3, TRUE);
break;
case OP_X86_IDIVREM:
amd64_div_reg_size (code, ins->sreg3, TRUE, 4);
break;
case OP_X86_LDIVREMU:
amd64_div_reg (code, ins->sreg3, FALSE);
break;
case OP_X86_IDIVREMU:
amd64_div_reg_size (code, ins->sreg3, FALSE, 4);
break;
case OP_X86_IDIVREM2:
if (ins->dreg != AMD64_RDX)
amd64_mov_reg_reg (code, ins->dreg, AMD64_RDX, 4);
break;
case OP_X86_LDIVREM2:
if (ins->dreg != AMD64_RDX)
amd64_mov_reg_reg (code, ins->dreg, AMD64_RDX, 8);
break;

case OP_LMUL_OVF:
amd64_imul_reg_reg (code, ins->sreg1, ins->sreg2);
EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
@@ -5685,6 +5706,19 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
case OP_X86_XCHG:
amd64_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
break;
case OP_X86_BSF32:
amd64_bsf_size (code, ins->dreg, ins->sreg1, 4);
break;
case OP_X86_BSF64:
amd64_bsf_size (code, ins->dreg, ins->sreg1, 8);
break;
case OP_X86_BSR32:
amd64_bsr_size (code, ins->dreg, ins->sreg1, 4);
break;
case OP_X86_BSR64:
amd64_bsr_size (code, ins->dreg, ins->sreg1, 8);
break;

case OP_LOCALLOC:
/* keep alignment */
amd64_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_FRAME_ALIGNMENT - 1);
41 changes: 41 additions & 0 deletions src/mono/mono/mini/mini-llvm.c
Original file line number Diff line number Diff line change
@@ -5640,6 +5640,7 @@ process_bb (EmitContext *ctx, MonoBasicBlock *bb)
LLVMBuilderRef builder;
gboolean has_terminator;
LLVMValueRef lhs, rhs, arg3;
LLVMValueRef last_divrem = NULL;
int nins = 0;

cbb = get_end_bb (ctx, bb);
@@ -6638,6 +6639,44 @@ MONO_RESTORE_WARNING
values [ins->dreg] = LLVMBuildXor (builder, tz, width, dname);
break;
}
case OP_X86_IDIVREM:
case OP_X86_LDIVREM: {
const LLVMTypeRef part_type = ins->opcode==OP_X86_IDIVREM ? LLVMInt32Type () : LLVMInt64Type ();
const LLVMTypeRef full_type = ins->opcode==OP_X86_IDIVREM ? LLVMInt64Type () : LLVMInt128Type ();
const LLVMValueRef shift_amount = ins->opcode==OP_X86_IDIVREM ? const_int32 (32) : const_int32 (64);

LLVMValueRef dividend_low = LLVMBuildZExt (builder, convert (ctx, lhs, part_type), full_type, "");
LLVMValueRef dividend_high = LLVMBuildSExt (builder, convert (ctx, rhs, part_type), full_type, "");
LLVMValueRef dividend = LLVMBuildOr (builder, dividend_low,
LLVMBuildShl (builder, dividend_high, shift_amount, ""), "");
LLVMValueRef divisor = LLVMBuildSExt (builder, convert (ctx, arg3, part_type), full_type, "");
// LLVM should fuse the individual Div and Rem instructions into one DIV/IDIV on x86
values [ins->dreg] = LLVMBuildTrunc (builder, LLVMBuildSDiv (builder, dividend, divisor, ""), part_type, "");
last_divrem = LLVMBuildTrunc (builder, LLVMBuildSRem (builder, dividend, divisor, ""), part_type, "");
break;
}
case OP_X86_IDIVREMU:
case OP_X86_LDIVREMU: {
const LLVMTypeRef part_type = ins->opcode==OP_X86_IDIVREMU ? LLVMInt32Type () : LLVMInt64Type ();
const LLVMTypeRef full_type = ins->opcode==OP_X86_IDIVREMU ? LLVMInt64Type () : LLVMInt128Type ();
const LLVMValueRef shift_amount = ins->opcode==OP_X86_IDIVREMU ? const_int32 (32) : const_int32 (64);

LLVMValueRef dividend_low = LLVMBuildZExt (builder, convert (ctx, lhs, part_type), full_type, "");
LLVMValueRef dividend_high = LLVMBuildZExt (builder, convert (ctx, rhs, part_type), full_type, "");
LLVMValueRef dividend = LLVMBuildOr (builder, dividend_low,
LLVMBuildShl (builder, dividend_high, shift_amount, ""), "");
LLVMValueRef divisor = LLVMBuildZExt (builder, convert (ctx, arg3, part_type), full_type, "");
values [ins->dreg] = LLVMBuildTrunc (builder, LLVMBuildUDiv (builder, dividend, divisor, ""), part_type, "");
last_divrem = LLVMBuildTrunc (builder, LLVMBuildURem (builder, dividend, divisor, ""), part_type, "");
break;
}
case OP_X86_IDIVREM2:
case OP_X86_LDIVREM2: {
g_assert (last_divrem);
values [ins->dreg] = last_divrem;
last_divrem = NULL;
break;
}
#endif

case OP_ICONV_TO_I1:
@@ -12043,6 +12082,8 @@ MONO_RESTORE_WARNING
}
}

g_assert (last_divrem == NULL);

if (!ctx_ok (ctx))
return;

22 changes: 22 additions & 0 deletions src/mono/mono/mini/mini-ops.h
Original file line number Diff line number Diff line change
@@ -1184,6 +1184,28 @@ MINI_OP3(OP_MULX_HL64, "mulxhl64", LREG, LREG, LREG, LREG)

#endif

#if defined(TARGET_X86) || defined(TARGET_AMD64)
/*
* These operations exist to facilitate simultaneous int/uint division
* and remainder on x86/x86-64. On those platforms the DIV/IDIV instructions
* operate as follows: edx:eax / reg32 -> (eax=quotient, edx=remainder). Mono
* ops only support one destination register, so two operations are needed
* to obtain two result values. One would use {long,int}_divrem[_un] first,
* and the corresponding {long,int}_divrem2 immediately afterwards. The
* first instruction returns the quotient and leaves the remainder in the
* edx(rdx) register. The second instruction puts a virtual register over
* edx, so that its value can be used. Note that if the first instruction
* is emitted, the second must also be emitted (there is an assert). This
* works both in LLVM and mini.
*/
MINI_OP3(OP_X86_LDIVREM, "long_divrem", LREG, LREG, LREG, LREG)
MINI_OP3(OP_X86_LDIVREMU, "long_divrem_un", LREG, LREG, LREG, LREG)
MINI_OP3(OP_X86_LDIVREM2, "long_divrem2", LREG, NONE, NONE, NONE)
MINI_OP3(OP_X86_IDIVREM, "int_divrem", IREG, IREG, IREG, IREG)
MINI_OP3(OP_X86_IDIVREMU, "int_divrem_un", IREG, IREG, IREG, IREG)
MINI_OP3(OP_X86_IDIVREM2, "int_divrem2", IREG, NONE, NONE, NONE)
#endif

MINI_OP(OP_CREATE_SCALAR_UNSAFE, "create_scalar_unsafe", XREG, XREG, NONE)
MINI_OP(OP_CREATE_SCALAR, "create_scalar", XREG, XREG, NONE)

19 changes: 19 additions & 0 deletions src/mono/mono/mini/mini-x86.c
Original file line number Diff line number Diff line change
@@ -2877,6 +2877,19 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
}
break;
}
case OP_X86_LDIVREM:
case OP_X86_LDIVREMU:
case OP_X86_LDIVREM2:
g_assert_not_reached ();
break;
case OP_X86_IDIVREM:
case OP_X86_IDIVREMU:
x86_div_reg (code, ins->sreg3, ins->opcode==OP_X86_IDIVREM);
break;
case OP_X86_IDIVREM2:
if (ins->dreg != X86_EDX)
x86_mov_reg_reg (code, ins->dreg, X86_EDX);
break;
case OP_IOR:
x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
break;
@@ -3309,6 +3322,12 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
case OP_X86_XCHG:
x86_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
break;
case OP_X86_BSF32:
x86_bsf (code, ins->dreg, ins->sreg1);
break;
case OP_X86_BSR32:
x86_bsr (code, ins->dreg, ins->sreg1);
break;
case OP_LOCALLOC:
/* keep alignment */
x86_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_LOCALLOC_ALIGNMENT - 1);
46 changes: 45 additions & 1 deletion src/mono/mono/mini/simd-intrinsics.c
Original file line number Diff line number Diff line change
@@ -4599,6 +4599,7 @@ static SimdIntrinsic bmi2_methods [] = {
static SimdIntrinsic x86base_methods [] = {
{SN_BitScanForward},
{SN_BitScanReverse},
{SN_DivRem},
{SN_Pause, OP_XOP, INTRINS_SSE_PAUSE},
{SN_get_IsSupported}
};
@@ -4620,7 +4621,7 @@ static const IntrinGroup supported_x86_intrinsics [] = {
{ "Sse41", MONO_CPU_X86_SSE41, sse41_methods, sizeof (sse41_methods) },
{ "Sse42", MONO_CPU_X86_SSE42, sse42_methods, sizeof (sse42_methods) },
{ "Ssse3", MONO_CPU_X86_SSSE3, ssse3_methods, sizeof (ssse3_methods) },
{ "X86Base", 0, x86base_methods, sizeof (x86base_methods) },
{ "X86Base", MONO_CPU_INITED, x86base_methods, sizeof (x86base_methods), TRUE },
{ "X86Serialize", 0, unsupported, sizeof (unsupported) },
};

@@ -5246,6 +5247,49 @@ emit_x86_intrinsics (
ins->type = is_64bit ? STACK_I8 : STACK_I4;
MONO_ADD_INS (cfg->cbb, ins);
return ins;
case SN_DivRem: {
g_assert (!(TARGET_SIZEOF_VOID_P == 4 && is_64bit)); // x86(no -64) cannot do divisions with 64-bit regs
const MonoStackType divtype = is_64bit ? STACK_I8 : STACK_I4;
const int storetype = is_64bit ? OP_STOREI8_MEMBASE_REG : OP_STOREI4_MEMBASE_REG;
const int obj_size = MONO_ABI_SIZEOF (MonoObject);

// We must decide by the second argument, the first is always unsigned here
MonoTypeEnum arg1_type = fsig->param_count > 1 ? get_underlying_type (fsig->params [1]) : MONO_TYPE_VOID;
MonoInst* div;
MonoInst* div2;

if (type_enum_is_unsigned (arg1_type)) {
MONO_INST_NEW (cfg, div, is_64bit ? OP_X86_LDIVREMU : OP_X86_IDIVREMU);
} else {
MONO_INST_NEW (cfg, div, is_64bit ? OP_X86_LDIVREM : OP_X86_IDIVREM);
}
div->dreg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
div->sreg1 = args [0]->dreg; // we can use this directly, reg alloc knows that the contents will be destroyed
div->sreg2 = args [1]->dreg; // same here as ^
div->sreg3 = args [2]->dreg;
div->type = divtype;
MONO_ADD_INS (cfg->cbb, div);

// Protect the contents of edx/rdx by assigning it a vreg. The instruction must
// immediately follow DIV/IDIV so that edx content is not modified.
// In LLVM the remainder is already calculated, just need to capture it in a vreg.
MONO_INST_NEW (cfg, div2, is_64bit ? OP_X86_LDIVREM2 : OP_X86_IDIVREM2);
div2->dreg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
div2->type = divtype;
MONO_ADD_INS (cfg->cbb, div2);

// TODO: Can the creation of tuple be elided? (e.g. if deconstruction is used)
MonoInst* tuple = mono_compile_create_var (cfg, fsig->ret, OP_LOCAL);
MonoInst* tuple_addr;
EMIT_NEW_TEMPLOADA (cfg, tuple_addr, tuple->inst_c0);

MonoClassField* field1 = mono_class_get_field_from_name_full (tuple->klass, "Item1", NULL);
MONO_EMIT_NEW_STORE_MEMBASE (cfg, storetype, tuple_addr->dreg, field1->offset - obj_size, div->dreg);
MonoClassField* field2 = mono_class_get_field_from_name_full (tuple->klass, "Item2", NULL);
MONO_EMIT_NEW_STORE_MEMBASE (cfg, storetype, tuple_addr->dreg, field2->offset - obj_size, div2->dreg);
EMIT_NEW_TEMPLOAD (cfg, ins, tuple->inst_c0);
return ins;
}
default:
g_assert_not_reached ();
}
1 change: 1 addition & 0 deletions src/mono/mono/mini/simd-methods.h
Original file line number Diff line number Diff line change
@@ -293,6 +293,7 @@ METHOD(ComputeCrc32C)
// X86Base
METHOD(BitScanForward)
METHOD(BitScanReverse)
METHOD(DivRem)
METHOD(Pause)
// Crypto
METHOD(FixedRotate)
9 changes: 0 additions & 9 deletions src/tests/issues.targets
Original file line number Diff line number Diff line change
@@ -1219,15 +1219,6 @@
<ExcludeList Include = "$(XunitTestBinBase)/JIT/HardwareIntrinsics/X86/Sse42.X64/Crc32_*/**">
<Issue>https://github.com/dotnet/runtime/issues/54185</Issue>
</ExcludeList>
<ExcludeList Include = "$(XUnitTestBinBase)/JIT/HardwareIntrinsics/X86/X86Base/X86Base*/**">
<Issue>https://github.com/dotnet/runtime/issues/75767</Issue>
</ExcludeList>
<ExcludeList Include = "$(XUnitTestBinBase)/JIT/HardwareIntrinsics/X86/X86Base/DivRem*/**">
<Issue>https://github.com/dotnet/runtime/issues/75767</Issue>
</ExcludeList>
<ExcludeList Include = "$(XUnitTestBinBase)/JIT/HardwareIntrinsics/X86/X86Base.X64/X86Base.X64*/**">
<Issue>https://github.com/dotnet/runtime/issues/75767</Issue>
</ExcludeList>
<ExcludeList Include="$(XunitTestBinBase)/JIT/Directed/Convert/out_of_range_fp_to_int_conversions/*">
<Issue>Mono does not define out of range fp to int conversions</Issue>
</ExcludeList>