Skip to content

Commit 4ef0cc4

Browse files
committed
Add convert/load/store operations.
1 parent fb23d47 commit 4ef0cc4

File tree

4 files changed

+114
-31
lines changed

4 files changed

+114
-31
lines changed

src/mono/mono/arch/amd64/amd64-codegen.h

+12-1
Original file line numberDiff line numberDiff line change
@@ -801,6 +801,11 @@ typedef union {
801801
#define amd64_sse_movss_membase_reg(inst,basereg,disp,reg) emit_sse_membase_reg ((inst), (basereg), (disp), (reg), 0xf3, 0x0f, 0x11)
802802

803803
#define amd64_sse_movss_reg_membase(inst,dreg,basereg,disp) emit_sse_reg_membase ((inst), (dreg), (basereg), (disp), 0xf3, 0x0f, 0x10)
804+
/* MOVLPS, register <- memory form: forwards inst, dreg, basereg and disp unchanged
 * to emit_sse_reg_membase_op2 together with the two opcode bytes 0x0f 0x12. */
#define amd64_sse_movlps_reg_membase(inst,dreg,basereg,disp) emit_sse_reg_membase_op2 ((inst), (dreg), (basereg), (disp), 0x0f, 0x12)
805+
/* MOVHPS, register <- memory form: forwards inst, dreg, basereg and disp unchanged
 * to emit_sse_reg_membase_op2 together with the two opcode bytes 0x0f 0x16. */
#define amd64_sse_movhps_reg_membase(inst,dreg,basereg,disp) emit_sse_reg_membase_op2 ((inst), (dreg), (basereg), (disp), 0x0f, 0x16)
806+
807+
/* MOVLPS, memory <- register form: forwards inst, basereg, disp and reg unchanged
 * to emit_sse_membase_reg_op2 together with the two opcode bytes 0x0f 0x13. */
#define amd64_sse_movlps_membase_reg(inst,basereg,disp,reg) emit_sse_membase_reg_op2 ((inst), (basereg), (disp), (reg), 0x0f, 0x13)
808+
/* MOVHPS, memory <- register form: forwards inst, basereg, disp and reg unchanged
 * to emit_sse_membase_reg_op2 together with the two opcode bytes 0x0f 0x17. */
#define amd64_sse_movhps_membase_reg(inst,basereg,disp,reg) emit_sse_membase_reg_op2 ((inst), (basereg), (disp), (reg), 0x0f, 0x17)
804809

805810
#define amd64_sse_comisd_reg_reg(inst,dreg,reg) emit_sse_reg_reg ((inst),(dreg),(reg),0x66,0x0f,0x2f)
806811
/* COMISS register/register compare: forwards to emit_sse_reg_reg with the bytes
 * 0x67, 0x0f, 0x2f.
 * NOTE(review): the comisd sibling passes 0x66 as its first byte, while this one passes
 * 0x67. COMISS has no mandatory prefix, and 0x67 is presumably a filler prefix chosen
 * because emit_sse_reg_reg takes three opcode bytes. Confirm against the Intel SDM
 * before changing it. */
#define amd64_sse_comiss_reg_reg(inst,dreg,reg) emit_sse_reg_reg ((inst),(dreg),(reg),0x67,0x0f,0x2f)
@@ -813,9 +818,11 @@ typedef union {
813818
#define amd64_sse_cvtss2si_reg_reg(inst,dreg,reg) emit_sse_reg_reg_size ((inst), (dreg), (reg), 0xf3, 0x0f, 0x2d, 8)
814819

815820
#define amd64_sse_cvttsd2si_reg_reg_size(inst,dreg,reg,size) emit_sse_reg_reg_size ((inst), (dreg), (reg), 0xf2, 0x0f, 0x2c, (size))
816-
#define amd64_sse_cvtss2si_reg_reg_size(inst,dreg,reg,size) emit_sse_reg_reg_size ((inst), (dreg), (reg), 0xf3, 0x0f, 0x2c, (size))
821+
/* CVTSS2SI (non-truncating float32 -> integer) with a caller-supplied operand size:
 * forwards to emit_sse_reg_reg_size with the bytes 0xf3 0x0f 0x2d. The truncating
 * variant (cvttss2si) differs only in the last opcode byte, 0x2c. */
#define amd64_sse_cvtss2si_reg_reg_size(inst,dreg,reg,size) emit_sse_reg_reg_size ((inst), (dreg), (reg), 0xf3, 0x0f, 0x2d, (size))
822+
/* CVTTSS2SI (truncating float32 -> integer) with a caller-supplied operand size:
 * forwards to emit_sse_reg_reg_size with the bytes 0xf3 0x0f 0x2c. The non-truncating
 * variant (cvtss2si) differs only in the last opcode byte, 0x2d. */
#define amd64_sse_cvttss2si_reg_reg_size(inst,dreg,reg,size) emit_sse_reg_reg_size ((inst), (dreg), (reg), 0xf3, 0x0f, 0x2c, (size))
817823

818824
#define amd64_sse_cvttsd2si_reg_reg(inst,dreg,reg) amd64_sse_cvttsd2si_reg_reg_size ((inst), (dreg), (reg), 8)
825+
/* CVTTSS2SI (truncating float32 -> integer) with a fixed operand size of 8.
 * This must forward to the single-precision helper amd64_sse_cvttss2si_reg_reg_size
 * (0xf3 prefix), not to the double-precision amd64_sse_cvttsd2si_reg_reg_size
 * (0xf2 prefix), which would emit a different instruction. */
#define amd64_sse_cvttss2si_reg_reg(inst,dreg,reg) amd64_sse_cvttss2si_reg_reg_size ((inst), (dreg), (reg), 8)
819826

820827
#define amd64_sse_cvtsi2sd_reg_reg_size(inst,dreg,reg,size) emit_sse_reg_reg_size ((inst), (dreg), (reg), 0xf2, 0x0f, 0x2a, (size))
821828

@@ -1178,6 +1185,8 @@ typedef union {
11781185

11791186
#define amd64_sse_movntps_reg_membase(inst, dreg, basereg, disp) emit_sse_reg_membase_op2((inst), (dreg), (basereg), (disp), 0x0f, 0x2b)
11801187

1188+
/* MOVNTPS, memory <- register form: forwards inst, basereg, disp and reg unchanged
 * to emit_sse_membase_reg_op2 together with the two opcode bytes 0x0f 0x2b. */
#define amd64_sse_movntps_membase_reg(inst, basereg, disp, reg) emit_sse_membase_reg_op2((inst), (basereg), (disp), (reg), 0x0f, 0x2b)
1189+
11811190
#define amd64_sse_prefetch_reg_membase(inst, arg, basereg, disp) emit_sse_reg_membase_op2((inst), (arg), (basereg), (disp), 0x0f, 0x18)
11821191

11831192
#define amd64_sse_lzcnt_reg_reg_size(inst, dreg, reg, size) emit_sse_reg_reg_size((inst), (dreg), (reg), 0xf3, 0x0f, 0xbd, (size))
@@ -1191,6 +1200,8 @@ typedef union {
11911200
#define amd64_sse_blendpd_reg_reg(inst,dreg,sreg,imm) emit_sse_reg_reg_op4_imm((inst), (dreg), (sreg), 0x66, 0x0f, 0x3a, 0x0d, (imm))
11921201
#define amd64_movq_reg_reg(inst,dreg,sreg) emit_sse_reg_reg ((inst), (dreg), (sreg), 0xf3, 0x0f, 0x7e)
11931202

1203+
/* SFENCE: takes no operands; emits the fixed three-byte sequence 0x0f 0xae 0xf8
 * through emit_opcode3. */
#define amd64_sse_sfence(inst) emit_opcode3 ((inst), 0x0f, 0xae, 0xf8)
1204+
11941205
/* Generated from x86-codegen.h */
11951206

11961207
#define amd64_breakpoint_size(inst,size) do { x86_breakpoint(inst); } while (0)

src/mono/mono/mini/cpu-amd64.mdesc

+9
Original file line numberDiff line numberDiff line change
@@ -825,8 +825,11 @@ expand_i4: dest:x src1:i len:11
825825
expand_i8: dest:x src1:i len:11
826826
expand_r4: dest:x src1:f len:16
827827
expand_r8: dest:x src1:f len:13
828+
xop: len:16
828829
xop_x_x_x: dest:x src1:x src2:x len:16 clob:1
829830
xop_x_x: dest:x src1:x len:16 clob:1
831+
xop_i4_x: dest:i src1:x len:16
832+
xop_i8_x: dest:i src1:x len:16
830833
sse41_dpps_imm: dest:x src1:x src2:x len:7 clob:1
831834
sse41_dppd_imm: dest:x src1:x src2:x len:7 clob:1
832835
vector_andnot: dest:x src1:x src2:x len:7 clob:1
@@ -839,7 +842,13 @@ sse_subss: dest:x src1:x src2:x len:7 clob:1
839842
sse_mulss: dest:x src1:x src2:x len:7 clob:1
840843
sse_divss: dest:x src1:x src2:x len:7 clob:1
841844
sse_cvtsi2ss: dest:x src1:x src2:i len:7 clob:1
845+
sse_cvtsi2ss64: dest:x src1:x src2:i len:7 clob:1
842846
sse_movss: dest:x src1:b len:16
847+
sse_movlps_load: dest:x src1:x src2:b len:16 clob:1
848+
sse_movhps_load: dest:x src1:x src2:b len:16 clob:1
849+
sse_movlps_store: src1:i src2:x len:16
850+
sse_movhps_store: src1:i src2:x len:16
851+
sse_movss_store: src1:i src2:x len:16
843852

844853
roundp: dest:x src1:x len:10
845854

src/mono/mono/mini/mini-amd64.c

+58-8
Original file line numberDiff line numberDiff line change
@@ -5923,23 +5923,23 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
59235923
break;
59245924

59255925
case OP_RCONV_TO_I1:
5926-
amd64_sse_cvtss2si_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
5926+
amd64_sse_cvttss2si_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
59275927
amd64_widen_reg (code, ins->dreg, ins->dreg, TRUE, FALSE);
59285928
break;
59295929
case OP_RCONV_TO_U1:
5930-
amd64_sse_cvtss2si_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
5930+
amd64_sse_cvttss2si_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
59315931
amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
59325932
break;
59335933
case OP_RCONV_TO_I2:
5934-
amd64_sse_cvtss2si_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
5934+
amd64_sse_cvttss2si_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
59355935
amd64_widen_reg (code, ins->dreg, ins->dreg, TRUE, TRUE);
59365936
break;
59375937
case OP_RCONV_TO_U2:
5938-
amd64_sse_cvtss2si_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
5938+
amd64_sse_cvttss2si_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
59395939
amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, TRUE);
59405940
break;
59415941
case OP_RCONV_TO_I4:
5942-
amd64_sse_cvtss2si_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
5942+
amd64_sse_cvttss2si_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
59435943
break;
59445944
case OP_RCONV_TO_U4:
59455945
// Use 8 as register size to get Nan/Inf conversion result truncated to 0
@@ -6712,6 +6712,16 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
67126712
}
67136713
break;
67146714
}
6715+
case OP_XOP:
6716+
switch (ins->inst_c0) {
6717+
case INTRINS_SSE_SFENCE:
6718+
amd64_sse_sfence (code);
6719+
break;
6720+
default:
6721+
g_assert_not_reached ();
6722+
break;
6723+
}
6724+
break;
67156725
case OP_XOP_X_X_X: {
67166726
switch (ins->inst_c0) {
67176727
case INTRINS_SSE_PHADDW:
@@ -6746,6 +6756,28 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
67466756
}
67476757
break;
67486758
}
6759+
case OP_XOP_I4_X:
6760+
case OP_XOP_I8_X: {
6761+
switch (ins->inst_c0) {
6762+
case INTRINS_SSE_CVTSS2SI:
6763+
amd64_sse_cvtss2si_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
6764+
break;
6765+
case INTRINS_SSE_CVTTSS2SI:
6766+
amd64_sse_cvttss2si_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
6767+
break;
6768+
case INTRINS_SSE_CVTSS2SI64:
6769+
amd64_sse_cvtss2si_reg_reg_size (code, ins->dreg, ins->sreg1, 8);
6770+
break;
6771+
case INTRINS_SSE_CVTTSS2SI64:
6772+
amd64_sse_cvttss2si_reg_reg_size (code, ins->dreg, ins->sreg1, 8);
6773+
break;
6774+
default:
6775+
g_assert_not_reached ();
6776+
break;
6777+
}
6778+
break;
6779+
}
6780+
67496781
case OP_SSE41_DPPS_IMM:
67506782
amd64_sse_dpps_reg_reg (code, ins->dreg, ins->sreg2, ins->inst_c0);
67516783
break;
@@ -6949,10 +6981,28 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
69496981
amd64_sse_divss_reg_reg (code, ins->dreg, ins->sreg2);
69506982
break;
69516983
case OP_SSE_CVTSI2SS:
6952-
amd64_sse_cvtsi2ss_reg_reg_size (code, ins->dreg, ins->sreg1, 8);
6984+
amd64_sse_cvtsi2ss_reg_reg_size (code, ins->dreg, ins->sreg2, 4);
6985+
break;
6986+
case OP_SSE_CVTSI2SS64:
6987+
amd64_sse_cvtsi2ss_reg_reg_size (code, ins->dreg, ins->sreg2, 8);
69536988
break;
69546989
case OP_SSE_MOVSS:
6955-
amd64_sse_movss_reg_reg (code, ins->dreg, ins->sreg1);
6990+
amd64_sse_movss_reg_membase (code, ins->dreg, ins->sreg1, 0);
6991+
break;
6992+
case OP_SSE_MOVLPS_LOAD:
6993+
amd64_sse_movlps_reg_membase (code, ins->dreg, ins->sreg2, 0);
6994+
break;
6995+
case OP_SSE_MOVHPS_LOAD:
6996+
amd64_sse_movhps_reg_membase (code, ins->dreg, ins->sreg2, 0);
6997+
break;
6998+
case OP_SSE_MOVLPS_STORE:
6999+
amd64_sse_movlps_membase_reg (code, ins->sreg1, 0, ins->sreg2);
7000+
break;
7001+
case OP_SSE_MOVHPS_STORE:
7002+
amd64_sse_movhps_membase_reg (code, ins->sreg1, 0, ins->sreg2);
7003+
break;
7004+
case OP_SSE_MOVSS_STORE:
7005+
amd64_sse_movss_membase_reg (code, ins->sreg1, 0, ins->sreg2);
69567006
break;
69577007

69587008
case OP_EXTRACT_MASK:
@@ -7441,7 +7491,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
74417491
amd64_sse_movaps_membase_reg (code, ins->dreg, ins->inst_offset, ins->sreg1);
74427492
break;
74437493
case OP_STOREX_NTA_MEMBASE_REG:
7444-
amd64_sse_movntps_reg_membase (code, ins->dreg, ins->sreg1, ins->inst_offset);
7494+
amd64_sse_movntps_membase_reg (code, ins->dreg, ins->inst_offset, ins->sreg1);
74457495
break;
74467496
case OP_PREFETCH_MEMBASE:
74477497
amd64_sse_prefetch_reg_membase (code, ins->backend.arg_info, ins->sreg1, ins->inst_offset);

src/mono/mono/mini/simd-intrinsics.c

+35-22
Original file line numberDiff line numberDiff line change
@@ -1025,16 +1025,6 @@ emit_hardware_intrinsics (
10251025
case SN_CompareScalarUnorderedLessThanOrEqual:
10261026
case SN_CompareScalarUnorderedNotEqual:
10271027
case SN_CompareUnordered:
1028-
case SN_ConvertScalarToVector128Single:
1029-
case SN_ConvertToInt32:
1030-
case SN_ConvertToInt32WithTruncation:
1031-
case SN_ConvertToInt64:
1032-
case SN_ConvertToInt64WithTruncation:
1033-
case SN_LoadAlignedVector128:
1034-
case SN_LoadHigh:
1035-
case SN_LoadLow:
1036-
case SN_LoadScalarVector128:
1037-
case SN_LoadVector128:
10381028
case SN_Max:
10391029
case SN_MaxScalar:
10401030
case SN_Min:
@@ -1054,13 +1044,6 @@ emit_hardware_intrinsics (
10541044
case SN_Shuffle:
10551045
case SN_Sqrt:
10561046
case SN_SqrtScalar:
1057-
case SN_Store:
1058-
case SN_StoreAligned:
1059-
case SN_StoreAlignedNonTemporal:
1060-
case SN_StoreFence:
1061-
case SN_StoreHigh:
1062-
case SN_StoreLow:
1063-
case SN_StoreScalar:
10641047
case SN_UnpackHigh:
10651048
case SN_UnpackLow:
10661049
return NULL;
@@ -4325,11 +4308,11 @@ static SimdIntrinsic sse_methods [] = {
43254308
{SN_ConvertToInt64WithTruncation, OP_XOP_I8_X, INTRINS_SSE_CVTTSS2SI64},
43264309
{SN_Divide, OP_XBINOP, OP_FDIV},
43274310
{SN_DivideScalar, OP_SSE_DIVSS},
4328-
{SN_LoadAlignedVector128, OP_SSE_LOADU, 16 /* alignment */},
4311+
{SN_LoadAlignedVector128, OP_LOADX_ALIGNED_MEMBASE},
43294312
{SN_LoadHigh, OP_SSE_MOVHPS_LOAD},
43304313
{SN_LoadLow, OP_SSE_MOVLPS_LOAD},
43314314
{SN_LoadScalarVector128, OP_SSE_MOVSS},
4332-
{SN_LoadVector128, OP_SSE_LOADU, 1 /* alignment */},
4315+
{SN_LoadVector128, OP_LOADX_MEMBASE},
43334316
{SN_Max, OP_XOP_X_X_X, INTRINS_SSE_MAXPS},
43344317
{SN_MaxScalar, OP_XOP_X_X_X, INTRINS_SSE_MAXSS},
43354318
{SN_Min, OP_XOP_X_X_X, INTRINS_SSE_MINPS},
@@ -4352,9 +4335,9 @@ static SimdIntrinsic sse_methods [] = {
43524335
{SN_Shuffle},
43534336
{SN_Sqrt, OP_XOP_X_X, INTRINS_SIMD_SQRT_R4},
43544337
{SN_SqrtScalar},
4355-
{SN_Store, OP_SIMD_STORE, 1 /* alignment */},
4356-
{SN_StoreAligned, OP_SIMD_STORE, 16 /* alignment */},
4357-
{SN_StoreAlignedNonTemporal, OP_SSE_MOVNTPS, 16 /* alignment */},
4338+
{SN_Store},
4339+
{SN_StoreAligned},
4340+
{SN_StoreAlignedNonTemporal},
43584341
{SN_StoreFence, OP_XOP, INTRINS_SSE_SFENCE},
43594342
{SN_StoreHigh, OP_SSE_MOVHPS_STORE},
43604343
{SN_StoreLow, OP_SSE_MOVLPS_STORE},
@@ -4660,6 +4643,36 @@ emit_x86_intrinsics (
46604643
g_assert_not_reached ();
46614644
break;
46624645
}
4646+
case SN_Store: {
4647+
if (!COMPILE_LLVM (cfg)) {
4648+
MonoInst *ins;
4649+
EMIT_NEW_STORE_MEMBASE (cfg, ins, OP_STOREX_MEMBASE, args [0]->dreg, 0, args [1]->dreg);
4650+
ins->klass = klass;
4651+
return ins;
4652+
} else {
4653+
return emit_simd_ins_for_sig (cfg, klass, OP_SIMD_STORE, 1, arg0_type, fsig, args);
4654+
}
4655+
}
4656+
case SN_StoreAligned: {
4657+
if (!COMPILE_LLVM (cfg)) {
4658+
MonoInst *ins;
4659+
EMIT_NEW_STORE_MEMBASE (cfg, ins, OP_STOREX_ALIGNED_MEMBASE_REG, args [0]->dreg, 0, args [1]->dreg);
4660+
ins->klass = klass;
4661+
return ins;
4662+
} else {
4663+
return emit_simd_ins_for_sig (cfg, klass, OP_SIMD_STORE, 16, arg0_type, fsig, args);
4664+
}
4665+
}
4666+
case SN_StoreAlignedNonTemporal: {
4667+
if (!COMPILE_LLVM (cfg)) {
4668+
MonoInst *ins;
4669+
EMIT_NEW_STORE_MEMBASE (cfg, ins, OP_STOREX_NTA_MEMBASE_REG, args [0]->dreg, 0, args [1]->dreg);
4670+
ins->klass = klass;
4671+
return ins;
4672+
} else {
4673+
return emit_simd_ins_for_sig (cfg, klass, OP_SSE_MOVNTPS, 16, arg0_type, fsig, args);
4674+
}
4675+
}
46634676
case SN_LoadScalarVector128:
46644677
return NULL;
46654678
default:

0 commit comments

Comments
 (0)