Skip to content

Commit 09e796a

Browse files
authored
[mono] Basic SIMD support for System.Numerics.Vector2 on arm64 (#91659)
* enable Basic SIMD support for Vector2 on arm64
* rename Vector2/3/4 methods table
1 parent 3551112 commit 09e796a

File tree

5 files changed

+54
-26
lines changed

5 files changed

+54
-26
lines changed

src/mono/mono/mini/mini-arm64.c

+16-7
Original file line numberDiff line numberDiff line change
@@ -3932,14 +3932,17 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
39323932
break;
39333933

39343934
case OP_XZERO:
3935-
arm_neon_eor_16b (code, dreg, dreg, dreg);
3935+
if (ins->klass && mono_class_value_size (ins->klass, NULL) == 8)
3936+
arm_neon_eor_8b (code, dreg, dreg, dreg);
3937+
else
3938+
arm_neon_eor_16b (code, dreg, dreg, dreg);
39363939
break;
39373940
case OP_XONES:
39383941
arm_neon_eor_16b (code, dreg, dreg, dreg);
39393942
arm_neon_not_16b (code, dreg, dreg);
39403943
break;
39413944
case OP_XEXTRACT:
3942-
code = emit_xextract (code, VREG_FULL, GTMREG_TO_INT (ins->inst_c0), dreg, sreg1);
3945+
code = emit_xextract (code, (ins->inst_c1 == 8) ? VREG_LOW : VREG_FULL, GTMREG_TO_INT (ins->inst_c0), dreg, sreg1);
39433946
break;
39443947
case OP_STOREX_MEMBASE:
39453948
if (ins->klass && mono_class_value_size (ins->klass, NULL) == 8)
@@ -3981,7 +3984,10 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
39813984
case OP_EXPAND_R4:
39823985
case OP_EXPAND_R8: {
39833986
const int t = get_type_size_macro (ins->inst_c1);
3984-
arm_neon_fdup_e (code, VREG_FULL, t, dreg, sreg1, 0);
3987+
if (ins->opcode == OP_EXPAND_R8)
3988+
arm_neon_fdup_e (code, VREG_FULL, t, dreg, sreg1, 0);
3989+
else
3990+
arm_neon_fdup_e (code, get_vector_size_macro (ins), t, dreg, sreg1, 0);
39853991
break;
39863992
}
39873993
case OP_EXTRACT_I1:
@@ -4004,6 +4010,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
40044010
// Technically, this broadcasts element #inst_c0 to all dest XREG elements; whereas it should
40054011
// set the FREG to the said element. Since FREG and XREG pool is the same on arm64 and the rest
40064012
// of the F/XREG is ignored in FREG mode, this operation remains valid.
4013+
// FIXME: pass VREG_LOW for 64-bit vectors
40074014
arm_neon_fdup_e (code, VREG_FULL, t, dreg, sreg1, GTMREG_TO_UINT32 (ins->inst_c0));
40084015
}
40094016
break;
@@ -4098,17 +4105,19 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
40984105

40994106
case OP_ARM64_XADDV: {
41004107
switch (ins->inst_c0) {
4101-
case INTRINS_AARCH64_ADV_SIMD_FADDV:
4108+
case INTRINS_AARCH64_ADV_SIMD_FADDV: {
4109+
const int width = get_vector_size_macro (ins);
41024110
if (ins->inst_c1 == MONO_TYPE_R8) {
41034111
arm_neon_faddp (code, VREG_FULL, TYPE_F64, dreg, sreg1, sreg1);
41044112
} else if (ins->inst_c1 == MONO_TYPE_R4) {
4105-
arm_neon_faddp (code, VREG_FULL, TYPE_F32, dreg, sreg1, sreg1);
4106-
arm_neon_faddp (code, VREG_FULL, TYPE_F32, dreg, dreg, dreg);
4113+
arm_neon_faddp (code, width, TYPE_F32, dreg, sreg1, sreg1);
4114+
if (width == VREG_FULL)
4115+
arm_neon_faddp (code, width, TYPE_F32, dreg, dreg, dreg);
41074116
} else {
41084117
g_assert_not_reached ();
41094118
}
41104119
break;
4111-
4120+
}
41124121
case INTRINS_AARCH64_ADV_SIMD_UADDV:
41134122
case INTRINS_AARCH64_ADV_SIMD_SADDV:
41144123
if (get_type_size_macro (ins->inst_c1) == TYPE_I64)

src/mono/mono/mini/mini-runtime.c

+9-1
Original file line numberDiff line numberDiff line change
@@ -4469,7 +4469,7 @@ init_class (MonoClass *klass)
44694469

44704470
const char *name = m_class_get_name (klass);
44714471

4472-
#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_WASM)
4472+
#if defined(TARGET_AMD64) || defined(TARGET_WASM)
44734473
/*
44744474
* Some of the intrinsics used by the VectorX classes are only implemented on amd64.
44754475
* The JIT can't handle SIMD types with != 16 size yet.
@@ -4481,6 +4481,14 @@ init_class (MonoClass *klass)
44814481
}
44824482
#endif
44834483

4484+
#ifdef TARGET_ARM64
4485+
if (!strcmp (m_class_get_name_space (klass), "System.Numerics")) {
4486+
// FIXME: Support Vector3 https://github.com/dotnet/runtime/issues/81501
4487+
if (!strcmp (name, "Vector2") || !strcmp (name, "Vector4") || !strcmp (name, "Quaternion") || !strcmp (name, "Plane"))
4488+
mono_class_set_is_simd_type (klass, TRUE);
4489+
}
4490+
#endif
4491+
44844492
if (m_class_is_ginst (klass)) {
44854493
if (!strcmp (name, "Vector`1") || !strcmp (name, "Vector64`1") || !strcmp (name, "Vector128`1") || !strcmp (name, "Vector256`1") || !strcmp (name, "Vector512`1")) {
44864494
MonoGenericClass *gclass = mono_class_try_get_generic_class (klass);

src/mono/mono/mini/mini.c

+3
Original file line numberDiff line numberDiff line change
@@ -4573,6 +4573,9 @@ mini_get_simd_type_info (MonoClass *klass, guint32 *nelems)
45734573
if (!strcmp (klass_name, "Vector4") || !strcmp (klass_name, "Quaternion") || !strcmp (klass_name, "Plane")) {
45744574
*nelems = 4;
45754575
return MONO_TYPE_R4;
4576+
} else if (!strcmp (klass_name, "Vector2")) {
4577+
*nelems = 2;
4578+
return MONO_TYPE_R4;
45764579
} else if (!strcmp (klass_name, "Vector`1") || !strcmp (klass_name, "Vector64`1") || !strcmp (klass_name, "Vector128`1") || !strcmp (klass_name, "Vector256`1") || !strcmp (klass_name, "Vector512`1")) {
45774580
MonoType *etype = mono_class_get_generic_class (klass)->context.class_inst->type_argv [0];
45784581
int size = mono_class_value_size (klass, NULL);

src/mono/mono/mini/simd-arm64.h

+8
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,14 @@ SIMD_OP (64, OP_XCOMPARE_FP, CMP_LE, WTDSS_REV, _UNDEF
2929

3030
SIMD_OP (64, OP_XBINOP, OP_IADD, WTDSS, arm_neon_add, arm_neon_add, arm_neon_add, _UNDEF, _UNDEF, _UNDEF)
3131
SIMD_OP (64, OP_XBINOP, OP_FADD, WTDSS, _UNDEF, _UNDEF, _UNDEF, _UNDEF, arm_neon_fadd, _UNDEF)
32+
SIMD_OP (64, OP_XBINOP, OP_FSUB, WTDSS, _UNDEF, _UNDEF, _UNDEF, _UNDEF, arm_neon_fsub, _UNDEF)
33+
SIMD_OP (64, OP_XBINOP, OP_FMAX, WTDSS, _UNDEF, _UNDEF, _UNDEF, _UNDEF, arm_neon_fmax, _UNDEF)
34+
SIMD_OP (64, OP_XBINOP, OP_FMIN, WTDSS, _UNDEF, _UNDEF, _UNDEF, _UNDEF, arm_neon_fmin, _UNDEF)
35+
SIMD_OP (64, OP_XBINOP, OP_FMUL, WTDSS, _UNDEF, _UNDEF, _UNDEF, _UNDEF, arm_neon_fmul, _UNDEF)
36+
SIMD_OP (64, OP_XBINOP, OP_FDIV, WTDSS, _UNDEF, _UNDEF, _UNDEF, _UNDEF, arm_neon_fdiv, _UNDEF)
37+
SIMD_OP (64, OP_ARM64_XADDV, INTRINS_AARCH64_ADV_SIMD_FADDV, WTDS, _UNDEF, _UNDEF, _UNDEF, _UNDEF, _SKIP, _UNDEF)
38+
SIMD_OP (64, OP_XOP_OVR_X_X, INTRINS_AARCH64_ADV_SIMD_FSQRT, WTDS, _UNDEF, _UNDEF, _UNDEF, _UNDEF, arm_neon_fsqrt, _UNDEF)
39+
SIMD_OP (64, OP_XOP_OVR_X_X, INTRINS_AARCH64_ADV_SIMD_FABS, WTDS, _UNDEF, _UNDEF, _UNDEF, _UNDEF, arm_neon_fabs, _UNDEF)
3240

3341
/* 128-bit vectors */
3442
/* Width Opcode Function Operand config I8 I16 I32 I64 F32 F64 */

src/mono/mono/mini/simd-intrinsics.c

+18-18
Original file line numberDiff line numberDiff line change
@@ -1845,7 +1845,7 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
18451845
arg_class = mono_class_from_mono_type_internal (fsig->params [0]);
18461846
}
18471847

1848-
// FIXME: Add support for Vector64 on arm64
1848+
// FIXME: Add support for Vector64 on arm64 https://github.com/dotnet/runtime/issues/90402
18491849
int size = mono_class_value_size (arg_class, NULL);
18501850
if (size != 16)
18511851
return NULL;
@@ -2608,7 +2608,7 @@ emit_vector64_vector128_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSign
26082608
}
26092609

26102610
// System.Numerics.Vector2/Vector3/Vector4, Quaternion, and Plane
2611-
static guint16 vector2_methods[] = {
2611+
static guint16 vector_2_3_4_methods[] = {
26122612
SN_ctor,
26132613
SN_Abs,
26142614
SN_Add,
@@ -2651,12 +2651,12 @@ static G_GNUC_UNUSED MonoInst*
26512651
emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
26522652
{
26532653
MonoInst *ins;
2654-
int id;
2654+
int id, len;
26552655
MonoClass *klass;
26562656
MonoType *type, *etype;
26572657

26582658

2659-
id = lookup_intrins (vector2_methods, sizeof (vector2_methods), cmethod);
2659+
id = lookup_intrins (vector_2_3_4_methods, sizeof (vector_2_3_4_methods), cmethod);
26602660
if (id == -1) {
26612661
// https://github.com/dotnet/runtime/issues/81961
26622662
// check_no_intrinsic_cattr (cmethod);
@@ -2677,6 +2677,7 @@ emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f
26772677
klass = cmethod->klass;
26782678
type = m_class_get_byval_arg (klass);
26792679
etype = m_class_get_byval_arg (mono_defaults.single_class);
2680+
len = mono_class_value_size (klass, NULL) / 4;
26802681

26812682
// Similar to the cases in emit_sys_numerics_vector_t ()
26822683
switch (id) {
@@ -2706,7 +2707,6 @@ emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f
27062707
}
27072708
// FIXME: These don't work since Vector2/Vector3 are not handled as SIMD
27082709
#if 0
2709-
int len = mono_class_value_size (klass, NULL) / 4;
27102710
} else if (len == 3 && fsig->param_count == 2 && fsig->params [0]->type == MONO_TYPE_VALUETYPE && fsig->params [1]->type == etype->type) {
27112711
/* Vector3 (Vector2, float) */
27122712
int dreg = load_simd_vreg (cfg, cmethod, args [0], NULL);
@@ -2739,26 +2739,25 @@ emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f
27392739
case SN_get_Item: {
27402740
// GetElement is marked as Intrinsic, but handling this in get_Item leads to better code
27412741
int src1 = load_simd_vreg (cfg, cmethod, args [0], NULL);
2742-
int elems = 4;
27432742
MonoTypeEnum ty = etype->type;
27442743

27452744
if (args [1]->opcode == OP_ICONST) {
27462745
// If the index is provably a constant, we can generate vastly better code.
27472746
int index = GTMREG_TO_INT (args[1]->inst_c0);
27482747

2749-
if (index < 0 || index >= elems) {
2750-
MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, args [1]->dreg, elems);
2748+
if (index < 0 || index >= len) {
2749+
MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, args [1]->dreg, len);
27512750
MONO_EMIT_NEW_COND_EXC (cfg, GE_UN, "ArgumentOutOfRangeException");
27522751
}
27532752

27542753
int opcode = type_to_extract_op (ty);
27552754
ins = emit_simd_ins (cfg, klass, opcode, src1, -1);
2756-
ins->inst_c0 = args[1]->inst_c0;
2755+
ins->inst_c0 = index;
27572756
ins->inst_c1 = ty;
27582757
return ins;
27592758
}
27602759

2761-
MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, args [1]->dreg, elems);
2760+
MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, args [1]->dreg, len);
27622761
MONO_EMIT_NEW_COND_EXC (cfg, GE_UN, "ArgumentOutOfRangeException");
27632762

27642763
if (COMPILE_LLVM (cfg)) {
@@ -2826,14 +2825,14 @@ emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f
28262825
g_assert (fsig->hasthis && fsig->param_count == 2 && fsig->params [0]->type == MONO_TYPE_I4 && fsig->params [1]->type == MONO_TYPE_R4);
28272826

28282827
gboolean indirect = FALSE;
2829-
int elems = 4, index = GTMREG_TO_INT (args [1]->inst_c0);
2828+
int index = GTMREG_TO_INT (args [1]->inst_c0);
28302829
int dreg = load_simd_vreg (cfg, cmethod, args [0], &indirect);
28312830

28322831
if (args [1]->opcode == OP_ICONST) {
28332832
// If the index is provably a constant, we can generate vastly better code.
28342833
// Bounds check only if the index is out of range
2835-
if (index < 0 || index >= elems) {
2836-
MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, args [1]->dreg, elems);
2834+
if (index < 0 || index >= len) {
2835+
MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, args [1]->dreg, len);
28372836
MONO_EMIT_NEW_COND_EXC (cfg, GE_UN, "ArgumentOutOfRangeException");
28382837
}
28392838

@@ -2850,7 +2849,7 @@ emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f
28502849
return ins;
28512850
}
28522851

2853-
MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, args [1]->dreg, elems);
2852+
MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, args [1]->dreg, len);
28542853
MONO_EMIT_NEW_COND_EXC (cfg, GE_UN, "ArgumentOutOfRangeException");
28552854

28562855
if (COMPILE_LLVM (cfg)) {
@@ -2968,7 +2967,7 @@ emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f
29682967
#endif
29692968
}
29702969
case SN_CopyTo:
2971-
// FIXME:
2970+
// FIXME: https://github.com/dotnet/runtime/issues/91394
29722971
return NULL;
29732972
case SN_Clamp: {
29742973
if (!(!fsig->hasthis && fsig->param_count == 3 && mono_metadata_type_equal (fsig->ret, type) && mono_metadata_type_equal (fsig->params [0], type) && mono_metadata_type_equal (fsig->params [1], type) && mono_metadata_type_equal (fsig->params [2], type)))
@@ -2991,7 +2990,7 @@ emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f
29912990
case SN_LengthSquared:
29922991
case SN_Lerp:
29932992
case SN_Normalize: {
2994-
// FIXME:
2993+
// FIXME: https://github.com/dotnet/runtime/issues/91394
29952994
return NULL;
29962995
}
29972996
default:
@@ -5896,8 +5895,9 @@ arch_emit_simd_intrinsics (const char *class_ns, const char *class_name, MonoCom
58965895
}
58975896

58985897
if (!strcmp (class_ns, "System.Numerics")) {
5899-
// FIXME: Support Vector2/Vector3
5900-
if (!strcmp (class_name, "Vector4") || !strcmp (class_name, "Quaternion") || !strcmp (class_name, "Plane"))
5898+
// FIXME: Support Vector3 https://github.com/dotnet/runtime/issues/81501
5899+
if (!strcmp (class_name, "Vector2") || !strcmp (class_name, "Vector4") ||
5900+
!strcmp (class_name, "Quaternion") || !strcmp (class_name, "Plane"))
59015901
return emit_vector_2_3_4 (cfg, cmethod, fsig, args);
59025902
}
59035903

0 commit comments

Comments
 (0)