Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[mono] Basic SIMD support for System.Numerics.Vector2 on arm64 #91659

Merged
merged 17 commits into from
Sep 21, 2023
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 14 additions & 8 deletions src/mono/mono/mini/mini-arm64.c
Original file line number Diff line number Diff line change
Expand Up @@ -3917,14 +3917,17 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
break;

case OP_XZERO:
arm_neon_eor_16b (code, dreg, dreg, dreg);
if (ins->klass && mono_class_value_size (ins->klass, NULL) == 8)
arm_neon_eor_8b (code, dreg, dreg, dreg);
else
arm_neon_eor_16b (code, dreg, dreg, dreg);
break;
case OP_XONES:
arm_neon_eor_16b (code, dreg, dreg, dreg);
arm_neon_not_16b (code, dreg, dreg);
break;
case OP_XEXTRACT:
code = emit_xextract (code, VREG_FULL, GTMREG_TO_INT (ins->inst_c0), dreg, sreg1);
code = emit_xextract (code, (ins->inst_c1 == 8) ? VREG_LOW : VREG_FULL, GTMREG_TO_INT (ins->inst_c0), dreg, sreg1);
break;
case OP_STOREX_MEMBASE:
if (ins->klass && mono_class_value_size (ins->klass, NULL) == 8)
Expand Down Expand Up @@ -3966,7 +3969,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
case OP_EXPAND_R4:
case OP_EXPAND_R8: {
const int t = get_type_size_macro (ins->inst_c1);
arm_neon_fdup_e (code, VREG_FULL, t, dreg, sreg1, 0);
arm_neon_fdup_e (code, get_vector_size_macro (ins), t, dreg, sreg1, 0);
matouskozak marked this conversation as resolved.
Show resolved Hide resolved
break;
}
case OP_EXTRACT_I1:
Expand All @@ -3989,6 +3992,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
// Technically, this broadcasts element #inst_c0 to all dest XREG elements, whereas it should
// set the FREG to said element. Since the FREG and XREG pools are the same on arm64 and the rest
// of the F/XREG is ignored in FREG mode, this operation remains valid.
// FIXME: pass VREG_LOW for 64-bit vectors
arm_neon_fdup_e (code, VREG_FULL, t, dreg, sreg1, GTMREG_TO_UINT32 (ins->inst_c0));
}
break;
Expand Down Expand Up @@ -4083,17 +4087,19 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)

case OP_ARM64_XADDV: {
switch (ins->inst_c0) {
case INTRINS_AARCH64_ADV_SIMD_FADDV:
case INTRINS_AARCH64_ADV_SIMD_FADDV: {
const int width = get_vector_size_macro (ins);
if (ins->inst_c1 == MONO_TYPE_R8) {
arm_neon_faddp (code, VREG_FULL, TYPE_F64, dreg, sreg1, sreg1);
arm_neon_faddp (code, width, TYPE_F64, dreg, sreg1, sreg1);
matouskozak marked this conversation as resolved.
Show resolved Hide resolved
} else if (ins->inst_c1 == MONO_TYPE_R4) {
arm_neon_faddp (code, VREG_FULL, TYPE_F32, dreg, sreg1, sreg1);
arm_neon_faddp (code, VREG_FULL, TYPE_F32, dreg, dreg, dreg);
arm_neon_faddp (code, width, TYPE_F32, dreg, sreg1, sreg1);
if (width == VREG_FULL)
arm_neon_faddp (code, width, TYPE_F32, dreg, dreg, dreg);
} else {
g_assert_not_reached ();
}
break;

}
case INTRINS_AARCH64_ADV_SIMD_UADDV:
case INTRINS_AARCH64_ADV_SIMD_SADDV:
if (get_type_size_macro (ins->inst_c1) == TYPE_I64)
Expand Down
10 changes: 9 additions & 1 deletion src/mono/mono/mini/mini-runtime.c
Original file line number Diff line number Diff line change
Expand Up @@ -4469,7 +4469,7 @@ init_class (MonoClass *klass)

const char *name = m_class_get_name (klass);

#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_WASM)
#if defined(TARGET_AMD64) || defined(TARGET_WASM)
/*
* Some of the intrinsics used by the VectorX classes are only implemented on amd64.
* The JIT can't handle SIMD types with != 16 size yet.
Expand All @@ -4481,6 +4481,14 @@ init_class (MonoClass *klass)
}
#endif

#ifdef TARGET_ARM64
if (!strcmp (m_class_get_name_space (klass), "System.Numerics")) {
// FIXME: Support Vector3 https://github.com/dotnet/runtime/issues/81501
if (!strcmp (name, "Vector2") || !strcmp (name, "Vector4") || !strcmp (name, "Quaternion") || !strcmp (name, "Plane"))
mono_class_set_is_simd_type (klass, TRUE);
}
#endif

if (m_class_is_ginst (klass)) {
if (!strcmp (name, "Vector`1") || !strcmp (name, "Vector64`1") || !strcmp (name, "Vector128`1") || !strcmp (name, "Vector256`1") || !strcmp (name, "Vector512`1")) {
MonoGenericClass *gclass = mono_class_try_get_generic_class (klass);
Expand Down
3 changes: 3 additions & 0 deletions src/mono/mono/mini/mini.c
Original file line number Diff line number Diff line change
Expand Up @@ -4573,6 +4573,9 @@ mini_get_simd_type_info (MonoClass *klass, guint32 *nelems)
if (!strcmp (klass_name, "Vector4") || !strcmp (klass_name, "Quaternion") || !strcmp (klass_name, "Plane")) {
*nelems = 4;
return MONO_TYPE_R4;
} else if (!strcmp (klass_name, "Vector2")) {
*nelems = 2;
return MONO_TYPE_R4;
} else if (!strcmp (klass_name, "Vector`1") || !strcmp (klass_name, "Vector64`1") || !strcmp (klass_name, "Vector128`1") || !strcmp (klass_name, "Vector256`1") || !strcmp (klass_name, "Vector512`1")) {
MonoType *etype = mono_class_get_generic_class (klass)->context.class_inst->type_argv [0];
int size = mono_class_value_size (klass, NULL);
Expand Down
8 changes: 8 additions & 0 deletions src/mono/mono/mini/simd-arm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,14 @@ SIMD_OP (64, OP_XCOMPARE_FP, CMP_LE, WTDSS_REV, _UNDEF

SIMD_OP (64, OP_XBINOP, OP_IADD, WTDSS, arm_neon_add, arm_neon_add, arm_neon_add, _UNDEF, _UNDEF, _UNDEF)
SIMD_OP (64, OP_XBINOP, OP_FADD, WTDSS, _UNDEF, _UNDEF, _UNDEF, _UNDEF, arm_neon_fadd, _UNDEF)
SIMD_OP (64, OP_XBINOP, OP_FSUB, WTDSS, _UNDEF, _UNDEF, _UNDEF, _UNDEF, arm_neon_fsub, _UNDEF)
SIMD_OP (64, OP_XBINOP, OP_FMAX, WTDSS, _UNDEF, _UNDEF, _UNDEF, _UNDEF, arm_neon_fmax, _UNDEF)
SIMD_OP (64, OP_XBINOP, OP_FMIN, WTDSS, _UNDEF, _UNDEF, _UNDEF, _UNDEF, arm_neon_fmin, _UNDEF)
SIMD_OP (64, OP_XBINOP, OP_FMUL, WTDSS, _UNDEF, _UNDEF, _UNDEF, _UNDEF, arm_neon_fmul, _UNDEF)
SIMD_OP (64, OP_XBINOP, OP_FDIV, WTDSS, _UNDEF, _UNDEF, _UNDEF, _UNDEF, arm_neon_fdiv, _UNDEF)
SIMD_OP (64, OP_ARM64_XADDV, INTRINS_AARCH64_ADV_SIMD_FADDV, WTDS, _UNDEF, _UNDEF, _UNDEF, _UNDEF, _SKIP, _UNDEF)
SIMD_OP (64, OP_XOP_OVR_X_X, INTRINS_AARCH64_ADV_SIMD_FSQRT, WTDS, _UNDEF, _UNDEF, _UNDEF, _UNDEF, arm_neon_fsqrt, _UNDEF)
SIMD_OP (64, OP_XOP_OVR_X_X, INTRINS_AARCH64_ADV_SIMD_FABS, WTDS, _UNDEF, _UNDEF, _UNDEF, _UNDEF, arm_neon_fabs, _UNDEF)

/* 128-bit vectors */
/* Width Opcode Function Operand config I8 I16 I32 I64 F32 F64 */
Expand Down
32 changes: 16 additions & 16 deletions src/mono/mono/mini/simd-intrinsics.c
Original file line number Diff line number Diff line change
Expand Up @@ -1825,7 +1825,7 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
arg_class = mono_class_from_mono_type_internal (fsig->params [0]);
}

// FIXME: Add support for Vector64 on arm64
// FIXME: Add support for Vector64 on arm64 https://github.com/dotnet/runtime/issues/90402
int size = mono_class_value_size (arg_class, NULL);
if (size != 16)
return NULL;
Expand Down Expand Up @@ -2631,7 +2631,7 @@ static G_GNUC_UNUSED MonoInst*
emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
{
MonoInst *ins;
int id;
int id, len;
MonoClass *klass;
MonoType *type, *etype;

Expand All @@ -2657,6 +2657,7 @@ emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f
klass = cmethod->klass;
type = m_class_get_byval_arg (klass);
etype = m_class_get_byval_arg (mono_defaults.single_class);
len = mono_class_value_size (klass, NULL) / 4;

// Similar to the cases in emit_sys_numerics_vector_t ()
switch (id) {
Expand Down Expand Up @@ -2686,7 +2687,6 @@ emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f
}
// FIXME: These don't work since Vector2/Vector3 are not handled as SIMD
#if 0
int len = mono_class_value_size (klass, NULL) / 4;
} else if (len == 3 && fsig->param_count == 2 && fsig->params [0]->type == MONO_TYPE_VALUETYPE && fsig->params [1]->type == etype->type) {
/* Vector3 (Vector2, float) */
int dreg = load_simd_vreg (cfg, cmethod, args [0], NULL);
Expand Down Expand Up @@ -2719,26 +2719,25 @@ emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f
case SN_get_Item: {
// GetElement is marked as Intrinsic, but handling this in get_Item leads to better code
int src1 = load_simd_vreg (cfg, cmethod, args [0], NULL);
int elems = 4;
MonoTypeEnum ty = etype->type;

if (args [1]->opcode == OP_ICONST) {
// If the index is provably a constant, we can generate vastly better code.
int index = GTMREG_TO_INT (args[1]->inst_c0);

if (index < 0 || index >= elems) {
MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, args [1]->dreg, elems);
if (index < 0 || index >= len) {
MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, args [1]->dreg, len);
MONO_EMIT_NEW_COND_EXC (cfg, GE_UN, "ArgumentOutOfRangeException");
}

int opcode = type_to_extract_op (ty);
ins = emit_simd_ins (cfg, klass, opcode, src1, -1);
ins->inst_c0 = args[1]->inst_c0;
ins->inst_c0 = index;
ins->inst_c1 = ty;
return ins;
}

MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, args [1]->dreg, elems);
MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, args [1]->dreg, len);
MONO_EMIT_NEW_COND_EXC (cfg, GE_UN, "ArgumentOutOfRangeException");

if (COMPILE_LLVM (cfg)) {
Expand Down Expand Up @@ -2806,14 +2805,14 @@ emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f
g_assert (fsig->hasthis && fsig->param_count == 2 && fsig->params [0]->type == MONO_TYPE_I4 && fsig->params [1]->type == MONO_TYPE_R4);

gboolean indirect = FALSE;
int elems = 4, index = GTMREG_TO_INT (args [1]->inst_c0);
int index = GTMREG_TO_INT (args [1]->inst_c0);
int dreg = load_simd_vreg (cfg, cmethod, args [0], &indirect);

if (args [1]->opcode == OP_ICONST) {
// If the index is provably a constant, we can generate vastly better code.
// Bounds check only if the index is out of range
if (index < 0 || index >= elems) {
MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, args [1]->dreg, elems);
if (index < 0 || index >= len) {
MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, args [1]->dreg, len);
MONO_EMIT_NEW_COND_EXC (cfg, GE_UN, "ArgumentOutOfRangeException");
}

Expand All @@ -2830,7 +2829,7 @@ emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f
return ins;
}

MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, args [1]->dreg, elems);
MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, args [1]->dreg, len);
MONO_EMIT_NEW_COND_EXC (cfg, GE_UN, "ArgumentOutOfRangeException");

if (COMPILE_LLVM (cfg)) {
Expand Down Expand Up @@ -2948,7 +2947,7 @@ emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f
#endif
}
case SN_CopyTo:
// FIXME:
// FIXME: https://github.com/dotnet/runtime/issues/91394
return NULL;
case SN_Clamp: {
if (!(!fsig->hasthis && fsig->param_count == 3 && mono_metadata_type_equal (fsig->ret, type) && mono_metadata_type_equal (fsig->params [0], type) && mono_metadata_type_equal (fsig->params [1], type) && mono_metadata_type_equal (fsig->params [2], type)))
Expand All @@ -2971,7 +2970,7 @@ emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f
case SN_LengthSquared:
case SN_Lerp:
case SN_Normalize: {
// FIXME:
// FIXME: https://github.com/dotnet/runtime/issues/91394
return NULL;
}
default:
Expand Down Expand Up @@ -5832,8 +5831,9 @@ arch_emit_simd_intrinsics (const char *class_ns, const char *class_name, MonoCom
}

if (!strcmp (class_ns, "System.Numerics")) {
// FIXME: Support Vector2/Vector3
if (!strcmp (class_name, "Vector4") || !strcmp (class_name, "Quaternion") || !strcmp (class_name, "Plane"))
// FIXME: Support Vector3 https://github.com/dotnet/runtime/issues/81501
if (!strcmp (class_name, "Vector2") || !strcmp (class_name, "Vector4") ||
!strcmp (class_name, "Quaternion") || !strcmp (class_name, "Plane"))
return emit_vector_2_3_4 (cfg, cmethod, fsig, args);
}

Expand Down