Skip to content

Commit

Permalink
Improve the performance of Equals* for Vector128 and Vector on Arm64 (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
fanyang-mono authored Oct 13, 2022
1 parent b8cab1c commit b00ad94
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 16 deletions.
39 changes: 39 additions & 0 deletions src/mono/mono/mini/mini-llvm.c
Original file line number Diff line number Diff line change
Expand Up @@ -9854,7 +9854,46 @@ MONO_RESTORE_WARNING
#endif

#if defined(TARGET_ARM64)
case OP_XEQUAL_ARM64_V128_FAST: {
	/*
	 * Whole-vector equality test: compare lhs and rhs lane by lane, reduce the
	 * lane masks with a pairwise unsigned minimum (UMINP), and store 1 in dreg
	 * when the reduced value is all-ones, 0 otherwise.
	 */
	LLVMTypeRef lane_t = LLVMGetElementType (LLVMTypeOf (lhs));
	LLVMTypeRef mask_lane_t = lane_t;
	LLVMValueRef lane_eq;

	// %c = fcmp oeq / icmp eq <N x T> %lhs, %rhs
	// Floating point lanes get an integer mask lane type of the same width.
	if (lane_t == LLVMDoubleType ()) {
		mask_lane_t = LLVMInt64Type ();
		lane_eq = LLVMBuildFCmp (builder, LLVMRealOEQ, lhs, rhs, "");
	} else if (lane_t == LLVMFloatType ()) {
		mask_lane_t = LLVMInt32Type ();
		lane_eq = LLVMBuildFCmp (builder, LLVMRealOEQ, lhs, rhs, "");
	} else {
		lane_eq = LLVMBuildICmp (builder, LLVMIntEQ, lhs, rhs, "");
	}

	// Sign-extend every compare result to a full lane, so each lane is
	// either 0xff... (equal) or 0 (different).
	int lane_count = LLVMGetVectorSize (LLVMTypeOf (lane_eq));
	LLVMValueRef mask = LLVMBuildSExt (builder, lane_eq, LLVMVectorType (mask_lane_t, lane_count), "");

	// Handle the mask as <4 x i32> regardless of the source lane type.
	mask = convert (ctx, mask, LLVMVectorType (LLVMInt32Type (), 4));

	// MinPair: the mask is passed as both operands of UMINP.
	LLVMValueRef uminp_args [] = { mask, mask };
	llvm_ovr_tag_t uminp_tag = ovr_tag_from_llvm_type (LLVMTypeOf (mask));
	LLVMValueRef reduced = call_overloaded_intrins (ctx, INTRINS_AARCH64_ADV_SIMD_UMINP, uminp_tag, uminp_args, "");

	// Take the lower 64 bits of the reduced vector.
	// NOTE(review): relies on UMINP placing the minimum of every lane pair
	// of the first operand in the low half of the result -- see the Arm
	// instruction reference.
	reduced = convert (ctx, reduced, LLVMVectorType (LLVMInt64Type (), 2));
	LLVMValueRef low64 = LLVMBuildExtractElement (builder, reduced, const_int32 (0), "");

	// Convert to 0/1: the vectors are equal only if the low 64 bits are all ones.
	LLVMValueRef all_equal = LLVMBuildICmp (builder, LLVMIntEQ, low64, LLVMConstAllOnes (LLVMInt64Type ()), "");

	values [ins->dreg] = LLVMBuildZExt (builder, all_equal, LLVMInt8Type (), "");
	break;
}
case OP_XOP_I4_I4:
case OP_XOP_I8_I8: {
IntrinsicId id = (IntrinsicId)ins->inst_c0;
Expand Down
3 changes: 3 additions & 0 deletions src/mono/mono/mini/mini-ops.h
Original file line number Diff line number Diff line change
Expand Up @@ -1467,6 +1467,9 @@ MINI_OP(OP_FILL_PROF_CALL_CTX, "fill_prof_call_ctx", NONE, IREG, NONE)

/* LLVM only, compare 2 vectors for equality, set dreg to 1/0 */
MINI_OP(OP_XEQUAL, "xequal", IREG, XREG, XREG)
#if defined(TARGET_ARM64)
MINI_OP(OP_XEQUAL_ARM64_V128_FAST, "arm64_xequal_v128", IREG, XREG, XREG)
#endif
/* Per element compare, inst_c0 contains a CompRelation */
MINI_OP(OP_XCOMPARE, "xcompare", XREG, XREG, XREG)
MINI_OP(OP_XCOMPARE_SCALAR, "xcompare_scalar", XREG, XREG, XREG)
Expand Down
40 changes: 24 additions & 16 deletions src/mono/mono/mini/simd-intrinsics.c
Original file line number Diff line number Diff line change
Expand Up @@ -485,7 +485,15 @@ emit_xcompare_for_intrinsic (MonoCompile *cfg, MonoClass *klass, int intrinsic_i
/*
 * emit_xequal:
 *
 *   Emit a whole-vector equality comparison of ARG1 and ARG2.
 * On arm64, when mono_class_value_size () reports 16 bytes for KLASS, the
 * OP_XEQUAL_ARM64_V128_FAST opcode is emitted; in every other case the
 * generic OP_XEQUAL opcode is emitted.
 * Returns the instruction produced by emit_simd_ins ().
 */
static MonoInst*
emit_xequal (MonoCompile *cfg, MonoClass *klass, MonoInst *arg1, MonoInst *arg2)
{
	int opcode = OP_XEQUAL;

#ifdef TARGET_ARM64
	/* Only 16 byte (128 bit) values take the arm64 fast path. */
	if (mono_class_value_size (klass, NULL) == 16)
		opcode = OP_XEQUAL_ARM64_V128_FAST;
#endif

	return emit_simd_ins (cfg, klass, opcode, arg1->dreg, arg2->dreg);
}

static MonoInst*
Expand Down Expand Up @@ -1368,13 +1376,13 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
case SN_EqualsAny: {
if (!is_element_type_primitive (fsig->params [0]))
return NULL;
MonoClass *arg_class = mono_class_from_mono_type_internal (fsig->params [0]);
switch (id) {
case SN_Equals:
return emit_xcompare (cfg, klass, arg0_type, args [0], args [1]);
case SN_EqualsAll:
return emit_xequal (cfg, klass, args [0], args [1]);
return emit_xequal (cfg, arg_class, args [0], args [1]);
case SN_EqualsAny: {
MonoClass *arg_class = mono_class_from_mono_type_internal (fsig->params [0]);
MonoInst *cmp_eq = emit_xcompare (cfg, arg_class, arg0_type, args [0], args [1]);
MonoInst *zero = emit_xzero (cfg, arg_class);
return emit_not_xequal (cfg, arg_class, cmp_eq, zero);
Expand Down Expand Up @@ -1448,18 +1456,18 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
if (type_enum_is_float (arg0_type)) {
MonoInst *zero = emit_xzero (cfg, arg_class);
MonoInst *inverted_cmp = emit_xcompare (cfg, klass, arg0_type, cmp, zero);
return emit_xequal (cfg, klass, inverted_cmp, zero);
return emit_xequal (cfg, arg_class, inverted_cmp, zero);
}

MonoInst *ones = emit_xones (cfg, arg_class);
return emit_xequal (cfg, klass, cmp, ones);
return emit_xequal (cfg, arg_class, cmp, ones);
}
case SN_GreaterThanAny:
case SN_GreaterThanOrEqualAny:
case SN_LessThanAny:
case SN_LessThanOrEqualAny: {
MonoInst *zero = emit_xzero (cfg, arg_class);
return emit_not_xequal (cfg, klass, cmp, zero);
return emit_not_xequal (cfg, arg_class, cmp, zero);
}
default:
g_assert_not_reached ();
Expand Down Expand Up @@ -1759,14 +1767,16 @@ emit_vector64_vector128_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSign

}
case SN_op_Equality:
case SN_op_Inequality:
case SN_op_Inequality: {
if (fsig->param_count != 2 )
return NULL;
MonoClass *arg_class = mono_class_from_mono_type_internal (fsig->params [0]);
switch (id) {
case SN_op_Equality: return emit_xequal (cfg, klass, args [0], args [1]);
case SN_op_Inequality: return emit_not_xequal (cfg, klass, args [0], args [1]);
case SN_op_Equality: return emit_xequal (cfg, arg_class, args [0], args [1]);
case SN_op_Inequality: return emit_not_xequal (cfg, arg_class, args [0], args [1]);
default: g_assert_not_reached ();
}
}
case SN_op_OnesComplement:
case SN_op_UnaryNegation:
if (fsig->param_count != 1 )
Expand Down Expand Up @@ -1998,19 +2008,17 @@ emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f
ins->inst_c1 = MONO_TYPE_R4;
return ins;
}
case SN_op_Equality:
case SN_op_Equality: {
if (!(fsig->param_count == 2 && mono_metadata_type_equal (fsig->params [0], type) && mono_metadata_type_equal (fsig->params [1], type)))
return NULL;
return emit_simd_ins (cfg, klass, OP_XEQUAL, args [0]->dreg, args [1]->dreg);
MonoClass *arg_class = mono_class_from_mono_type_internal (fsig->params [0]);
return emit_xequal (cfg, arg_class, args [0], args [1]);
}
case SN_op_Inequality: {
if (!(fsig->param_count == 2 && mono_metadata_type_equal (fsig->params [0], type) && mono_metadata_type_equal (fsig->params [1], type)))
return NULL;
ins = emit_simd_ins (cfg, klass, OP_XEQUAL, args [0]->dreg, args [1]->dreg);
int sreg = ins->dreg;
int dreg = alloc_ireg (cfg);
MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, sreg, 0);
EMIT_NEW_UNALU (cfg, ins, OP_CEQ, dreg, -1);
return ins;
MonoClass *arg_class = mono_class_from_mono_type_internal (fsig->params [0]);
return emit_not_xequal (cfg, arg_class, args [0], args [1]);
}
case SN_SquareRoot: {
#ifdef TARGET_ARM64
Expand Down

0 comments on commit b00ad94

Please sign in to comment.