diff --git a/src/coreclr/src/jit/codegenarm64.cpp b/src/coreclr/src/jit/codegenarm64.cpp index 39b81bc1dc723..9c7a4447d241b 100644 --- a/src/coreclr/src/jit/codegenarm64.cpp +++ b/src/coreclr/src/jit/codegenarm64.cpp @@ -4518,11 +4518,11 @@ void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode) { GetEmitter()->emitIns_R_R_R(INS_faddp, attr, tmpReg, tmpReg, tmpReg, INS_OPTS_4S); } - GetEmitter()->emitIns_R_R(INS_faddp, EA_4BYTE, targetReg, tmpReg); + GetEmitter()->emitIns_R_R(INS_faddp, EA_4BYTE, targetReg, tmpReg, INS_OPTS_2S); } else { - GetEmitter()->emitIns_R_R(INS_faddp, EA_8BYTE, targetReg, tmpReg); + GetEmitter()->emitIns_R_R(INS_faddp, EA_8BYTE, targetReg, tmpReg, INS_OPTS_2D); } } else @@ -6892,6 +6892,34 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R(INS_fabs, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_fabs, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D); + // fmaxp scalar + theEmitter->emitIns_R_R(INS_fmaxp, EA_4BYTE, REG_V0, REG_V1, INS_OPTS_2S); + theEmitter->emitIns_R_R(INS_fmaxp, EA_8BYTE, REG_V2, REG_V3, INS_OPTS_2D); + + // fmaxnmp scalar + theEmitter->emitIns_R_R(INS_fmaxnmp, EA_4BYTE, REG_V0, REG_V1, INS_OPTS_2S); + theEmitter->emitIns_R_R(INS_fmaxnmp, EA_8BYTE, REG_V2, REG_V3, INS_OPTS_2D); + + // fmaxnmv vector + theEmitter->emitIns_R_R(INS_fmaxnmv, EA_4BYTE, REG_V0, REG_V1, INS_OPTS_4S); + + // fmaxv vector + theEmitter->emitIns_R_R(INS_fmaxv, EA_4BYTE, REG_V0, REG_V1, INS_OPTS_4S); + + // fminp vector + theEmitter->emitIns_R_R(INS_fminp, EA_4BYTE, REG_V0, REG_V1, INS_OPTS_2S); + theEmitter->emitIns_R_R(INS_fminp, EA_8BYTE, REG_V2, REG_V3, INS_OPTS_2D); + + // fminnmp scalar + theEmitter->emitIns_R_R(INS_fminnmp, EA_4BYTE, REG_V0, REG_V1, INS_OPTS_2S); + theEmitter->emitIns_R_R(INS_fminnmp, EA_8BYTE, REG_V2, REG_V3, INS_OPTS_2D); + + // fminnmv vector + theEmitter->emitIns_R_R(INS_fminnmv, EA_4BYTE, REG_V0, REG_V1, INS_OPTS_4S); + + // fminv vector + theEmitter->emitIns_R_R(INS_fminv, EA_4BYTE, REG_V0, REG_V1, INS_OPTS_4S); + // fneg scalar theEmitter->emitIns_R_R(INS_fneg, EA_4BYTE, REG_V0, REG_V1); theEmitter->emitIns_R_R(INS_fneg, EA_8BYTE, REG_V2, REG_V3); @@ -7006,21 +7034,19 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R(INS_saddlv, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_saddlv, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S); - // smaxlv vector - theEmitter->emitIns_R_R(INS_smaxlv, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B); - theEmitter->emitIns_R_R(INS_smaxlv, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B); - theEmitter->emitIns_R_R(INS_smaxlv, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H); - theEmitter->emitIns_R_R(INS_smaxlv, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H); - theEmitter->emitIns_R_R(INS_smaxlv, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S); - theEmitter->emitIns_R_R(INS_smaxlv, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S); - - // sminlv vector - theEmitter->emitIns_R_R(INS_sminlv, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B); - theEmitter->emitIns_R_R(INS_sminlv, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B); - theEmitter->emitIns_R_R(INS_sminlv, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H); - theEmitter->emitIns_R_R(INS_sminlv, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H); - theEmitter->emitIns_R_R(INS_sminlv, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S); - theEmitter->emitIns_R_R(INS_sminlv, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S); + // smaxv vector + theEmitter->emitIns_R_R(INS_smaxv, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B); + theEmitter->emitIns_R_R(INS_smaxv, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B); + theEmitter->emitIns_R_R(INS_smaxv, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H); + theEmitter->emitIns_R_R(INS_smaxv, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H); + theEmitter->emitIns_R_R(INS_smaxv, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_4S); + + // sminv vector + theEmitter->emitIns_R_R(INS_sminv, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B); + theEmitter->emitIns_R_R(INS_sminv, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B); + theEmitter->emitIns_R_R(INS_sminv, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H); + theEmitter->emitIns_R_R(INS_sminv, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H); + theEmitter->emitIns_R_R(INS_sminv, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_4S); // uaddlv vector theEmitter->emitIns_R_R(INS_uaddlv, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B); @@ -7030,79 +7056,23 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R(INS_uaddlv, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S); theEmitter->emitIns_R_R(INS_uaddlv, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S); - // umaxlv vector - theEmitter->emitIns_R_R(INS_umaxlv, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B); - theEmitter->emitIns_R_R(INS_umaxlv, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B); - theEmitter->emitIns_R_R(INS_umaxlv, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H); - theEmitter->emitIns_R_R(INS_umaxlv, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H); - theEmitter->emitIns_R_R(INS_umaxlv, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S); - theEmitter->emitIns_R_R(INS_umaxlv, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S); - - // uminlv vector - theEmitter->emitIns_R_R(INS_uminlv, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B); - theEmitter->emitIns_R_R(INS_uminlv, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B); - theEmitter->emitIns_R_R(INS_uminlv, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H); - theEmitter->emitIns_R_R(INS_uminlv, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H); - theEmitter->emitIns_R_R(INS_uminlv, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S); - theEmitter->emitIns_R_R(INS_uminlv, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S); + // umaxv vector + theEmitter->emitIns_R_R(INS_umaxv, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B); + theEmitter->emitIns_R_R(INS_umaxv, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B); + theEmitter->emitIns_R_R(INS_umaxv, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H); + theEmitter->emitIns_R_R(INS_umaxv, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H); + theEmitter->emitIns_R_R(INS_umaxv, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_4S); - // uzp1 vector - theEmitter->emitIns_R_R(INS_uzp1, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B); - theEmitter->emitIns_R_R(INS_uzp1, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B); - theEmitter->emitIns_R_R(INS_uzp1, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H); - theEmitter->emitIns_R_R(INS_uzp1, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H); - theEmitter->emitIns_R_R(INS_uzp1, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S); - theEmitter->emitIns_R_R(INS_uzp1, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S); - theEmitter->emitIns_R_R(INS_uzp1, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_2D); - - // uzp2 vector - theEmitter->emitIns_R_R(INS_uzp2, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B); - theEmitter->emitIns_R_R(INS_uzp2, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B); - theEmitter->emitIns_R_R(INS_uzp2, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H); - theEmitter->emitIns_R_R(INS_uzp2, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H); - theEmitter->emitIns_R_R(INS_uzp2, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S); - theEmitter->emitIns_R_R(INS_uzp2, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S); - theEmitter->emitIns_R_R(INS_uzp2, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_2D); - - // zip1 vector - theEmitter->emitIns_R_R(INS_zip1, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B); - theEmitter->emitIns_R_R(INS_zip1, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B); - theEmitter->emitIns_R_R(INS_zip1, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H); - theEmitter->emitIns_R_R(INS_zip1, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H); - theEmitter->emitIns_R_R(INS_zip1, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S); - theEmitter->emitIns_R_R(INS_zip1, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S); - theEmitter->emitIns_R_R(INS_zip1, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_2D); - - // zip2 vector - theEmitter->emitIns_R_R(INS_zip2, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B); - theEmitter->emitIns_R_R(INS_zip2, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B); - theEmitter->emitIns_R_R(INS_zip2, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H); - theEmitter->emitIns_R_R(INS_zip2, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H); - theEmitter->emitIns_R_R(INS_zip2, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S); - theEmitter->emitIns_R_R(INS_zip2, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S); - theEmitter->emitIns_R_R(INS_zip2, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_2D); - - // trn1 vector - theEmitter->emitIns_R_R(INS_trn1, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B); - theEmitter->emitIns_R_R(INS_trn1, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B); - theEmitter->emitIns_R_R(INS_trn1, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H); - theEmitter->emitIns_R_R(INS_trn1, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H); - theEmitter->emitIns_R_R(INS_trn1, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S); - theEmitter->emitIns_R_R(INS_trn1, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S); - theEmitter->emitIns_R_R(INS_trn1, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_2D); - - // trn2 vector - theEmitter->emitIns_R_R(INS_trn2, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B); - theEmitter->emitIns_R_R(INS_trn2, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B); - theEmitter->emitIns_R_R(INS_trn2, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H); - theEmitter->emitIns_R_R(INS_trn2, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H); - theEmitter->emitIns_R_R(INS_trn2, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S); - theEmitter->emitIns_R_R(INS_trn2, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S); - theEmitter->emitIns_R_R(INS_trn2, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_2D); + // uminv vector + theEmitter->emitIns_R_R(INS_uminv, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B); + theEmitter->emitIns_R_R(INS_uminv, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B); + theEmitter->emitIns_R_R(INS_uminv, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H); + theEmitter->emitIns_R_R(INS_uminv, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H); + theEmitter->emitIns_R_R(INS_uminv, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_4S); // faddp scalar - theEmitter->emitIns_R_R(INS_faddp, EA_4BYTE, REG_V0, REG_V1); - theEmitter->emitIns_R_R(INS_faddp, EA_8BYTE, REG_V2, REG_V3); + theEmitter->emitIns_R_R(INS_faddp, EA_4BYTE, REG_V0, REG_V1, INS_OPTS_2S); + theEmitter->emitIns_R_R(INS_faddp, EA_8BYTE, REG_V2, REG_V3, INS_OPTS_2D); // INS_fcvtl theEmitter->emitIns_R_R(INS_fcvtl, EA_4BYTE, REG_V0, REG_V1); @@ -7194,36 +7164,75 @@ void CodeGen::genArm64EmitterUnitTests() genDefineTempLabel(genCreateTempLabel()); + // fadd theEmitter->emitIns_R_R_R(INS_fadd, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE theEmitter->emitIns_R_R_R(INS_fadd, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE theEmitter->emitIns_R_R_R(INS_fadd, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_fadd, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S); theEmitter->emitIns_R_R_R(INS_fadd, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D); + // fsub theEmitter->emitIns_R_R_R(INS_fsub, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE theEmitter->emitIns_R_R_R(INS_fsub, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE theEmitter->emitIns_R_R_R(INS_fsub, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_fsub, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S); theEmitter->emitIns_R_R_R(INS_fsub, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D); + // fdiv theEmitter->emitIns_R_R_R(INS_fdiv, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE theEmitter->emitIns_R_R_R(INS_fdiv, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE theEmitter->emitIns_R_R_R(INS_fdiv, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_fdiv, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S); theEmitter->emitIns_R_R_R(INS_fdiv, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D); + // fmax theEmitter->emitIns_R_R_R(INS_fmax, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE theEmitter->emitIns_R_R_R(INS_fmax, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE theEmitter->emitIns_R_R_R(INS_fmax, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_fmax, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S); theEmitter->emitIns_R_R_R(INS_fmax, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D); + // fmaxp + theEmitter->emitIns_R_R_R(INS_fmaxp, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_2S); + theEmitter->emitIns_R_R_R(INS_fmaxp, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4S); + theEmitter->emitIns_R_R_R(INS_fmaxp, EA_16BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2D); + + // fmaxnm + theEmitter->emitIns_R_R_R(INS_fmaxnm, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE + theEmitter->emitIns_R_R_R(INS_fmaxnm, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE + theEmitter->emitIns_R_R_R(INS_fmaxnm, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S); + theEmitter->emitIns_R_R_R(INS_fmaxnm, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S); + theEmitter->emitIns_R_R_R(INS_fmaxnm, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D); + + // fmaxnmp vector + theEmitter->emitIns_R_R_R(INS_fmaxnmp, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_2S); + theEmitter->emitIns_R_R_R(INS_fmaxnmp, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4S); + theEmitter->emitIns_R_R_R(INS_fmaxnmp, EA_16BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2D); + + // fmin theEmitter->emitIns_R_R_R(INS_fmin, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE theEmitter->emitIns_R_R_R(INS_fmin, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE theEmitter->emitIns_R_R_R(INS_fmin, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_fmin, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S); theEmitter->emitIns_R_R_R(INS_fmin, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D); + // fminp + theEmitter->emitIns_R_R_R(INS_fminp, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_2S); + theEmitter->emitIns_R_R_R(INS_fminp, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4S); + theEmitter->emitIns_R_R_R(INS_fminp, EA_16BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2D); + + // fminnm + theEmitter->emitIns_R_R_R(INS_fminnm, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE + theEmitter->emitIns_R_R_R(INS_fminnm, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE + theEmitter->emitIns_R_R_R(INS_fminnm, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S); + theEmitter->emitIns_R_R_R(INS_fminnm, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S); + theEmitter->emitIns_R_R_R(INS_fminnm, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D); + + // fminnmp vector + theEmitter->emitIns_R_R_R(INS_fminnmp, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_2S); + theEmitter->emitIns_R_R_R(INS_fminnmp, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4S); + theEmitter->emitIns_R_R_R(INS_fminnmp, EA_16BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2D); + // fabd theEmitter->emitIns_R_R_R(INS_fabd, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE theEmitter->emitIns_R_R_R(INS_fabd, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE @@ -7555,6 +7564,7 @@ void CodeGen::genArm64EmitterUnitTests() genDefineTempLabel(genCreateTempLabel()); + // add theEmitter->emitIns_R_R_R(INS_add, EA_8BYTE, REG_V0, REG_V1, REG_V2); // scalar 8BYTE theEmitter->emitIns_R_R_R(INS_add, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_8B); theEmitter->emitIns_R_R_R(INS_add, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H); @@ -7564,6 +7574,17 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_add, EA_16BYTE, REG_V18, REG_V19, REG_V20, INS_OPTS_4S); theEmitter->emitIns_R_R_R(INS_add, EA_16BYTE, REG_V21, REG_V22, REG_V23, INS_OPTS_2D); + // addp + theEmitter->emitIns_R_R(INS_addp, EA_8BYTE, REG_V0, REG_V1, INS_OPTS_2D); // scalar 8BYTE + theEmitter->emitIns_R_R_R(INS_addp, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_8B); + theEmitter->emitIns_R_R_R(INS_addp, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H); + theEmitter->emitIns_R_R_R(INS_addp, EA_8BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_2S); + theEmitter->emitIns_R_R_R(INS_addp, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_16B); + theEmitter->emitIns_R_R_R(INS_addp, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_8H); + theEmitter->emitIns_R_R_R(INS_addp, EA_16BYTE, REG_V18, REG_V19, REG_V20, INS_OPTS_4S); + theEmitter->emitIns_R_R_R(INS_addp, EA_16BYTE, REG_V21, REG_V22, REG_V23, INS_OPTS_2D); + + // sub theEmitter->emitIns_R_R_R(INS_sub, EA_8BYTE, REG_V1, REG_V2, REG_V3); // scalar 8BYTE theEmitter->emitIns_R_R_R(INS_sub, EA_8BYTE, REG_V4, REG_V5, REG_V6, INS_OPTS_8B); theEmitter->emitIns_R_R_R(INS_sub, EA_8BYTE, REG_V7, REG_V8, REG_V9, INS_OPTS_4H); @@ -7617,6 +7638,14 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_smax, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_smax, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); + // smaxp vector + theEmitter->emitIns_R_R_R(INS_smaxp, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B); + theEmitter->emitIns_R_R_R(INS_smaxp, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B); + theEmitter->emitIns_R_R_R(INS_smaxp, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H); + theEmitter->emitIns_R_R_R(INS_smaxp, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H); + theEmitter->emitIns_R_R_R(INS_smaxp, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); + theEmitter->emitIns_R_R_R(INS_smaxp, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); + // smin vector theEmitter->emitIns_R_R_R(INS_smin, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B); theEmitter->emitIns_R_R_R(INS_smin, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B); @@ -7625,6 +7654,14 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_smin, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_smin, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); + // sminp vector + theEmitter->emitIns_R_R_R(INS_sminp, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B); + theEmitter->emitIns_R_R_R(INS_sminp, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B); + theEmitter->emitIns_R_R_R(INS_sminp, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H); + theEmitter->emitIns_R_R_R(INS_sminp, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H); + theEmitter->emitIns_R_R_R(INS_sminp, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); + theEmitter->emitIns_R_R_R(INS_sminp, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); + // umax vector theEmitter->emitIns_R_R_R(INS_umax, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B); theEmitter->emitIns_R_R_R(INS_umax, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B); @@ -7633,6 +7670,14 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_umax, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_umax, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); + // umaxp vector + theEmitter->emitIns_R_R_R(INS_umaxp, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B); + theEmitter->emitIns_R_R_R(INS_umaxp, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B); + theEmitter->emitIns_R_R_R(INS_umaxp, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H); + theEmitter->emitIns_R_R_R(INS_umaxp, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H); + theEmitter->emitIns_R_R_R(INS_umaxp, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); + theEmitter->emitIns_R_R_R(INS_umaxp, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); + // umin vector theEmitter->emitIns_R_R_R(INS_umin, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B); theEmitter->emitIns_R_R_R(INS_umin, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B); @@ -7641,6 +7686,14 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_umin, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_umin, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); + // uminp vector + theEmitter->emitIns_R_R_R(INS_uminp, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B); + theEmitter->emitIns_R_R_R(INS_uminp, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B); + theEmitter->emitIns_R_R_R(INS_uminp, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H); + theEmitter->emitIns_R_R_R(INS_uminp, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H); + theEmitter->emitIns_R_R_R(INS_uminp, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); + theEmitter->emitIns_R_R_R(INS_uminp, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); + // cmeq vector theEmitter->emitIns_R_R_R(INS_cmeq, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B); theEmitter->emitIns_R_R_R(INS_cmeq, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B); @@ -7648,8 +7701,7 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_cmeq, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H); theEmitter->emitIns_R_R_R(INS_cmeq, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_cmeq, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); - theEmitter->emitIns_R_R_R(INS_cmeq, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_1D); - theEmitter->emitIns_R_R_R(INS_cmeq, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_2D); + theEmitter->emitIns_R_R_R(INS_cmeq, EA_16BYTE, REG_V18, REG_V19, REG_V20, INS_OPTS_2D); // cmge vector theEmitter->emitIns_R_R_R(INS_cmge, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B); @@ -7658,8 +7710,7 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_cmge, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H); theEmitter->emitIns_R_R_R(INS_cmge, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_cmge, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); - theEmitter->emitIns_R_R_R(INS_cmge, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_1D); - theEmitter->emitIns_R_R_R(INS_cmge, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_2D); + theEmitter->emitIns_R_R_R(INS_cmge, EA_16BYTE, REG_V18, REG_V19, REG_V20, INS_OPTS_2D); // cmgt vector theEmitter->emitIns_R_R_R(INS_cmgt, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B); @@ -7668,8 +7719,7 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_cmgt, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H); theEmitter->emitIns_R_R_R(INS_cmgt, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_cmgt, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); - theEmitter->emitIns_R_R_R(INS_cmgt, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_1D); - theEmitter->emitIns_R_R_R(INS_cmgt, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_2D); + theEmitter->emitIns_R_R_R(INS_cmgt, EA_16BYTE, REG_V18, REG_V19, REG_V20, INS_OPTS_2D); // cmhi vector theEmitter->emitIns_R_R_R(INS_cmhi, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B); @@ -7678,8 +7728,7 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_cmhi, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H); theEmitter->emitIns_R_R_R(INS_cmhi, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_cmhi, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); - theEmitter->emitIns_R_R_R(INS_cmhi, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_1D); - theEmitter->emitIns_R_R_R(INS_cmhi, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_2D); + theEmitter->emitIns_R_R_R(INS_cmhi, EA_16BYTE, REG_V18, REG_V19, REG_V20, INS_OPTS_2D); // cmhs vector theEmitter->emitIns_R_R_R(INS_cmhs, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B); @@ -7688,18 +7737,16 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_cmhs, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H); theEmitter->emitIns_R_R_R(INS_cmhs, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_cmhs, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); - theEmitter->emitIns_R_R_R(INS_cmhs, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_1D); - theEmitter->emitIns_R_R_R(INS_cmhs, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_2D); - - // ctst vector - theEmitter->emitIns_R_R_R(INS_ctst, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B); - theEmitter->emitIns_R_R_R(INS_ctst, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B); - theEmitter->emitIns_R_R_R(INS_ctst, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H); - theEmitter->emitIns_R_R_R(INS_ctst, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H); - theEmitter->emitIns_R_R_R(INS_ctst, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); - theEmitter->emitIns_R_R_R(INS_ctst, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); - theEmitter->emitIns_R_R_R(INS_ctst, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_1D); - theEmitter->emitIns_R_R_R(INS_ctst, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_2D); + theEmitter->emitIns_R_R_R(INS_cmhs, EA_16BYTE, REG_V18, REG_V19, REG_V20, INS_OPTS_2D); + + // cmtst vector + theEmitter->emitIns_R_R_R(INS_cmtst, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B); + theEmitter->emitIns_R_R_R(INS_cmtst, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B); + theEmitter->emitIns_R_R_R(INS_cmtst, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H); + theEmitter->emitIns_R_R_R(INS_cmtst, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H); + theEmitter->emitIns_R_R_R(INS_cmtst, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); + theEmitter->emitIns_R_R_R(INS_cmtst, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); + theEmitter->emitIns_R_R_R(INS_cmtst, EA_16BYTE, REG_V18, REG_V19, REG_V20, INS_OPTS_2D); // faddp vector theEmitter->emitIns_R_R_R(INS_faddp, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); @@ -7722,6 +7769,62 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_fcmgt, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_2D); #endif // ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS + // trn1 vector + theEmitter->emitIns_R_R_R(INS_trn1, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B); + theEmitter->emitIns_R_R_R(INS_trn1, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B); + theEmitter->emitIns_R_R_R(INS_trn1, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H); + theEmitter->emitIns_R_R_R(INS_trn1, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H); + theEmitter->emitIns_R_R_R(INS_trn1, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); + theEmitter->emitIns_R_R_R(INS_trn1, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); + theEmitter->emitIns_R_R_R(INS_trn1, EA_16BYTE, REG_V18, REG_V19, REG_V20, INS_OPTS_2D); + + // trn2 vector + theEmitter->emitIns_R_R_R(INS_trn2, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B); + theEmitter->emitIns_R_R_R(INS_trn2, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B); + theEmitter->emitIns_R_R_R(INS_trn2, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H); + theEmitter->emitIns_R_R_R(INS_trn2, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H); + theEmitter->emitIns_R_R_R(INS_trn2, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); + theEmitter->emitIns_R_R_R(INS_trn2, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); + theEmitter->emitIns_R_R_R(INS_trn2, EA_16BYTE, REG_V18, REG_V19, REG_V20, INS_OPTS_2D); + + // uzp1 vector + theEmitter->emitIns_R_R_R(INS_uzp1, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B); + theEmitter->emitIns_R_R_R(INS_uzp1, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B); + theEmitter->emitIns_R_R_R(INS_uzp1, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H); + theEmitter->emitIns_R_R_R(INS_uzp1, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H); + theEmitter->emitIns_R_R_R(INS_uzp1, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); + theEmitter->emitIns_R_R_R(INS_uzp1, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); + theEmitter->emitIns_R_R_R(INS_uzp1, EA_16BYTE, REG_V18, REG_V19, REG_V20, INS_OPTS_2D); + + // uzp2 vector + theEmitter->emitIns_R_R_R(INS_uzp2, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B); + theEmitter->emitIns_R_R_R(INS_uzp2, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B); + theEmitter->emitIns_R_R_R(INS_uzp2, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H); + theEmitter->emitIns_R_R_R(INS_uzp2, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H); + theEmitter->emitIns_R_R_R(INS_uzp2, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); + theEmitter->emitIns_R_R_R(INS_uzp2, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); + theEmitter->emitIns_R_R_R(INS_uzp2, EA_16BYTE, REG_V18, REG_V19, REG_V20, INS_OPTS_2D); + + // zip1 vector + theEmitter->emitIns_R_R_R(INS_zip1, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B); + theEmitter->emitIns_R_R_R(INS_zip1, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B); + theEmitter->emitIns_R_R_R(INS_zip1, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H); + theEmitter->emitIns_R_R_R(INS_zip1, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H); + theEmitter->emitIns_R_R_R(INS_zip1, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); + theEmitter->emitIns_R_R_R(INS_zip1, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); + theEmitter->emitIns_R_R_R(INS_zip1, EA_16BYTE, REG_V18, REG_V19, REG_V20, INS_OPTS_2D); + + // zip2 vector + theEmitter->emitIns_R_R_R(INS_zip2, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B); + theEmitter->emitIns_R_R_R(INS_zip2, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B); + theEmitter->emitIns_R_R_R(INS_zip2, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H); + theEmitter->emitIns_R_R_R(INS_zip2, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H); + theEmitter->emitIns_R_R_R(INS_zip2, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); + theEmitter->emitIns_R_R_R(INS_zip2, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); + theEmitter->emitIns_R_R_R(INS_zip2, EA_16BYTE, REG_V18, REG_V19, REG_V20, INS_OPTS_2D); +#endif // ALL_ARM64_EMITTER_UNIT_TESTS + #ifdef ALL_ARM64_EMITTER_UNIT_TESTS // // R_R_R vector multiply diff --git a/src/coreclr/src/jit/emitarm64.cpp b/src/coreclr/src/jit/emitarm64.cpp index 4771a95e9202a..c196d95ebf4da 100644 --- a/src/coreclr/src/jit/emitarm64.cpp +++ b/src/coreclr/src/jit/emitarm64.cpp @@ -730,6 +730,35 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isVectorRegister(id->idReg2())); break; + case IF_DV_2Q: // DV_2Q .........X...... ......nnnnnddddd Sd Vn (faddp, fmaxnmp, fmaxp, fminnmp, + // fminp - scalar) + if (id->idOpSize() == EA_8BYTE) + { + assert(id->idInsOpt() == INS_OPTS_2D); + } + else + { + assert(id->idOpSize() == EA_4BYTE); + assert(id->idInsOpt() == INS_OPTS_2S); + } + assert(isVectorRegister(id->idReg1())); + assert(isVectorRegister(id->idReg2())); + break; + + case IF_DV_2R: // DV_2R .Q.......X...... ......nnnnnddddd Sd Vn (fmaxnmv, fmaxv, fminnmv, fminv) + assert(id->idOpSize() == EA_4BYTE); + assert(id->idInsOpt() == INS_OPTS_4S); + assert(isVectorRegister(id->idReg1())); + assert(isVectorRegister(id->idReg2())); + break; + + case IF_DV_2S: // DV_2S ........XX...... ......nnnnnddddd Sd Vn (addp - scalar) + assert(id->idOpSize() == EA_8BYTE); + assert(id->idInsOpt() == INS_OPTS_2D); + assert(isVectorRegister(id->idReg1())); + assert(isVectorRegister(id->idReg2())); + break; + case IF_DV_3A: // DV_3A .Q......XX.mmmmm ......nnnnnddddd Vd Vn Vm (vector) assert(isValidVectorDatasize(id->idOpSize())); assert(isValidArrangement(id->idOpSize(), id->idInsOpt())); @@ -895,6 +924,11 @@ bool emitter::emitInsMayWriteToGCReg(instrDesc* id) case IF_DV_2P: // DV_2P ................ ......nnnnnddddd Vd Vn (aes*, sha1su1) - Vd both source and // destination + case IF_DV_2Q: // DV_2Q .........X...... ......nnnnnddddd Sd Vn (faddp, fmaxnmp, fmaxp, fminnmp, + // fminp - scalar) + case IF_DV_2R: // DV_2R .Q.......X...... ......nnnnnddddd Sd Vn (fmaxnmv, fmaxv, fminnmv, fminv) + case IF_DV_2S: // DV_2S ........XX...... ......nnnnnddddd Sd Vn (addp - scalar) + case IF_DV_3A: // DV_3A .Q......XX.mmmmm ......nnnnnddddd Vd Vn Vm (vector) case IF_DV_3AI: // DV_3AI .Q......XXLMmmmm ....H.nnnnnddddd Vd Vn Vm[] (vector) case IF_DV_3B: // DV_3B .Q.......X.mmmmm ......nnnnnddddd Vd Vn Vm (vector) @@ -1527,7 +1561,8 @@ emitter::code_t emitter::emitInsCode(instruction ins, insFormat fmt) const static insFormat formatEncode2M[2] = {IF_DV_3A, IF_DV_3AI}; const static insFormat formatEncode2N[2] = {IF_DV_2N, IF_DV_2O}; const static insFormat formatEncode2O[2] = {IF_DV_3E, IF_DV_3A}; - const static insFormat formatEncode2P[2] = {IF_DV_2G, IF_DV_3B}; + const static insFormat formatEncode2P[2] = {IF_DV_2Q, IF_DV_3B}; + const static insFormat formatEncode2Q[2] = {IF_DV_2S, IF_DV_3A}; code_t code = BAD_CODE; insFormat insFmt = emitInsFormat(ins); @@ -1976,6 +2011,17 @@ emitter::code_t emitter::emitInsCode(instruction ins, insFormat fmt) } break; + case IF_EN2Q: + for (index = 0; index < 2; index++) + { + if (fmt == formatEncode2Q[index]) + { + encoding_found = true; + break; + } + } + break; + case IF_BI_0A: case IF_BI_0B: case IF_BI_0C: @@ -2038,6 +2084,7 @@ emitter::code_t emitter::emitInsCode(instruction ins, insFormat fmt) case IF_DV_2N: case IF_DV_2O: case IF_DV_2P: + case IF_DV_2R: case IF_DV_3A: case IF_DV_3AI: case IF_DV_3B: @@ -4293,12 +4340,34 @@ void emitter::emitIns_R_R( break; case INS_faddp: + case INS_fmaxnmp: + case INS_fmaxp: + case INS_fminnmp: + case INS_fminp: // Scalar operation - assert(insOptsNone(opt)); - assert(isValidVectorElemsizeFloat(size)); + assert(((size == EA_4BYTE) && (opt == INS_OPTS_2S)) || ((size == EA_8BYTE) && (opt == INS_OPTS_2D))); assert(isVectorRegister(reg1)); assert(isVectorRegister(reg2)); - fmt = IF_DV_2G; + fmt = IF_DV_2Q; + break; + + case INS_fmaxnmv: + case INS_fmaxv: + case INS_fminnmv: + case INS_fminv: + assert(size == EA_4BYTE); + assert(opt == INS_OPTS_4S); + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + fmt = IF_DV_2R; + break; + + case INS_addp: + assert(size == EA_8BYTE); + assert(opt == INS_OPTS_2D); + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + fmt = IF_DV_2S; break; case INS_fcvt: @@ -5304,14 +5373,19 @@ void emitter::emitIns_R_R_R( case INS_saba: case INS_sabd: case INS_smax: + case INS_smaxp: case INS_smin: + case INS_sminp: case INS_uaba: case INS_uabd: case INS_umax: + case INS_umaxp: case INS_umin: + case INS_uminp: assert(elemsize != EA_8BYTE); // can't use 2D or 1D __fallthrough; + case INS_addp: case INS_uzp1: case INS_uzp2: case INS_zip1: @@ -5393,7 +5467,9 @@ void emitter::emitIns_R_R_R( case INS_fsub: case INS_fdiv: case INS_fmax: + case INS_fmaxnm: case INS_fmin: + case INS_fminnm: case INS_fabd: case INS_fmul: case INS_fmulx: @@ -5432,6 +5508,11 @@ void emitter::emitIns_R_R_R( break; case INS_faddp: + case INS_fmaxnmp: + case INS_fmaxp: + case INS_fminnmp: + case INS_fminp: + case INS_fmla: case INS_fmls: assert(isVectorRegister(reg1)); @@ -10074,7 +10155,9 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += emitOutput_Instr(dst, code); break; - case IF_DV_2G: // DV_2G .........X...... ......nnnnnddddd Vd Vn (fmov,fcvtXX - register) + case IF_DV_2G: // DV_2G .........X...... ......nnnnnddddd Vd Vn (fmov, fcvtXX - register) + case IF_DV_2Q: // DV_2Q .........X...... ......nnnnnddddd Vd Vn (faddp, fmaxnmp, fmaxp, fminnmp, + // fminp - scalar) elemsize = id->idOpSize(); code = emitInsCode(ins, fmt); code |= insEncodeFloatElemsize(elemsize); // X @@ -10119,6 +10202,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) break; case IF_DV_2L: // DV_2L ........XX...... ......nnnnnddddd Vd Vn (abs, neg - scalar) + case IF_DV_2S: // DV_2S ........XX...... ......nnnnnddddd Sd Vn (addp - scalar) elemsize = id->idOpSize(); code = emitInsCode(ins, fmt); code |= insEncodeElemsize(elemsize); // XX @@ -10165,6 +10249,17 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += emitOutput_Instr(dst, code); break; + case IF_DV_2R: // DV_2R .Q.......X...... ......nnnnnddddd Sd Vn (fmaxnmv, fmaxv, fminnmv, fminv) + code = emitInsCode(ins, fmt); + datasize = optGetDatasize(id->idInsOpt()); + elemsize = optGetElemsize(id->idInsOpt()); + code |= insEncodeVectorsize(datasize); // Q + code |= insEncodeFloatElemsize(elemsize); // X + code |= insEncodeReg_Vd(id->idReg1()); // ddddd + code |= insEncodeReg_Vn(id->idReg2()); // nnnnn + dst += emitOutput_Instr(dst, code); + break; + case IF_DV_3A: // DV_3A .Q......XX.mmmmm ......nnnnnddddd Vd Vn Vm (vector) code = emitInsCode(ins, fmt); elemsize = optGetElemsize(id->idInsOpt()); @@ -11713,6 +11808,15 @@ void emitter::emitDispIns( emitDispReg(id->idReg2(), srcsize, false); break; + case IF_DV_2Q: // DV_2Q .........X...... ......nnnnnddddd Sd Vn (faddp, fmaxnmp, fmaxp, fminnmp, + // fminp - scalar) + case IF_DV_2R: // DV_2R .Q.......X...... ......nnnnnddddd Sd Vn (fmaxnmv, fmaxv, fminnmv, fminv) + case IF_DV_2S: // DV_2S ........XX...... ......nnnnnddddd Sd Vn (addp - scalar) + elemsize = id->idOpSize(); + emitDispReg(id->idReg1(), elemsize, true); + emitDispVectorReg(id->idReg2(), id->idInsOpt(), false); + break; + case IF_DV_3A: // DV_3A .Q......XX.mmmmm ......nnnnnddddd Vd Vn Vm (vector) case IF_DV_3B: // DV_3B .Q.........mmmmm ......nnnnnddddd Vd Vn Vm (vector) emitDispVectorReg(id->idReg1(), id->idInsOpt(), true); @@ -12699,7 +12803,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins } break; - case IF_DV_2G: // fmov, fabs, fneg, fsqrt, faddp, fcmXX, fcvtXX, frintX, scvtf, ucvtf (scalar) + case IF_DV_2G: // fmov, fabs, fneg, fsqrt, fcmXX, fcvtXX, frintX, scvtf, ucvtf (scalar) switch (ins) { case INS_fmov: @@ -12710,7 +12814,6 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case INS_fabs: case INS_fneg: - case INS_faddp: case INS_fcvtas: case INS_fcvtau: @@ -12764,21 +12867,49 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins } break; - case IF_DV_3B: // fadd, fsub, fdiv, fmul, fmulx, fmla, fmls, fmin, fmax, fabd, faddp, fcmXX (vector) + case IF_DV_2Q: // faddp, fmaxnmp, fmaxp, fminnmp, fminp (scalar) + case IF_DV_2R: // fmaxnmv, fmaxv, fminnmv, fminv + case IF_DV_2S: // addp (scalar) + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + + case IF_DV_3B: // fadd, fsub, fdiv, fmul, fmulx, fmla, fmls, fmin, fminnm, fmax, fmaxnm, fabd, fcmXX + // faddp, fmaxnmp, fmaxp, fminnmp, fminp, addp (vector) switch (ins) { case INS_fmin: + case INS_fminnm: case INS_fmax: + case INS_fmaxnm: case INS_fabd: case INS_fadd: case INS_fsub: - case INS_faddp: case INS_fmul: case INS_fmulx: result.insThroughput = PERFSCORE_THROUGHPUT_2X; result.insLatency = PERFSCORE_LATENCY_4C; break; + case INS_faddp: + case INS_fmaxnmp: + case INS_fmaxp: + case INS_fminnmp: + case INS_fminp: + case INS_addp: + if (id->idOpSize() == EA_16BYTE) + { + // Q-form + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_4C; + } + else + { + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_4C; + } + break; + case INS_facge: case INS_facgt: case INS_fcmeq: @@ -12833,13 +12964,15 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins result.insLatency = PERFSCORE_LATENCY_4C; break; - case IF_DV_3D: // fadd, fsub, fdiv, fmul, fmulx, fmla, fmls, fmin, fmax, fabd, faddp, fcmXX (scalar) + case IF_DV_3D: // fadd, fsub, fdiv, fmul, fmulx, fmla, fmls, fmin, fminnm, fmax, fmaxnm, fabd, fcmXX (scalar) switch (ins) { case INS_fadd: case INS_fsub: case INS_fmin: + case INS_fminnm: case INS_fmax: + case INS_fmaxnm: case INS_fmul: case INS_fmulx: case INS_fnmul: @@ -12948,7 +13081,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case IF_DV_3A: // (vector) // add, sub, mul, mla, mls, cmeq, cmge, cmgt, cmhi, cmhs, ctst, - // pmul, saba, uaba, sabd, uabd, umin, umax, smin, smax, + // pmul, saba, uaba, sabd, uabd, umin, uminp, umax, umaxp, smin, sminp, smax, smaxp switch (ins) { case INS_add: @@ -12958,10 +13091,14 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case INS_cmgt: case INS_cmhi: case INS_cmhs: - case INS_umin: - case INS_umax: - case INS_smin: case INS_smax: + case INS_smaxp: + case INS_smin: + case INS_sminp: + case INS_umax: + case INS_umaxp: + case INS_umin: + case INS_uminp: case INS_uzp1: case INS_uzp2: case INS_zip1: diff --git a/src/coreclr/src/jit/emitfmtsarm64.h b/src/coreclr/src/jit/emitfmtsarm64.h index 78d8da7d30082..42e4c5287b47b 100644 --- a/src/coreclr/src/jit/emitfmtsarm64.h +++ b/src/coreclr/src/jit/emitfmtsarm64.h @@ -86,6 +86,7 @@ IF_DEF(EN2M, IS_NONE, NONE) // Instruction has 2 possible encoding types, type M IF_DEF(EN2N, IS_NONE, NONE) // Instruction has 2 possible encoding types, type N IF_DEF(EN2O, IS_NONE, NONE) // Instruction has 2 possible encoding types, type O IF_DEF(EN2P, IS_NONE, NONE) // Instruction has 2 possible encoding types, type P +IF_DEF(EN2Q, IS_NONE, NONE) // Instruction has 2 possible encoding types, type Q ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // @@ -181,8 +182,7 @@ IF_DEF(DV_1C, IS_NONE, NONE) // DV_1C .........X...... ......nnnnn..... V IF_DEF(DV_2A, IS_NONE, NONE) // DV_2A .Q.......X...... ......nnnnnddddd Vd Vn (fabs, fcvtXX - vector) IF_DEF(DV_2B, IS_NONE, NONE) // DV_2B .Q.........iiiii ......nnnnnddddd Rd Vn[] (umov/smov - to general) -IF_DEF(DV_2C, IS_NONE, NONE) // DV_2C .Q.........iiiii ......nnnnnddddd Vd Rn (dup/ins - vector from - // general) +IF_DEF(DV_2C, IS_NONE, NONE) // DV_2C .Q.........iiiii ......nnnnnddddd Vd Rn (dup/ins - vector from general) IF_DEF(DV_2D, IS_NONE, NONE) // DV_2D .Q.........iiiii ......nnnnnddddd Vd Vn[] (dup - vector) IF_DEF(DV_2E, IS_NONE, NONE) // DV_2E ...........iiiii ......nnnnnddddd Vd Vn[] (dup - scalar) IF_DEF(DV_2F, IS_NONE, NONE) // DV_2F ...........iiiii .jjjj.nnnnnddddd Vd[] Vn[] (ins - element) @@ -195,7 +195,10 @@ IF_DEF(DV_2L, IS_NONE, NONE) // DV_2L ........XX...... ......nnnnnddddd V IF_DEF(DV_2M, IS_NONE, NONE) // DV_2M .Q......XX...... ......nnnnnddddd Vd Vn (abs, neg - vector) IF_DEF(DV_2N, IS_NONE, NONE) // DV_2N .........iiiiiii ......nnnnnddddd Vd Vn imm (shift - scalar) IF_DEF(DV_2O, IS_NONE, NONE) // DV_2O .Q.......iiiiiii ......nnnnnddddd Vd Vn imm (shift - vector) -IF_DEF(DV_2P, IS_NONE, NONE) // DV_2P .,.............. ......nnnnnddddd Vd Vn (Vd used as both source and destination) +IF_DEF(DV_2P, IS_NONE, NONE) // DV_2P ................ ......nnnnnddddd Vd Vn (Vd used as both source and destination) +IF_DEF(DV_2Q, IS_NONE, NONE) // DV_2Q .........X...... ......nnnnnddddd Sd Vn (faddp, fmaxnmp, fmaxp, fminnmp, fminp - scalar) +IF_DEF(DV_2R, IS_NONE, NONE) // DV_2R .Q.......X...... ......nnnnnddddd Sd Vn (fmaxnmv, fmaxv, fminnmv, fminv) +IF_DEF(DV_2S, IS_NONE, NONE) // DV_2S ........XX...... ......nnnnnddddd Sd Vn (addp - scalar) IF_DEF(DV_3A, IS_NONE, NONE) // DV_3A .Q......XX.mmmmm ......nnnnnddddd Vd Vn Vm (vector) IF_DEF(DV_3AI, IS_NONE, NONE) // DV_3AI .Q......XXLMmmmm ....H.nnnnnddddd Vd Vn Vm[] (vector by elem) diff --git a/src/coreclr/src/jit/instrsarm64.h b/src/coreclr/src/jit/instrsarm64.h index 6c8b196cba2b7..5647c801e9b77 100644 --- a/src/coreclr/src/jit/instrsarm64.h +++ b/src/coreclr/src/jit/instrsarm64.h @@ -462,10 +462,18 @@ INST2(fmax, "fmax", 0, 0, IF_EN2G, 0x0E20F400, 0x1E204800) // fmax Vd,Vn,Vm DV_3B 0Q0011100X1mmmmm 111101nnnnnddddd 0E20 F400 Vd,Vn,Vm (vector) // fmax Vd,Vn,Vm DV_3D 000111100X1mmmmm 010010nnnnnddddd 1E20 4800 Vd,Vn,Vm (scalar) +INST2(fmaxnm, "fmaxnm", 0, 0, IF_EN2G, 0x0E20C400, 0x1E206800) + // fmaxnm Vd,Vn,Vm DV_3B 0Q0011100X1mmmmm 110001nnnnnddddd 0E20 C400 Vd,Vn,Vm (vector) + // fmaxnm Vd,Vn,Vm DV_3D 000111100X1mmmmm 011010nnnnnddddd 1E20 6800 Vd,Vn,Vm (scalar) + INST2(fmin, "fmin", 0, 0, IF_EN2G, 0x0EA0F400, 0x1E205800) // fmin Vd,Vn,Vm DV_3B 0Q0011101X1mmmmm 111101nnnnnddddd 0EA0 F400 Vd,Vn,Vm (vector) // fmin Vd,Vn,Vm DV_3D 000111100X1mmmmm 010110nnnnnddddd 1E20 5800 Vd,Vn,Vm (scalar) +INST2(fminnm, "fminnm", 0, 0, IF_EN2G, 0x0EA0C400, 0x1E207800) + // fminnm Vd,Vn,Vm DV_3B 0Q0011101X1mmmmm 110001nnnnnddddd 0EA0 C400 Vd,Vn,Vm (vector) + // fminnm Vd,Vn,Vm DV_3D 000111100X1mmmmm 011110nnnnnddddd 1E20 7800 Vd,Vn,Vm (scalar) + INST2(fabd, "fabd", 0, 0, IF_EN2G, 0x2EA0D400, 0x7EA0D400) // fabd Vd,Vn,Vm DV_3B 0Q1011101X1mmmmm 110101nnnnnddddd 2EA0 D400 Vd,Vn,Vm (vector) // fabd Vd,Vn,Vm DV_3D 011111101X1mmmmm 110101nnnnnddddd 7EA0 D400 Vd,Vn,Vm (scalar) @@ -639,9 +647,29 @@ INST2(cmtst, "cmtst", 0, 0, IF_EN2O, 0x5EE08C00, 0x0E208C00) // enum name FP LD/ST DV_2G DV_3B INST2(faddp, "faddp", 0, 0, IF_EN2P, 0x7E30D800, 0x2E20D400) - // faddp Vd,Vn DV_2G 011111100X110000 110110nnnnnddddd 7E30 D800 Vd,Vn (scalar) + // faddp Vd,Vn DV_2Q 011111100X110000 110110nnnnnddddd 7E30 D800 Vd,Vn (scalar) // faddp Vd,Vn,Vm DV_3B 0Q1011100X1mmmmm 110101nnnnnddddd 2E20 D400 Vd,Vn,Vm (vector) +INST2(fmaxnmp, "fmaxnmp",0, 0, IF_EN2P, 0x7E30C800, 0x2E20C400) + // fmaxnmp Vd,Vn DV_2Q 011111100X110000 110010nnnnnddddd 7E30 C800 Vd,Vn (scalar) + // fmaxnmp Vd,Vn,Vm DV_3B 0Q1011100X1mmmmm 110001nnnnnddddd 2E20 C400 Vd,Vn,Vm (vector) + +INST2(fmaxp, "fmaxp", 0, 0, IF_EN2P, 0x7E30F800, 0x2E20F400) + // fmaxp Vd,Vn DV_2Q 011111100X110000 111110nnnnnddddd 7E30 F800 Vd,Vn (scalar) + // fmaxp Vd,Vn,Vm DV_3B 0Q1011100X1mmmmm 111101nnnnnddddd 2E20 F400 Vd,Vn,Vm (vector) + +INST2(fminnmp, "fminnmp",0, 0, IF_EN2P, 0x7EB0C800, 0x2EA0C400) + // fminnmp Vd,Vn DV_2Q 011111101X110000 110010nnnnnddddd 7EB0 C800 Vd,Vn (scalar) + // fminnmp Vd,Vn,Vm DV_3B 0Q1011101X1mmmmm 110001nnnnnddddd 2EA0 C400 Vd,Vn,Vm (vector) + +INST2(fminp, "fminp", 0, 0, IF_EN2P, 0x7EB0F800, 0x2EA0F400) + // fminp Vd,Vn DV_2Q 011111101X110000 111110nnnnnddddd 7EB0 F800 Vd,Vn (scalar) + // fminp Vd,Vn,Vm DV_3B 0Q1011101X1mmmmm 111101nnnnnddddd 2EA0 F400 Vd,Vn,Vm (vector) + +INST2(addp, "addp", 0, 0, IF_EN2Q, 0x5E31B800, 0x0E20BC00) + // addp Vd,Vn DV_2S 01011110XX110001 101110nnnnnddddd 5E31 B800 Vd,Vn (scalar) + // addp Vd,Vn,Vm DV_3A 0Q001110XX1mmmmm 101111nnnnnddddd 0E20 BC00 Vd,Vn,Vm (vector) + INST1(ldar, "ldar", 0,LD, IF_LS_2A, 0x88DFFC00) // ldar Rt,[Xn] LS_2A 1X00100011011111 111111nnnnnttttt 88DF FC00 @@ -1221,6 +1249,18 @@ INST1(umaxv, "umaxv", 0, 0, IF_DV_2M, 0x2E30A800) INST1(uminv, "uminv", 0, 0, IF_DV_2M, 0x2E31A800) // uminv Vd,Vn DV_2M 0Q101110XX110001 101010nnnnnddddd 2E31 A800 Vd,Vn (vector) +INST1(fmaxnmv, "fmaxnmv",0, 0, IF_DV_2R, 0x2E30C800) + // fmaxnmv Vd,Vn DV_2R 0Q1011100X110000 110010nnnnnddddd 2E30 C800 Vd,Vn (vector) + +INST1(fmaxv, "fmaxv", 0, 0, IF_DV_2R, 0x2E30F800) + // fmaxv Vd,Vn DV_2R 0Q1011100X110000 111110nnnnnddddd 2E30 F800 Vd,Vn (vector) + +INST1(fminnmv, "fminnmv",0, 0, IF_DV_2R, 0x2EB0C800) + // fminnmv Vd,Vn DV_2R 0Q1011101X110000 110010nnnnnddddd 2EB0 C800 Vd,Vn (vector) + +INST1(fminv, "fminv", 0, 0, IF_DV_2R, 0x2EB0F800) + // fminv Vd,Vn DV_2R 0Q1011101X110000 111110nnnnnddddd 2EB0 F800 Vd,Vn (vector) + INST1(uzp1, "uzp1", 0, 0, IF_DV_3A, 0x0E001800) // uzp1 Vd,Vn,Vm DV_3A 0Q001110XX0mmmmm 000110nnnnnddddd 0E00 1800 Vd,Vn,Vm (vector) @@ -1275,9 +1315,15 @@ INST1(sabd, "sabd", 0, 0, IF_DV_3A, 0x0E207400) INST1(smax, "smax", 0, 0, IF_DV_3A, 0x0E206400) // smax Vd,Vn,Vm DV_3A 0Q001110XX1mmmmm 011001nnnnnddddd 0E20 6400 Vd,Vn,Vm (vector) +INST1(smaxp, "smaxp", 0, 0, IF_DV_3A, 0x0E20A400) + // smaxp Vd,Vn,Vm DV_3A 0Q001110XX1mmmmm 101001nnnnnddddd 0E20 A400 Vd,Vn,Vm (vector) + INST1(smin, "smin", 0, 0, IF_DV_3A, 0x0E206C00) // smax Vd,Vn,Vm DV_3A 0Q001110XX1mmmmm 011011nnnnnddddd 0E20 6C00 Vd,Vn,Vm (vector) +INST1(sminp, "sminp", 0, 0, IF_DV_3A, 0x0E20AC00) + // smax Vd,Vn,Vm DV_3A 0Q001110XX1mmmmm 101011nnnnnddddd 0E20 AC00 Vd,Vn,Vm (vector) + INST1(uaba, "uaba", 0, 0, IF_DV_3A, 0x2E207C00) // uaba Vd,Vn,Vm DV_3A 0Q101110XX1mmmmm 011111nnnnnddddd 2E20 7C00 Vd,Vn,Vm (vector) @@ -1287,9 +1333,15 @@ INST1(uabd, "uabd", 0, 0, IF_DV_3A, 0x2E207400) INST1(umax, "umax", 0, 0, IF_DV_3A, 0x2E206400) // umax Vd,Vn,Vm DV_3A 0Q101110XX1mmmmm 011001nnnnnddddd 2E20 6400 Vd,Vn,Vm (vector) +INST1(umaxp, "umaxp", 0, 0, IF_DV_3A, 0x2E20A400) + // umaxp Vd,Vn,Vm DV_3A 0Q101110XX1mmmmm 101001nnnnnddddd 2E20 A400 Vd,Vn,Vm (vector) + INST1(umin, "umin", 0, 0, IF_DV_3A, 0x2E206C00) // umin Vd,Vn,Vm DV_3A 0Q101110XX1mmmmm 011011nnnnnddddd 2E20 6C00 Vd,Vn,Vm (vector) +INST1(uminp, "uminp", 0, 0, IF_DV_3A, 0x2E20AC00) + // umin Vd,Vn,Vm DV_3A 0Q101110XX1mmmmm 101011nnnnnddddd 2E20 AC00 Vd,Vn,Vm (vector) + INST1(fcvtl, "fcvtl", 0, 0, IF_DV_2G, 0x0E217800) // fcvtl Vd,Vn DV_2G 000011100X100001 011110nnnnnddddd 0E21 7800 Vd,Vn (scalar)