@@ -632,6 +632,7 @@ emit_xconst_v128 (MonoCompile *cfg, MonoClass *klass, guint8 value[16])
632
632
ins -> type = STACK_VTYPE ;
633
633
ins -> dreg = alloc_xreg (cfg );
634
634
ins -> inst_p0 = mono_mem_manager_alloc (cfg -> mem_manager , size );
635
+ ins -> klass = klass ;
635
636
MONO_ADD_INS (cfg -> cbb , ins );
636
637
637
638
memcpy (ins -> inst_p0 , & value [0 ], size );
@@ -1390,6 +1391,76 @@ emit_msb_shift_vector_constant (MonoCompile *cfg, MonoClass *arg_class, MonoType
1390
1391
}
1391
1392
#endif
1392
1393
1394
+ static MonoInst *
1395
+ emit_dot (MonoCompile * cfg , MonoClass * klass , MonoType * vector_type , MonoTypeEnum arg0_type , int sreg1 , int sreg2 ) {
1396
+ if (!is_element_type_primitive (vector_type ))
1397
+ return NULL ;
1398
+ #if defined(TARGET_WASM )
1399
+ if (!COMPILE_LLVM (cfg ) && (arg0_type == MONO_TYPE_I8 || arg0_type == MONO_TYPE_U8 ))
1400
+ return NULL ;
1401
+ #elif defined(TARGET_ARM64 )
1402
+ if (!COMPILE_LLVM (cfg ) && (arg0_type == MONO_TYPE_I8 || arg0_type == MONO_TYPE_U8 || arg0_type == MONO_TYPE_I || arg0_type == MONO_TYPE_U ))
1403
+ return NULL ;
1404
+ #endif
1405
+
1406
+ #if defined(TARGET_ARM64 ) || defined(TARGET_WASM )
1407
+ MonoInst * pairwise_multiply = emit_simd_ins (cfg , klass , OP_XBINOP , sreg1 , sreg2 );
1408
+ pairwise_multiply -> inst_c0 = type_enum_is_float (arg0_type ) ? OP_FMUL : OP_IMUL ;
1409
+ pairwise_multiply -> inst_c1 = arg0_type ;
1410
+ return emit_sum_vector (cfg , vector_type , arg0_type , pairwise_multiply );
1411
+ #elif defined(TARGET_AMD64 )
1412
+ int instc = -1 ;
1413
+ if (type_enum_is_float (arg0_type )) {
1414
+ if (is_SIMD_feature_supported (cfg , MONO_CPU_X86_SSE41 )) {
1415
+ int mask_val = -1 ;
1416
+ switch (arg0_type ) {
1417
+ case MONO_TYPE_R4 :
1418
+ instc = COMPILE_LLVM (cfg ) ? OP_SSE41_DPPS : OP_SSE41_DPPS_IMM ;
1419
+ mask_val = 0xf1 ; // 0xf1 ... 0b11110001
1420
+ break ;
1421
+ case MONO_TYPE_R8 :
1422
+ instc = COMPILE_LLVM (cfg ) ? OP_SSE41_DPPD : OP_SSE41_DPPD_IMM ;
1423
+ mask_val = 0x31 ; // 0x31 ... 0b00110001
1424
+ break ;
1425
+ default :
1426
+ return NULL ;
1427
+ }
1428
+
1429
+ MonoInst * dot ;
1430
+ if (COMPILE_LLVM (cfg )) {
1431
+ int mask_reg = alloc_ireg (cfg );
1432
+ MONO_EMIT_NEW_ICONST (cfg , mask_reg , mask_val );
1433
+
1434
+ dot = emit_simd_ins (cfg , klass , instc , sreg1 , sreg2 );
1435
+ dot -> sreg3 = mask_reg ;
1436
+ } else {
1437
+ dot = emit_simd_ins (cfg , klass , instc , sreg1 , sreg2 );
1438
+ dot -> inst_c0 = mask_val ;
1439
+ }
1440
+ return extract_first_element (cfg , klass , arg0_type , dot -> dreg );
1441
+ } else {
1442
+ instc = OP_FMUL ;
1443
+ }
1444
+ } else {
1445
+ if (arg0_type == MONO_TYPE_I1 || arg0_type == MONO_TYPE_U1 )
1446
+ return NULL ; // We don't support sum vector for byte, sbyte types yet
1447
+
1448
+ // FIXME:
1449
+ if (!COMPILE_LLVM (cfg ))
1450
+ return NULL ;
1451
+
1452
+ instc = OP_IMUL ;
1453
+ }
1454
+ MonoInst * pairwise_multiply = emit_simd_ins (cfg , klass , OP_XBINOP , sreg1 , sreg2 );
1455
+ pairwise_multiply -> inst_c0 = type_enum_is_float (arg0_type ) ? OP_FMUL : OP_IMUL ;
1456
+ pairwise_multiply -> inst_c1 = arg0_type ;
1457
+
1458
+ return emit_sum_vector (cfg , vector_type , arg0_type , pairwise_multiply );
1459
+ #else
1460
+ return NULL ;
1461
+ #endif
1462
+ }
1463
+
1393
1464
/*
1394
1465
* Emit intrinsics in System.Numerics.Vector and System.Runtime.Intrinsics.Vector64/128/256/512.
1395
1466
* If the intrinsic is not supported for some reasons, return NULL, and fall back to the c#
@@ -1768,70 +1839,7 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
1768
1839
}
1769
1840
}
1770
1841
case SN_Dot : {
1771
- if (!is_element_type_primitive (fsig -> params [0 ]))
1772
- return NULL ;
1773
- #if defined(TARGET_WASM )
1774
- if (!COMPILE_LLVM (cfg ) && (arg0_type == MONO_TYPE_I8 || arg0_type == MONO_TYPE_U8 ))
1775
- return NULL ;
1776
- #elif defined(TARGET_ARM64 )
1777
- if (!COMPILE_LLVM (cfg ) && (arg0_type == MONO_TYPE_I8 || arg0_type == MONO_TYPE_U8 || arg0_type == MONO_TYPE_I || arg0_type == MONO_TYPE_U ))
1778
- return NULL ;
1779
- #endif
1780
-
1781
- #if defined(TARGET_ARM64 ) || defined(TARGET_WASM )
1782
- int instc0 = type_enum_is_float (arg0_type ) ? OP_FMUL : OP_IMUL ;
1783
- MonoInst * pairwise_multiply = emit_simd_ins_for_sig (cfg , klass , OP_XBINOP , instc0 , arg0_type , fsig , args );
1784
- return emit_sum_vector (cfg , fsig -> params [0 ], arg0_type , pairwise_multiply );
1785
- #elif defined(TARGET_AMD64 )
1786
- int instc = -1 ;
1787
- if (type_enum_is_float (arg0_type )) {
1788
- if (is_SIMD_feature_supported (cfg , MONO_CPU_X86_SSE41 )) {
1789
- int mask_val = -1 ;
1790
- switch (arg0_type ) {
1791
- case MONO_TYPE_R4 :
1792
- instc = COMPILE_LLVM (cfg ) ? OP_SSE41_DPPS : OP_SSE41_DPPS_IMM ;
1793
- mask_val = 0xf1 ; // 0xf1 ... 0b11110001
1794
- break ;
1795
- case MONO_TYPE_R8 :
1796
- instc = COMPILE_LLVM (cfg ) ? OP_SSE41_DPPD : OP_SSE41_DPPD_IMM ;
1797
- mask_val = 0x31 ; // 0x31 ... 0b00110001
1798
- break ;
1799
- default :
1800
- return NULL ;
1801
- }
1802
-
1803
- MonoInst * dot ;
1804
- if (COMPILE_LLVM (cfg )) {
1805
- int mask_reg = alloc_ireg (cfg );
1806
- MONO_EMIT_NEW_ICONST (cfg , mask_reg , mask_val );
1807
-
1808
- dot = emit_simd_ins (cfg , klass , instc , args [0 ]-> dreg , args [1 ]-> dreg );
1809
- dot -> sreg3 = mask_reg ;
1810
- } else {
1811
- dot = emit_simd_ins (cfg , klass , instc , args [0 ]-> dreg , args [1 ]-> dreg );
1812
- dot -> inst_c0 = mask_val ;
1813
- }
1814
-
1815
- return extract_first_element (cfg , klass , arg0_type , dot -> dreg );
1816
- } else {
1817
- instc = OP_FMUL ;
1818
- }
1819
- } else {
1820
- if (arg0_type == MONO_TYPE_I1 || arg0_type == MONO_TYPE_U1 )
1821
- return NULL ; // We don't support sum vector for byte, sbyte types yet
1822
-
1823
- // FIXME:
1824
- if (!COMPILE_LLVM (cfg ))
1825
- return NULL ;
1826
-
1827
- instc = OP_IMUL ;
1828
- }
1829
- MonoInst * pairwise_multiply = emit_simd_ins_for_sig (cfg , klass , OP_XBINOP , instc , arg0_type , fsig , args );
1830
-
1831
- return emit_sum_vector (cfg , fsig -> params [0 ], arg0_type , pairwise_multiply );
1832
- #else
1833
- return NULL ;
1834
- #endif
1842
+ return emit_dot (cfg , klass , fsig -> params [0 ], arg0_type , args [0 ]-> dreg , args [1 ]-> dreg );
1835
1843
}
1836
1844
case SN_Equals :
1837
1845
case SN_EqualsAll :
@@ -2910,6 +2918,8 @@ emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f
2910
2918
value [1 ] = 1.0f ;
2911
2919
value [2 ] = 1.0f ;
2912
2920
value [3 ] = 1.0f ;
2921
+ if (len == 3 )
2922
+ value [3 ] = 0.0f ;
2913
2923
return emit_xconst_v128 (cfg , klass , (guint8 * )value );
2914
2924
}
2915
2925
case SN_set_Item : {
@@ -2988,28 +2998,7 @@ emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f
2988
2998
return emit_simd_ins_for_binary_op (cfg , klass , fsig , args , MONO_TYPE_R4 , id );
2989
2999
}
2990
3000
case SN_Dot : {
2991
- #if defined(TARGET_ARM64 ) || defined(TARGET_WASM )
2992
- MonoInst * pairwise_multiply = emit_simd_ins_for_sig (cfg , klass , OP_XBINOP , OP_FMUL , MONO_TYPE_R4 , fsig , args );
2993
- return emit_sum_vector (cfg , fsig -> params [0 ], MONO_TYPE_R4 , pairwise_multiply );
2994
- #elif defined(TARGET_AMD64 )
2995
- if (!(mini_get_cpu_features (cfg ) & MONO_CPU_X86_SSE41 ))
2996
- return NULL ;
2997
-
2998
- int mask_reg = alloc_ireg (cfg );
2999
- MONO_EMIT_NEW_ICONST (cfg , mask_reg , 0xf1 );
3000
- MonoInst * dot = emit_simd_ins (cfg , klass , OP_SSE41_DPPS , args [0 ]-> dreg , args [1 ]-> dreg );
3001
- dot -> sreg3 = mask_reg ;
3002
-
3003
- MONO_INST_NEW (cfg , ins , OP_EXTRACT_R4 );
3004
- ins -> dreg = alloc_freg (cfg );
3005
- ins -> sreg1 = dot -> dreg ;
3006
- ins -> inst_c0 = 0 ;
3007
- ins -> inst_c1 = MONO_TYPE_R4 ;
3008
- MONO_ADD_INS (cfg -> cbb , ins );
3009
- return ins ;
3010
- #else
3011
- return NULL ;
3012
- #endif
3001
+ return emit_dot (cfg , klass , fsig -> params [0 ], MONO_TYPE_R4 , args [0 ]-> dreg , args [1 ]-> dreg );
3013
3002
}
3014
3003
case SN_Negate :
3015
3004
case SN_op_UnaryNegation : {
@@ -3061,7 +3050,6 @@ emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f
3061
3050
#endif
3062
3051
}
3063
3052
case SN_CopyTo :
3064
- // FIXME: https://github.com/dotnet/runtime/issues/91394
3065
3053
return NULL ;
3066
3054
case SN_Clamp : {
3067
3055
if (!(!fsig -> hasthis && fsig -> param_count == 3 && mono_metadata_type_equal (fsig -> ret , type ) && mono_metadata_type_equal (fsig -> params [0 ], type ) && mono_metadata_type_equal (fsig -> params [1 ], type ) && mono_metadata_type_equal (fsig -> params [2 ], type )))
@@ -3077,15 +3065,133 @@ emit_vector_2_3_4 (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f
3077
3065
3078
3066
return min ;
3079
3067
}
3080
- case SN_Conjugate :
3081
- case SN_Distance :
3082
- case SN_DistanceSquared :
3068
+ case SN_Distance :
3069
+ case SN_DistanceSquared : {
3070
+ #if defined(TARGET_ARM64 )
3071
+ MonoInst * diffs = emit_simd_ins_for_sig (cfg , klass , OP_XBINOP , OP_FSUB , MONO_TYPE_R4 , fsig , args );
3072
+ MonoInst * dot = emit_dot (cfg , klass , fsig -> params [0 ], MONO_TYPE_R4 , diffs -> dreg , diffs -> dreg );
3073
+
3074
+ switch (id ) {
3075
+ case SN_Distance : {
3076
+ dot = emit_simd_ins (cfg , klass , OP_EXPAND_R4 , dot -> dreg , -1 );
3077
+ dot -> inst_c1 = MONO_TYPE_R4 ;
3078
+
3079
+ MonoInst * sqrt = emit_simd_ins (cfg , klass , OP_XOP_OVR_X_X , dot -> dreg , -1 );
3080
+ sqrt -> inst_c0 = INTRINS_AARCH64_ADV_SIMD_FSQRT ;
3081
+ sqrt -> inst_c1 = MONO_TYPE_R4 ;
3082
+
3083
+ MonoInst * distance = emit_simd_ins (cfg , klass , OP_EXTRACT_R4 , sqrt -> dreg , -1 );
3084
+ distance -> inst_c0 = 0 ;
3085
+ distance -> inst_c1 = MONO_TYPE_R4 ;
3086
+ return distance ;
3087
+ }
3088
+ case SN_DistanceSquared :
3089
+ return dot ;
3090
+ default :
3091
+ g_assert_not_reached ();
3092
+ }
3093
+ #else
3094
+ return NULL ;
3095
+ #endif
3096
+ }
3083
3097
case SN_Length :
3084
- case SN_LengthSquared :
3085
- case SN_Lerp :
3098
+ case SN_LengthSquared : {
3099
+ #if defined (TARGET_ARM64 )
3100
+ int src1 = load_simd_vreg (cfg , cmethod , args [0 ], NULL );
3101
+ MonoInst * dot = emit_dot (cfg , klass , type , MONO_TYPE_R4 , src1 , src1 );
3102
+
3103
+ switch (id ) {
3104
+ case SN_Length : {
3105
+ dot = emit_simd_ins (cfg , klass , OP_EXPAND_R4 , dot -> dreg , -1 );
3106
+ dot -> inst_c1 = MONO_TYPE_R4 ;
3107
+
3108
+ MonoInst * sqrt = emit_simd_ins (cfg , klass , OP_XOP_OVR_X_X , dot -> dreg , -1 );
3109
+ sqrt -> inst_c0 = INTRINS_AARCH64_ADV_SIMD_FSQRT ;
3110
+ sqrt -> inst_c1 = MONO_TYPE_R4 ;
3111
+
3112
+ MonoInst * length = emit_simd_ins (cfg , klass , OP_EXTRACT_R4 , sqrt -> dreg , -1 );
3113
+ length -> inst_c0 = 0 ;
3114
+ length -> inst_c1 = MONO_TYPE_R4 ;
3115
+ return length ;
3116
+ }
3117
+ case SN_LengthSquared :
3118
+ return dot ;
3119
+ default :
3120
+ g_assert_not_reached ();
3121
+ }
3122
+ #else
3123
+ return NULL ;
3124
+ #endif
3125
+ }
3126
+ case SN_Lerp : {
3127
+ #if defined (TARGET_ARM64 )
3128
+ MonoInst * v1 = args [1 ];
3129
+ if (!strcmp ("Quaternion" , m_class_get_name (klass )))
3130
+ return NULL ;
3131
+
3132
+
3133
+ MonoInst * diffs = emit_simd_ins (cfg , klass , OP_XBINOP , v1 -> dreg , args [0 ]-> dreg );
3134
+ diffs -> inst_c0 = OP_FSUB ;
3135
+ diffs -> inst_c1 = MONO_TYPE_R4 ;
3136
+
3137
+ MonoInst * scaled_diffs = handle_mul_div_by_scalar (cfg , klass , MONO_TYPE_R4 , args [2 ]-> dreg , diffs -> dreg , OP_FMUL );
3138
+
3139
+ MonoInst * result = emit_simd_ins (cfg , klass , OP_XBINOP , args [0 ]-> dreg , scaled_diffs -> dreg );
3140
+ result -> inst_c0 = OP_FADD ;
3141
+ result -> inst_c1 = MONO_TYPE_R4 ;
3142
+
3143
+ return result ;
3144
+ #else
3145
+ return NULL ;
3146
+ #endif
3147
+ }
3086
3148
case SN_Normalize : {
3087
- // FIXME: https://github.com/dotnet/runtime/issues/91394
3149
+ #if defined (TARGET_ARM64 )
3150
+ MonoInst * vec = args [0 ];
3151
+ const char * class_name = m_class_get_name (klass );
3152
+ if (!strcmp ("Plane" , class_name )) {
3153
+ static float r4_0 = 0 ;
3154
+ MonoInst * zero ;
3155
+ int zero_dreg = alloc_freg (cfg );
3156
+ MONO_INST_NEW (cfg , zero , OP_R4CONST );
3157
+ zero -> inst_p0 = (void * )& r4_0 ;
3158
+ zero -> dreg = zero_dreg ;
3159
+ MONO_ADD_INS (cfg -> cbb , zero );
3160
+ vec = emit_vector_insert_element (cfg , klass , vec , MONO_TYPE_R4 , zero , 3 , FALSE);
3161
+ }
3162
+
3163
+ MonoInst * dot = emit_dot (cfg , klass , type , MONO_TYPE_R4 , vec -> dreg , vec -> dreg );
3164
+ dot = emit_simd_ins (cfg , klass , OP_EXPAND_R4 , dot -> dreg , -1 );
3165
+ dot -> inst_c1 = MONO_TYPE_R4 ;
3166
+
3167
+ MonoInst * sqrt_vec = emit_simd_ins (cfg , klass , OP_XOP_OVR_X_X , dot -> dreg , -1 );
3168
+ sqrt_vec -> inst_c0 = INTRINS_AARCH64_ADV_SIMD_FSQRT ;
3169
+ sqrt_vec -> inst_c1 = MONO_TYPE_R4 ;
3170
+
3171
+ MonoInst * normalized_vec = emit_simd_ins (cfg , klass , OP_XBINOP , args [0 ]-> dreg , sqrt_vec -> dreg );
3172
+ normalized_vec -> inst_c0 = OP_FDIV ;
3173
+ normalized_vec -> inst_c1 = MONO_TYPE_R4 ;
3174
+
3175
+ return normalized_vec ;
3176
+ #else
3088
3177
return NULL ;
3178
+ #endif
3179
+ }
3180
+ case SN_Conjugate : {
3181
+ #if defined (TARGET_ARM64 )
3182
+ float value [4 ];
3183
+ value [0 ] = -1.0f ;
3184
+ value [1 ] = -1.0f ;
3185
+ value [2 ] = -1.0f ;
3186
+ value [3 ] = 1.0f ;
3187
+ MonoInst * r = emit_xconst_v128 (cfg , klass , (guint8 * )value );
3188
+ MonoInst * result = emit_simd_ins (cfg , klass , OP_XBINOP , args [0 ]-> dreg , r -> dreg );
3189
+ result -> inst_c0 = OP_FMUL ;
3190
+ result -> inst_c1 = MONO_TYPE_R4 ;
3191
+ return result ;
3192
+ #else
3193
+ return NULL ;
3194
+ #endif
3089
3195
}
3090
3196
default :
3091
3197
g_assert_not_reached ();
0 commit comments