Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[wasm] Enable PackedSimd in the interpreter; interpreter/jiterpreter v128 bug fixes #86136

Closed
wants to merge 25 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions eng/testing/tests.wasm.targets
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@

So, set those parameters explicitly here.
-->
<_ExtraTrimmerArgs Condition="'$(WasmEnableSIMD)' == 'true' and '$(RunAOTCompilation)' == 'true'">$(_ExtraTrimmerArgs) --substitutions &quot;$(MonoProjectRoot)\wasm\build\ILLink.Substitutions.WasmIntrinsics.xml&quot;</_ExtraTrimmerArgs>
<_ExtraTrimmerArgs Condition="'$(WasmEnableSIMD)' != 'true' or '$(RunAOTCompilation)' != 'true'">$(_ExtraTrimmerArgs) --substitutions &quot;$(MonoProjectRoot)\wasm\build\ILLink.Substitutions.NoWasmIntrinsics.xml&quot;</_ExtraTrimmerArgs>
<_ExtraTrimmerArgs Condition="'$(WasmEnableSIMD)' == 'true'">$(_ExtraTrimmerArgs) --substitutions &quot;$(MonoProjectRoot)\wasm\build\ILLink.Substitutions.WasmIntrinsics.xml&quot;</_ExtraTrimmerArgs>
<_ExtraTrimmerArgs Condition="'$(WasmEnableSIMD)' != 'true'">$(_ExtraTrimmerArgs) --substitutions &quot;$(MonoProjectRoot)\wasm\build\ILLink.Substitutions.NoWasmIntrinsics.xml&quot;</_ExtraTrimmerArgs>
</PropertyGroup>

<ItemGroup>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2456,11 +2456,6 @@ internal static Vector128<byte> ShuffleUnsafe(Vector128<byte> vector, Vector128<
return AdvSimd.Arm64.VectorTableLookup(vector, indices);
}

if (PackedSimd.IsSupported)
{
return PackedSimd.Swizzle(vector, indices);
}

return Shuffle(vector, indices);
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.Numerics;
using System.Diagnostics.CodeAnalysis;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
Expand Down Expand Up @@ -1872,28 +1874,45 @@ public abstract class PackedSimd

// Conversions

/// <summary>
/// Managed implementation shared by the <c>ConvertNarrowing*Saturate</c> overloads: converts every
/// lane of both inputs to <typeparamref name="TResult"/> with saturation and packs them into one vector.
/// </summary>
/// <typeparam name="TResult">Element type of the narrowed result vector.</typeparam>
/// <typeparam name="TIn">Element type of the two input vectors.</typeparam>
/// <param name="lower">Source of result lanes <c>0 .. Vector128&lt;TIn&gt;.Count - 1</c>.</param>
/// <param name="upper">Source of result lanes <c>Vector128&lt;TIn&gt;.Count .. 2 * Vector128&lt;TIn&gt;.Count - 1</c>.</param>
/// <returns>A vector whose lanes are the saturated conversions of <paramref name="lower"/> followed by those of <paramref name="upper"/>.</returns>
private static Vector128<TResult> ConvertNarrowingSaturate<TResult, TIn>(Vector128<TIn> lower, Vector128<TIn> upper)
    where TResult : struct, INumber<TResult>
    where TIn : struct, INumber<TIn>
{
    int lanesPerInput = Vector128<TIn>.Count;
    Vector128<TResult> packed = Vector128<TResult>.Zero;

    for (int lane = 0; lane < lanesPerInput; lane++)
    {
        // Lane `lane` of each input lands in the low half (lower) or high half (upper) of the result.
        packed = packed
            .WithElement(lane, TResult.CreateSaturating(lower[lane]))
            .WithElement(lane + lanesPerInput, TResult.CreateSaturating(upper[lane]));
    }

    return packed;
}

/// <summary>
/// i8x16.narrow_i16x8_s
/// </summary>
[Intrinsic]
internal static Vector128<sbyte> ConvertNarrowingSignedSaturate(Vector128<short> lower, Vector128<short> upper) => ConvertNarrowingSignedSaturate(lower, upper);
internal static Vector128<sbyte> ConvertNarrowingSignedSaturate(Vector128<short> lower, Vector128<short> upper) =>
ConvertNarrowingSaturate<sbyte, short>(lower, upper);

/// <summary>
/// i16x8.narrow_i32x4_s
/// </summary>
[Intrinsic]
internal static Vector128<short> ConvertNarrowingSignedSaturate(Vector128<int> lower, Vector128<int> upper) => ConvertNarrowingSignedSaturate(lower, upper);
internal static Vector128<short> ConvertNarrowingSignedSaturate(Vector128<int> lower, Vector128<int> upper) =>
ConvertNarrowingSaturate<short, int>(lower, upper);

/// <summary>
/// i8x16.narrow_i16x8_u
/// </summary>
[Intrinsic]
internal static Vector128<byte> ConvertNarrowingUnsignedSaturate(Vector128<short> lower, Vector128<short> upper) => ConvertNarrowingUnsignedSaturate(lower, upper);
internal static Vector128<byte> ConvertNarrowingUnsignedSaturate(Vector128<short> lower, Vector128<short> upper) =>
ConvertNarrowingSaturate<byte, short>(lower, upper);

/// <summary>
/// i16x8.narrow_i32x4_u
/// </summary>
[Intrinsic]
internal static Vector128<ushort> ConvertNarrowingUnsignedSaturate(Vector128<int> lower, Vector128<int> upper) => ConvertNarrowingUnsignedSaturate(lower, upper);
internal static Vector128<ushort> ConvertNarrowingUnsignedSaturate(Vector128<int> lower, Vector128<int> upper) =>
ConvertNarrowingSaturate<ushort, int>(lower, upper);
}
}
4 changes: 4 additions & 0 deletions src/mono/mono/mini/interp/interp.c
Original file line number Diff line number Diff line change
Expand Up @@ -3849,6 +3849,10 @@ mono_interp_exec_method (InterpFrame *frame, ThreadContext *context, FrameClause
memset (locals + ip [1], 0, ip [2]);
ip += 3;
MINT_IN_BREAK;
MINT_IN_CASE(MINT_NIY)
g_printf ("MONO interpreter: NIY encountered in method %s\n", frame->imethod->method->name);
g_assert_not_reached ();
MINT_IN_BREAK;
MINT_IN_CASE(MINT_BREAK)
++ip;
SAVE_INTERP_STATE (frame);
Expand Down
2 changes: 1 addition & 1 deletion src/mono/mono/mini/interp/mintops.def
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#define IROPDEF(opsymbol, opstring, oplength, num_dregs, num_sregs, optype) OPDEF(opsymbol, opstring, oplength, num_dregs, num_sregs, optype)
#endif // IROPDEF

OPDEF(MINT_NIY, "niy", 1, 0, 0, MintOpNoArgs)
OPDEF(MINT_BREAK, "break", 1, 0, 0, MintOpNoArgs)
OPDEF(MINT_BREAKPOINT, "breakpoint", 1, 0, 0, MintOpNoArgs)

Expand Down Expand Up @@ -844,7 +845,6 @@ OPDEF(MINT_TIER_MONITOR_JITERPRETER, "tier_monitor_jiterpreter", 3, 0, 0, MintOp
#endif // HOST_BROWSER

IROPDEF(MINT_NOP, "nop", 1, 0, 0, MintOpNoArgs)
IROPDEF(MINT_NIY, "niy", 1, 0, 0, MintOpNoArgs)
IROPDEF(MINT_DEF, "def", 2, 1, 0, MintOpNoArgs)
IROPDEF(MINT_IL_SEQ_POINT, "il_seq_point", 1, 0, 0, MintOpNoArgs)
IROPDEF(MINT_DUMMY_USE, "dummy_use", 2, 0, 1, MintOpNoArgs)
Expand Down
53 changes: 35 additions & 18 deletions src/mono/mono/mini/interp/transform-simd.c
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,6 @@ static guint16 sri_vector128_methods [] = {
};

static guint16 sri_vector128_t_methods [] = {
SN_get_AllBitsSet,
SN_get_Count,
SN_get_One,
SN_get_Zero,
SN_op_Addition,
SN_op_BitwiseAnd,
SN_op_BitwiseOr,
Expand All @@ -84,12 +80,29 @@ static guint16 sri_vector128_t_methods [] = {
SN_op_RightShift,
SN_op_Subtraction,
SN_op_UnaryNegation,
SN_op_UnsignedRightShift
SN_op_UnsignedRightShift,
SN_get_AllBitsSet,
SN_get_Count,
SN_get_One,
SN_get_Zero,
};

static guint16 sri_packedsimd_methods [] = {
SN_Add,
SN_And,
SN_Bitmask,
SN_CompareEqual,
SN_CompareNotEqual,
SN_ConvertNarrowingSignedSaturate,
SN_ConvertNarrowingUnsignedSaturate,
SN_Dot,
SN_Multiply,
SN_Negate,
SN_ShiftLeft,
SN_ShiftRightArithmetic,
SN_ShiftRightLogical,
SN_Splat,
SN_Subtract,
SN_Swizzle,
SN_get_IsHardwareAccelerated,
SN_get_IsSupported,
Expand Down Expand Up @@ -534,22 +547,31 @@ static gboolean
emit_sri_packedsimd (TransformData *td, MonoMethod *cmethod, MonoMethodSignature *csignature)
{
int id = lookup_intrins (sri_packedsimd_methods, sizeof (sri_packedsimd_methods), cmethod);
if (id == -1)
return FALSE;
// We don't early-out for an unrecognized method, we will generate an NIY later

MonoClass *vector_klass = mono_class_from_mono_type_internal (csignature->ret);
int vector_size = -1;

if ((id == SN_get_IsSupported) || (id == SN_get_IsHardwareAccelerated)) {
#if HOST_BROWSER
interp_add_ins (td, MINT_LDC_I4_1);
interp_add_ins (td, mono_opt_interp_simd_packedsimd ? MINT_LDC_I4_1 : MINT_LDC_I4_0);
#else
interp_add_ins (td, MINT_LDC_I4_0);
#endif
goto opcode_added;
}

return FALSE;

#if HOST_BROWSER
if (!mono_opt_interp_simd_packedsimd || (id < 0)) {
g_print ("MONO interpreter: Disabled or unimplemented method: System.Runtime.Intrinsics.Wasm.PackedSimd.%s\n", cmethod->name);
// The managed PackedSimd method implementations recurse infinitely and cause a stack overflow,
// so emit an NIY opcode in their place; executing that opcode asserts.
interp_add_ins (td, MINT_NIY);
goto opcode_added;
}

gint16 simd_opcode = -1;
gint16 simd_intrins = -1;
if (!m_class_is_simd_type (vector_klass))
Expand Down Expand Up @@ -644,6 +666,7 @@ emit_sri_packedsimd (TransformData *td, MonoMethod *cmethod, MonoMethodSignature
simd_intrins = map_packedsimd_intrins_based_on_atype (atype, INTERP_SIMD_INTRINSIC_WASM_I8X16_COMPARENOTEQUAL, TRUE);
break;
}
/*
case SN_ConvertNarrowingSignedSaturate: {
simd_opcode = MINT_SIMD_INTRINS_P_PP;
if (atype == MONO_TYPE_I1)
Expand All @@ -660,6 +683,7 @@ emit_sri_packedsimd (TransformData *td, MonoMethod *cmethod, MonoMethodSignature
simd_intrins = INTERP_SIMD_INTRINSIC_WASM_I16X8_NARROW_I32X4_U;
break;
}
*/
default:
return FALSE;
}
Expand Down Expand Up @@ -710,16 +734,9 @@ interp_emit_simd_intrinsics (TransformData *td, MonoMethod *cmethod, MonoMethodS
return emit_sri_vector128 (td, cmethod, csignature);
else if (!strcmp (class_name, "Vector128`1"))
return emit_sri_vector128_t (td, cmethod, csignature);
} else if (mono_opt_interp_simd_packedsimd && !strcmp (class_ns, "System.Runtime.Intrinsics.Wasm")) {
if (!strcmp (class_name, "PackedSimd")) {
gboolean res = emit_sri_packedsimd (td, cmethod, csignature);
#if HOST_BROWSER
if (!res)
g_print ("MONO interpreter: Unsupported method: System.Runtime.Intrinsics.Wasm.PackedSimd.%s\n", cmethod->name);
g_assert (res);
#endif
return res;
}
} else if (!strcmp (class_ns, "System.Runtime.Intrinsics.Wasm")) {
if (!strcmp (class_name, "PackedSimd"))
return emit_sri_packedsimd (td, cmethod, csignature);
}
return FALSE;
}
8 changes: 2 additions & 6 deletions src/mono/mono/utils/options-def.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,20 +60,16 @@ DEFINE_BOOL_READONLY(readonly_flag, "readonly-flag", FALSE, "Example")
DEFINE_BOOL(wasm_exceptions, "wasm-exceptions", FALSE, "Enable codegen for WASM exceptions")
DEFINE_BOOL(wasm_gc_safepoints, "wasm-gc-safepoints", FALSE, "Use GC safepoints on WASM")
DEFINE_BOOL(aot_lazy_assembly_load, "aot-lazy-assembly-load", FALSE, "Load assemblies referenced by AOT images lazily")
#if HOST_BROWSER
DEFINE_BOOL(interp_simd_v128, "interp-simd-v128", FALSE, "Enable interpreter Vector128 support")
#else
DEFINE_BOOL(interp_simd_v128, "interp-simd-v128", TRUE, "Enable interpreter Vector128 support")
#endif
DEFINE_BOOL(interp_simd_packedsimd, "interp-simd-packedsimd", FALSE, "Enable interpreter WASM PackedSimd support")
DEFINE_BOOL(interp_simd_packedsimd, "interp-simd-packedsimd", TRUE, "Enable interpreter WASM PackedSimd support")

#if HOST_BROWSER

// the jiterpreter is not yet thread safe due to the need to synchronize function pointers
// and wasm modules between threads. before these can be enabled we need to implement all that
#ifdef DISABLE_THREADS
// traces_enabled controls whether the jiterpreter will JIT individual interpreter opcode traces
DEFINE_BOOL(jiterpreter_traces_enabled, "jiterpreter-traces-enabled", TRUE, "JIT interpreter opcode traces into WASM")
DEFINE_BOOL(jiterpreter_traces_enabled, "jiterpreter-traces-enabled", FALSE, "JIT interpreter opcode traces into WASM")
// interp_entry_enabled controls whether specialized interp_entry wrappers will be jitted
DEFINE_BOOL(jiterpreter_interp_entry_enabled, "jiterpreter-interp-entry-enabled", TRUE, "JIT specialized WASM interp_entry wrappers")
// jit_call_enabled controls whether do_jit_call will use specialized trampolines for hot call sites
Expand Down
4 changes: 2 additions & 2 deletions src/mono/wasm/build/WasmApp.targets
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,8 @@
<WasmGenerateAppBundle Condition="'$(WasmGenerateAppBundle)' == ''">false</WasmGenerateAppBundle>
<UseAppHost>false</UseAppHost>
<TrimMode Condition="'$(TrimMode)' == ''">full</TrimMode>
<_ExtraTrimmerArgs Condition="'$(WasmEnableSIMD)' == 'true' and '$(RunAOTCompilation)' == 'true'">$(_ExtraTrimmerArgs) --substitutions &quot;$(MSBuildThisFileDirectory)ILLink.Substitutions.WasmIntrinsics.xml&quot;</_ExtraTrimmerArgs>
<_ExtraTrimmerArgs Condition="'$(WasmEnableSIMD)' != 'true' or '$(RunAOTCompilation)' != 'true'">$(_ExtraTrimmerArgs) --substitutions &quot;$(MSBuildThisFileDirectory)ILLink.Substitutions.NoWasmIntrinsics.xml&quot;</_ExtraTrimmerArgs>
<_ExtraTrimmerArgs Condition="'$(WasmEnableSIMD)' == 'true'">$(_ExtraTrimmerArgs) --substitutions &quot;$(MSBuildThisFileDirectory)ILLink.Substitutions.WasmIntrinsics.xml&quot;</_ExtraTrimmerArgs>
<_ExtraTrimmerArgs Condition="'$(WasmEnableSIMD)' != 'true'">$(_ExtraTrimmerArgs) --substitutions &quot;$(MSBuildThisFileDirectory)ILLink.Substitutions.NoWasmIntrinsics.xml&quot;</_ExtraTrimmerArgs>
<_ExtraTrimmerArgs Condition="'$(WasmEnableLegacyJsInterop)' == 'false'">$(_ExtraTrimmerArgs) --substitutions &quot;$(MSBuildThisFileDirectory)ILLink.Substitutions.LegacyJsInterop.xml&quot;</_ExtraTrimmerArgs>

<!-- Temporarily `false`, till sdk gets a fix for supporting the new file -->
Expand Down
63 changes: 34 additions & 29 deletions src/mono/wasm/runtime/jiterpreter-trace-generator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3509,12 +3509,19 @@ const simdShiftTable = new Set<SimdIntrinsic3>([
SimdIntrinsic3.V128_I8_URIGHT_SHIFT,
]);

/**
 * Emits the instruction sequence that writes an all-zero v128 value to the
 * interpreter local at `offset`.
 *
 * Sequence emitted: the `pLocals` local, a `v128.const` whose immediate is
 * `sizeOfV128` zero bytes, then the store tail using `v128.store`.
 *
 * @param builder the WasmBuilder the instructions are appended to
 * @param offset  offset of the destination local, passed through to append_stloc_tail
 */
function append_stloc_simd_zero(builder: WasmBuilder, offset: number) {
    // A freshly constructed Uint8Array is zero-filled, so this is the zero immediate.
    const zeroImmediate = new Uint8Array(sizeOfV128);

    builder.local("pLocals");
    builder.appendSimd(WasmSimdOpcode.v128_const);
    builder.appendBytes(zeroImmediate);
    append_stloc_tail(builder, offset, WasmOpcode.PREFIX_simd, WasmSimdOpcode.v128_store);
}
// Maps each V128_*_EXTRACT_MSB intrinsic to the wasm SIMD bitmask opcode for the
// matching lane width (i8x16 / i16x8 / i32x4 / i64x2). emit_simd_2 looks the
// intrinsic up here and, on a hit, emits the opcode and stores the result as an i32.
const bitmaskTable : { [intrinsic: number]: WasmSimdOpcode } = {
[SimdIntrinsic2.V128_I1_EXTRACT_MSB]: WasmSimdOpcode.i8x16_bitmask,
[SimdIntrinsic2.V128_I2_EXTRACT_MSB]: WasmSimdOpcode.i16x8_bitmask,
[SimdIntrinsic2.V128_I4_EXTRACT_MSB]: WasmSimdOpcode.i32x4_bitmask,
[SimdIntrinsic2.V128_I8_EXTRACT_MSB]: WasmSimdOpcode.i64x2_bitmask,
};

// Maps each V128_*_CREATE_SCALAR intrinsic to a pair of opcodes:
//   [0] the scalar load opcode used to read the source local, and
//   [1] the replace_lane opcode for the matching lane width.
// emit_simd_2 uses the pair to load the scalar and write it into lane 0 of a vector.
const createScalarTable : { [intrinsic: number]: [WasmOpcode, WasmSimdOpcode] } = {
[SimdIntrinsic2.V128_I1_CREATE_SCALAR]: [WasmOpcode.i32_load8_s, WasmSimdOpcode.i8x16_replace_lane],
[SimdIntrinsic2.V128_I2_CREATE_SCALAR]: [WasmOpcode.i32_load16_s, WasmSimdOpcode.i16x8_replace_lane],
[SimdIntrinsic2.V128_I4_CREATE_SCALAR]: [WasmOpcode.i32_load, WasmSimdOpcode.i32x4_replace_lane],
[SimdIntrinsic2.V128_I8_CREATE_SCALAR]: [WasmOpcode.i64_load, WasmSimdOpcode.i64x2_replace_lane],
};

function emit_simd_2(builder: WasmBuilder, ip: MintOpcodePtr, index: SimdIntrinsic2): boolean {
const simple = <WasmSimdOpcode>cwraps.mono_jiterp_get_simd_opcode(1, index);
Expand All @@ -3525,35 +3532,33 @@ function emit_simd_2(builder: WasmBuilder, ip: MintOpcodePtr, index: SimdIntrins
return true;
}

const bitmask = bitmaskTable[index];
if (bitmask) {
append_simd_2_load(builder, ip);
builder.appendSimd(bitmask);
append_stloc_tail(builder, getArgU16(ip, 1), WasmOpcode.i32_store);
return true;
}

switch (index) {
case SimdIntrinsic2.V128_I1_CREATE_SCALAR:
// Zero then write scalar component
builder.local("pLocals");
append_stloc_simd_zero(builder, getArgU16(ip, 1));
append_ldloc(builder, getArgU16(ip, 2), WasmOpcode.i32_load8_s);
append_stloc_tail(builder, getArgU16(ip, 1), WasmOpcode.i32_store8);
return true;
case SimdIntrinsic2.V128_I2_CREATE_SCALAR:
// Zero then write scalar component
builder.local("pLocals");
append_stloc_simd_zero(builder, getArgU16(ip, 1));
append_ldloc(builder, getArgU16(ip, 2), WasmOpcode.i32_load16_s);
append_stloc_tail(builder, getArgU16(ip, 1), WasmOpcode.i32_store16);
return true;
case SimdIntrinsic2.V128_I4_CREATE_SCALAR:
// Zero then write scalar component
builder.local("pLocals");
append_stloc_simd_zero(builder, getArgU16(ip, 1));
append_ldloc(builder, getArgU16(ip, 2), WasmOpcode.i32_load);
append_stloc_tail(builder, getArgU16(ip, 1), WasmOpcode.i32_store);
return true;
case SimdIntrinsic2.V128_I8_CREATE_SCALAR:
// Zero then write scalar component
case SimdIntrinsic2.V128_I8_CREATE_SCALAR: {
const tableEntry = createScalarTable[index];
builder.local("pLocals");
append_stloc_simd_zero(builder, getArgU16(ip, 1));
append_ldloc(builder, getArgU16(ip, 2), WasmOpcode.i64_load);
append_stloc_tail(builder, getArgU16(ip, 1), WasmOpcode.i64_store);
// Make a zero vector
builder.i52_const(0);
builder.appendSimd(WasmSimdOpcode.i64x2_splat);
// Load the scalar value
append_ldloc(builder, getArgU16(ip, 2), tableEntry[0]);
// Replace the first lane
builder.appendSimd(tableEntry[1]);
builder.appendU8(0);
// Store result
append_stloc_tail(builder, getArgU16(ip, 1), WasmOpcode.PREFIX_simd, WasmSimdOpcode.v128_store);
return true;
}

case SimdIntrinsic2.V128_I1_CREATE:
append_simd_2_load(builder, ip, WasmSimdOpcode.v128_load8_splat);
Expand Down