Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update SIMD support #1553

Merged
merged 5 commits into from
Oct 26, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 32 additions & 20 deletions src/binary-reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -912,12 +912,12 @@ Result BinaryReader::ReadFunctionBody(Offset end_offset) {
case Opcode::F32Load:
case Opcode::F64Load:
case Opcode::V128Load:
case Opcode::I16X8Load8X8S:
case Opcode::I16X8Load8X8U:
case Opcode::I32X4Load16X4S:
case Opcode::I32X4Load16X4U:
case Opcode::I64X2Load32X2S:
case Opcode::I64X2Load32X2U: {
case Opcode::V128Load8X8S:
case Opcode::V128Load8X8U:
case Opcode::V128Load16X4S:
case Opcode::V128Load16X4U:
case Opcode::V128Load32X2S:
case Opcode::V128Load32X2U: {
Address alignment_log2;
CHECK_RESULT(ReadAlignment(&alignment_log2, "load alignment"));
Address offset;
Expand Down Expand Up @@ -1020,14 +1020,14 @@ Result BinaryReader::ReadFunctionBody(Offset end_offset) {
case Opcode::I16X8Mul:
case Opcode::I32X4Mul:
case Opcode::I64X2Mul:
case Opcode::I8X16AddSaturateS:
case Opcode::I8X16AddSaturateU:
case Opcode::I16X8AddSaturateS:
case Opcode::I16X8AddSaturateU:
case Opcode::I8X16SubSaturateS:
case Opcode::I8X16SubSaturateU:
case Opcode::I16X8SubSaturateS:
case Opcode::I16X8SubSaturateU:
case Opcode::I8X16AddSatS:
case Opcode::I8X16AddSatU:
case Opcode::I16X8AddSatS:
case Opcode::I16X8AddSatU:
case Opcode::I8X16SubSatS:
case Opcode::I8X16SubSatU:
case Opcode::I16X8SubSatS:
case Opcode::I16X8SubSatU:
case Opcode::I8X16MinS:
case Opcode::I16X8MinS:
case Opcode::I32X4MinS:
Expand Down Expand Up @@ -1056,9 +1056,13 @@ Result BinaryReader::ReadFunctionBody(Offset end_offset) {
case Opcode::V128Or:
case Opcode::V128Xor:
case Opcode::F32X4Min:
case Opcode::F32X4PMin:
case Opcode::F64X2Min:
case Opcode::F64X2PMin:
case Opcode::F32X4Max:
case Opcode::F32X4PMax:
case Opcode::F64X2Max:
case Opcode::F64X2PMax:
case Opcode::F32X4Add:
case Opcode::F64X2Add:
case Opcode::F32X4Sub:
Expand All @@ -1067,7 +1071,7 @@ Result BinaryReader::ReadFunctionBody(Offset end_offset) {
case Opcode::F64X2Div:
case Opcode::F32X4Mul:
case Opcode::F64X2Mul:
case Opcode::V8X16Swizzle:
case Opcode::I8X16Swizzle:
case Opcode::I8X16NarrowI16X8S:
case Opcode::I8X16NarrowI16X8U:
case Opcode::I16X8NarrowI32X4S:
Expand Down Expand Up @@ -1197,6 +1201,14 @@ Result BinaryReader::ReadFunctionBody(Offset end_offset) {
case Opcode::I8X16AllTrue:
case Opcode::I16X8AllTrue:
case Opcode::I32X4AllTrue:
case Opcode::F32X4Ceil:
case Opcode::F64X2Ceil:
case Opcode::F32X4Floor:
case Opcode::F64X2Floor:
case Opcode::F32X4Trunc:
case Opcode::F64X2Trunc:
case Opcode::F32X4Nearest:
case Opcode::F64X2Nearest:
case Opcode::F32X4Neg:
case Opcode::F64X2Neg:
case Opcode::F32X4Abs:
Expand Down Expand Up @@ -1244,18 +1256,18 @@ Result BinaryReader::ReadFunctionBody(Offset end_offset) {
break;
}

case Opcode::V8X16Shuffle: {
case Opcode::I8X16Shuffle: {
v128 value;
CHECK_RESULT(ReadV128(&value, "Lane idx [16]"));
CALLBACK(OnSimdShuffleOpExpr, opcode, value);
CALLBACK(OnOpcodeV128, value);
break;
}

case Opcode::V8X16LoadSplat:
case Opcode::V16X8LoadSplat:
case Opcode::V32X4LoadSplat:
case Opcode::V64X2LoadSplat: {
case Opcode::V128Load8Splat:
case Opcode::V128Load16Splat:
case Opcode::V128Load32Splat:
case Opcode::V128Load64Splat: {
Address alignment_log2;
CHECK_RESULT(ReadAlignment(&alignment_log2, "load alignment"));
Address offset;
Expand Down
10 changes: 10 additions & 0 deletions src/interp/interp-math.h
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,11 @@ T WABT_VECTORCALL FloatMin(T lhs, T rhs) {
}
}

// Pseudo-minimum of two values, decided by a single `<` comparison.
//
// Returns `rhs` only when `rhs < lhs` evaluates to true; in every other case
// (including when the two operands compare equal, or when the comparison is
// false because an operand is unordered) `lhs` is returned unchanged.
template <typename T>
T WABT_VECTORCALL FloatPMin(T lhs, T rhs) {
  return rhs < lhs ? rhs : lhs;
}

template <typename T>
T WABT_VECTORCALL FloatMax(T lhs, T rhs) {
if (WABT_UNLIKELY(std::isnan(lhs) || std::isnan(rhs))) {
Expand All @@ -283,6 +288,11 @@ T WABT_VECTORCALL FloatMax(T lhs, T rhs) {
}
}

// Pseudo-maximum of two values, decided by a single `<` comparison.
//
// Returns `rhs` only when `lhs < rhs` evaluates to true; in every other case
// (including when the two operands compare equal, or when the comparison is
// false because an operand is unordered) `lhs` is returned unchanged.
template <typename T>
T WABT_VECTORCALL FloatPMax(T lhs, T rhs) {
  return lhs < rhs ? rhs : lhs;
}

// Range predicate keyed on a (result type R, source type T) pair.
// The unspecialized form accepts every value of T.
// NOTE(review): presumably used to decide whether `val` may be converted to R
// without going out of range -- confirm against the callers.
template <typename R, typename T>
bool WABT_VECTORCALL CanConvert(T val) {
  return true;
}

// R = s32, T = f32: true only for -2147483648 <= val < 2147483648.
template <>
inline bool WABT_VECTORCALL CanConvert<s32, f32>(f32 val) {
  return -2147483648.f <= val && val < 2147483648.f;
}

// R = s32, T = f64: true only for -2147483649 < val < 2147483648
// (both bounds exclusive).
template <>
inline bool WABT_VECTORCALL CanConvert<s32, f64>(f64 val) {
  return -2147483649. < val && val < 2147483648.;
}
Expand Down
54 changes: 34 additions & 20 deletions src/interp/interp.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1503,11 +1503,11 @@ RunResult Thread::StepInternal(Trap::Ptr* out_trap) {
case O::I8X16ShrS: return DoSimdShift(IntShr<s8>);
case O::I8X16ShrU: return DoSimdShift(IntShr<u8>);
case O::I8X16Add: return DoSimdBinop(Add<u8>);
case O::I8X16AddSaturateS: return DoSimdBinop(IntAddSat<s8>);
case O::I8X16AddSaturateU: return DoSimdBinop(IntAddSat<u8>);
case O::I8X16AddSatS: return DoSimdBinop(IntAddSat<s8>);
case O::I8X16AddSatU: return DoSimdBinop(IntAddSat<u8>);
case O::I8X16Sub: return DoSimdBinop(Sub<u8>);
case O::I8X16SubSaturateS: return DoSimdBinop(IntSubSat<s8>);
case O::I8X16SubSaturateU: return DoSimdBinop(IntSubSat<u8>);
case O::I8X16SubSatS: return DoSimdBinop(IntSubSat<s8>);
case O::I8X16SubSatU: return DoSimdBinop(IntSubSat<u8>);
case O::I8X16MinS: return DoSimdBinop(IntMin<s8>);
case O::I8X16MinU: return DoSimdBinop(IntMin<u8>);
case O::I8X16MaxS: return DoSimdBinop(IntMax<s8>);
Expand All @@ -1521,11 +1521,11 @@ RunResult Thread::StepInternal(Trap::Ptr* out_trap) {
case O::I16X8ShrS: return DoSimdShift(IntShr<s16>);
case O::I16X8ShrU: return DoSimdShift(IntShr<u16>);
case O::I16X8Add: return DoSimdBinop(Add<u16>);
case O::I16X8AddSaturateS: return DoSimdBinop(IntAddSat<s16>);
case O::I16X8AddSaturateU: return DoSimdBinop(IntAddSat<u16>);
case O::I16X8AddSatS: return DoSimdBinop(IntAddSat<s16>);
case O::I16X8AddSatU: return DoSimdBinop(IntAddSat<u16>);
case O::I16X8Sub: return DoSimdBinop(Sub<u16>);
case O::I16X8SubSaturateS: return DoSimdBinop(IntSubSat<s16>);
case O::I16X8SubSaturateU: return DoSimdBinop(IntSubSat<u16>);
case O::I16X8SubSatS: return DoSimdBinop(IntSubSat<s16>);
case O::I16X8SubSatU: return DoSimdBinop(IntSubSat<u16>);
case O::I16X8Mul: return DoSimdBinop(Mul<u16>);
case O::I16X8MinS: return DoSimdBinop(IntMin<s16>);
case O::I16X8MinU: return DoSimdBinop(IntMin<u16>);
Expand Down Expand Up @@ -1555,6 +1555,16 @@ RunResult Thread::StepInternal(Trap::Ptr* out_trap) {
case O::I64X2Sub: return DoSimdBinop(Sub<u64>);
case O::I64X2Mul: return DoSimdBinop(Mul<u64>);

case O::F32X4Ceil: return DoSimdUnop(FloatCeil<f32>);
case O::F32X4Floor: return DoSimdUnop(FloatFloor<f32>);
case O::F32X4Trunc: return DoSimdUnop(FloatTrunc<f32>);
case O::F32X4Nearest: return DoSimdUnop(FloatNearest<f32>);

case O::F64X2Ceil: return DoSimdUnop(FloatCeil<f64>);
case O::F64X2Floor: return DoSimdUnop(FloatFloor<f64>);
case O::F64X2Trunc: return DoSimdUnop(FloatTrunc<f64>);
case O::F64X2Nearest: return DoSimdUnop(FloatNearest<f64>);

case O::F32X4Abs: return DoSimdUnop(FloatAbs<f32>);
case O::F32X4Neg: return DoSimdUnop(FloatNeg<f32>);
case O::F32X4Sqrt: return DoSimdUnop(FloatSqrt<f32>);
Expand All @@ -1564,6 +1574,8 @@ RunResult Thread::StepInternal(Trap::Ptr* out_trap) {
case O::F32X4Div: return DoSimdBinop(FloatDiv<f32>);
case O::F32X4Min: return DoSimdBinop(FloatMin<f32>);
case O::F32X4Max: return DoSimdBinop(FloatMax<f32>);
case O::F32X4PMin: return DoSimdBinop(FloatPMin<f32>);
case O::F32X4PMax: return DoSimdBinop(FloatPMax<f32>);

case O::F64X2Abs: return DoSimdUnop(FloatAbs<f64>);
case O::F64X2Neg: return DoSimdUnop(FloatNeg<f64>);
Expand All @@ -1574,19 +1586,21 @@ RunResult Thread::StepInternal(Trap::Ptr* out_trap) {
case O::F64X2Div: return DoSimdBinop(FloatDiv<f64>);
case O::F64X2Min: return DoSimdBinop(FloatMin<f64>);
case O::F64X2Max: return DoSimdBinop(FloatMax<f64>);
case O::F64X2PMin: return DoSimdBinop(FloatPMin<f64>);
case O::F64X2PMax: return DoSimdBinop(FloatPMax<f64>);

case O::I32X4TruncSatF32X4S: return DoSimdUnop(IntTruncSat<s32, f32>);
case O::I32X4TruncSatF32X4U: return DoSimdUnop(IntTruncSat<u32, f32>);
case O::F32X4ConvertI32X4S: return DoSimdUnop(Convert<f32, s32>);
case O::F32X4ConvertI32X4U: return DoSimdUnop(Convert<f32, u32>);

case O::V8X16Swizzle: return DoSimdSwizzle();
case O::V8X16Shuffle: return DoSimdShuffle(instr);
case O::I8X16Swizzle: return DoSimdSwizzle();
case O::I8X16Shuffle: return DoSimdShuffle(instr);

case O::V8X16LoadSplat: return DoSimdLoadSplat<u8x16, u32>(instr, out_trap);
case O::V16X8LoadSplat: return DoSimdLoadSplat<u16x8, u32>(instr, out_trap);
case O::V32X4LoadSplat: return DoSimdLoadSplat<u32x4, u32>(instr, out_trap);
case O::V64X2LoadSplat: return DoSimdLoadSplat<u64x2, u64>(instr, out_trap);
case O::V128Load8Splat: return DoSimdLoadSplat<u8x16, u32>(instr, out_trap);
case O::V128Load16Splat: return DoSimdLoadSplat<u16x8, u32>(instr, out_trap);
case O::V128Load32Splat: return DoSimdLoadSplat<u32x4, u32>(instr, out_trap);
case O::V128Load64Splat: return DoSimdLoadSplat<u64x2, u64>(instr, out_trap);

case O::I8X16NarrowI16X8S: return DoSimdNarrow<s8x16, s16x8>();
case O::I8X16NarrowI16X8U: return DoSimdNarrow<u8x16, s16x8>();
Expand All @@ -1601,12 +1615,12 @@ RunResult Thread::StepInternal(Trap::Ptr* out_trap) {
case O::I32X4WidenLowI16X8U: return DoSimdWiden<u32x4, u16x8, true>();
case O::I32X4WidenHighI16X8U: return DoSimdWiden<u32x4, u16x8, false>();

case O::I16X8Load8X8S: return DoSimdLoadExtend<s16x8, s8x8>(instr, out_trap);
case O::I16X8Load8X8U: return DoSimdLoadExtend<u16x8, u8x8>(instr, out_trap);
case O::I32X4Load16X4S: return DoSimdLoadExtend<s32x4, s16x4>(instr, out_trap);
case O::I32X4Load16X4U: return DoSimdLoadExtend<u32x4, u16x4>(instr, out_trap);
case O::I64X2Load32X2S: return DoSimdLoadExtend<s64x2, s32x2>(instr, out_trap);
case O::I64X2Load32X2U: return DoSimdLoadExtend<u64x2, u32x2>(instr, out_trap);
case O::V128Load8X8S: return DoSimdLoadExtend<s16x8, s8x8>(instr, out_trap);
case O::V128Load8X8U: return DoSimdLoadExtend<u16x8, u8x8>(instr, out_trap);
case O::V128Load16X4S: return DoSimdLoadExtend<s32x4, s16x4>(instr, out_trap);
case O::V128Load16X4U: return DoSimdLoadExtend<u32x4, u16x4>(instr, out_trap);
case O::V128Load32X2S: return DoSimdLoadExtend<s64x2, s32x2>(instr, out_trap);
case O::V128Load32X2U: return DoSimdLoadExtend<u64x2, u32x2>(instr, out_trap);

case O::V128Andnot: return DoSimdBinop(IntAndNot<u64>);
case O::I8X16AvgrU: return DoSimdBinop(IntAvgr<u8>);
Expand Down
52 changes: 32 additions & 20 deletions src/interp/istream.cc
Original file line number Diff line number Diff line change
Expand Up @@ -130,11 +130,15 @@ Instr Istream::Read(Offset* offset) const {
case Opcode::F32Sqrt:
case Opcode::F32Trunc:
case Opcode::F32X4Abs:
case Opcode::F32X4Ceil:
case Opcode::F32X4ConvertI32X4S:
case Opcode::F32X4ConvertI32X4U:
case Opcode::F32X4Floor:
case Opcode::F32X4Nearest:
case Opcode::F32X4Neg:
case Opcode::F32X4Splat:
case Opcode::F32X4Sqrt:
case Opcode::F32X4Trunc:
case Opcode::F64Abs:
case Opcode::F64Ceil:
case Opcode::F64ConvertI32S:
Expand All @@ -149,9 +153,13 @@ Instr Istream::Read(Offset* offset) const {
case Opcode::F64Sqrt:
case Opcode::F64Trunc:
case Opcode::F64X2Abs:
case Opcode::F64X2Ceil:
case Opcode::F64X2Floor:
case Opcode::F64X2Nearest:
case Opcode::F64X2Neg:
case Opcode::F64X2Splat:
case Opcode::F64X2Sqrt:
case Opcode::F64X2Trunc:
case Opcode::I16X8AllTrue:
case Opcode::I16X8AnyTrue:
case Opcode::I16X8Bitmask:
Expand Down Expand Up @@ -246,6 +254,8 @@ Instr Istream::Read(Offset* offset) const {
case Opcode::F32X4Min:
case Opcode::F32X4Mul:
case Opcode::F32X4Ne:
case Opcode::F32X4PMax:
case Opcode::F32X4PMin:
case Opcode::F32X4Sub:
case Opcode::F64Add:
case Opcode::F64Copysign:
Expand All @@ -271,10 +281,12 @@ Instr Istream::Read(Offset* offset) const {
case Opcode::F64X2Min:
case Opcode::F64X2Mul:
case Opcode::F64X2Ne:
case Opcode::F64X2PMax:
case Opcode::F64X2PMin:
case Opcode::F64X2Sub:
case Opcode::I16X8Add:
case Opcode::I16X8AddSaturateS:
case Opcode::I16X8AddSaturateU:
case Opcode::I16X8AddSatS:
case Opcode::I16X8AddSatU:
case Opcode::I16X8AvgrU:
case Opcode::I16X8Eq:
case Opcode::I16X8GeS:
Expand All @@ -297,8 +309,8 @@ Instr Istream::Read(Offset* offset) const {
case Opcode::I16X8ShrS:
case Opcode::I16X8ShrU:
case Opcode::I16X8Sub:
case Opcode::I16X8SubSaturateS:
case Opcode::I16X8SubSaturateU:
case Opcode::I16X8SubSatS:
case Opcode::I16X8SubSatU:
case Opcode::I32Add:
case Opcode::I32And:
case Opcode::I32DivS:
Expand Down Expand Up @@ -376,8 +388,8 @@ Instr Istream::Read(Offset* offset) const {
case Opcode::I64X2Mul:
case Opcode::I64Xor:
case Opcode::I8X16Add:
case Opcode::I8X16AddSaturateS:
case Opcode::I8X16AddSaturateU:
case Opcode::I8X16AddSatS:
case Opcode::I8X16AddSatU:
case Opcode::I8X16AvgrU:
case Opcode::I8X16Eq:
case Opcode::I8X16GeS:
Expand All @@ -399,14 +411,14 @@ Instr Istream::Read(Offset* offset) const {
case Opcode::I8X16ShrS:
case Opcode::I8X16ShrU:
case Opcode::I8X16Sub:
case Opcode::I8X16SubSaturateS:
case Opcode::I8X16SubSaturateU:
case Opcode::I8X16SubSatS:
case Opcode::I8X16SubSatU:
case Opcode::V128And:
case Opcode::V128Andnot:
case Opcode::V128BitSelect:
case Opcode::V128Or:
case Opcode::V128Xor:
case Opcode::V8X16Swizzle:
case Opcode::I8X16Swizzle:
// 0 immediates, 2 operands
instr.kind = InstrKind::Imm_0_Op_2;
break;
Expand Down Expand Up @@ -493,9 +505,9 @@ Instr Istream::Read(Offset* offset) const {

case Opcode::F32Load:
case Opcode::F64Load:
case Opcode::I16X8Load8X8S:
case Opcode::I16X8Load8X8U:
case Opcode::V16X8LoadSplat:
case Opcode::V128Load8X8S:
case Opcode::V128Load8X8U:
case Opcode::V128Load16Splat:
case Opcode::I32AtomicLoad:
case Opcode::I32AtomicLoad16U:
case Opcode::I32AtomicLoad8U:
Expand All @@ -504,9 +516,9 @@ Instr Istream::Read(Offset* offset) const {
case Opcode::I32Load16U:
case Opcode::I32Load8S:
case Opcode::I32Load8U:
case Opcode::I32X4Load16X4S:
case Opcode::I32X4Load16X4U:
case Opcode::V32X4LoadSplat:
case Opcode::V128Load16X4S:
case Opcode::V128Load16X4U:
case Opcode::V128Load32Splat:
case Opcode::I64AtomicLoad:
case Opcode::I64AtomicLoad16U:
case Opcode::I64AtomicLoad32U:
Expand All @@ -518,10 +530,10 @@ Instr Istream::Read(Offset* offset) const {
case Opcode::I64Load32U:
case Opcode::I64Load8S:
case Opcode::I64Load8U:
case Opcode::I64X2Load32X2S:
case Opcode::I64X2Load32X2U:
case Opcode::V64X2LoadSplat:
case Opcode::V8X16LoadSplat:
case Opcode::V128Load32X2S:
case Opcode::V128Load32X2U:
case Opcode::V128Load64Splat:
case Opcode::V128Load8Splat:
case Opcode::V128Load:
// Index + memory offset immediates, 1 operand.
instr.kind = InstrKind::Imm_Index_Offset_Op_1;
Expand Down Expand Up @@ -673,7 +685,7 @@ Instr Istream::Read(Offset* offset) const {
instr.imm_v128 = ReadAt<v128>(offset);
break;

case Opcode::V8X16Shuffle:
case Opcode::I8X16Shuffle:
// v128 immediate, 2 operands.
instr.kind = InstrKind::Imm_V128_Op_2;
instr.imm_v128 = ReadAt<v128>(offset);
Expand Down
2 changes: 1 addition & 1 deletion src/interp/istream.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ enum class InstrKind {
Imm_I8_Op_1, // i32x4.extract_lane
Imm_I8_Op_2, // i32x4.replace_lane
Imm_V128_Op_0, // v128.const
Imm_V128_Op_2, // v8x16.shuffle
Imm_V128_Op_2, // i8x16.shuffle
};

struct Instr {
Expand Down
Loading