Skip to content

Commit

Permalink
[interpreter] Implement SIMD extended multiply instructions (WebAssem…
Browse files Browse the repository at this point in the history
…bly#438)

These were accepted into the proposal in WebAssembly#376.

There are 12 instructions in total:

- i16x8.extmul_{low,high}_i8x16_{s,u}
- i32x4.extmul_{low,high}_i16x8_{s,u}
- i64x2.extmul_{low,high}_i32x4_{s,u}

The implementation is straightforward: widen (using existing
operations), then multiply with the wider shape.

The binary opcodes are not decided yet; they currently follow the ones
used in V8. When those are finalized, we can change them to match.

Added a test generation script that reuses some logic in the generator
for arithmetic instructions. Since these instructions have different
src and dst shapes, I tweaked the base class to allow for having
different shapes.
  • Loading branch information
ngzhian authored Feb 3, 2021
1 parent 98915d5 commit 7c37165
Show file tree
Hide file tree
Showing 15 changed files with 1,435 additions and 18 deletions.
12 changes: 12 additions & 0 deletions interpreter/binary/decode.ml
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,11 @@ let simd_prefix s =
| 0x97l -> i16x8_min_u
| 0x98l -> i16x8_max_s
| 0x99l -> i16x8_max_u
| 0x9al -> i16x8_extmul_low_i8x16_s
| 0x9bl -> i16x8_avgr_u
| 0x9dl -> i16x8_extmul_high_i8x16_s
| 0x9el -> i16x8_extmul_low_i8x16_u
| 0x9fl -> i16x8_extmul_high_i8x16_u
| 0xa0l -> i32x4_abs
| 0xa1l -> i32x4_neg
| 0xa3l -> i32x4_all_true
Expand All @@ -385,6 +389,10 @@ let simd_prefix s =
| 0xb8l -> i32x4_max_s
| 0xb9l -> i32x4_max_u
| 0xbal -> i32x4_dot_i16x8_s
| 0xbbl -> i32x4_extmul_low_i16x8_s
| 0xbdl -> i32x4_extmul_high_i16x8_s
| 0xbel -> i32x4_extmul_low_i16x8_u
| 0xbfl -> i32x4_extmul_high_i16x8_u
| 0xc0l -> i64x2_eq
| 0xc1l -> i64x2_neg
| 0xcbl -> i64x2_shl
Expand All @@ -393,6 +401,10 @@ let simd_prefix s =
| 0xcel -> i64x2_add
| 0xd0l -> i64x2_ne
| 0xd1l -> i64x2_sub
| 0xd2l -> i64x2_extmul_low_i32x4_s
| 0xd3l -> i64x2_extmul_high_i32x4_s
| 0xd6l -> i64x2_extmul_low_i32x4_u
| 0xd7l -> i64x2_extmul_high_i32x4_u
| 0xd5l -> i64x2_mul
| 0xd8l -> f32x4_ceil
| 0xd9l -> f32x4_floor
Expand Down
12 changes: 12 additions & 0 deletions interpreter/binary/encode.ml
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,10 @@ let encode m =
| Binary (V128 V128Op.(I16x8 MaxS)) -> simd_op 0x98l
| Binary (V128 V128Op.(I16x8 MaxU)) -> simd_op 0x99l
| Binary (V128 V128Op.(I16x8 AvgrU)) -> simd_op 0x9bl
| Binary (V128 V128Op.(I16x8 ExtMulLowS)) -> simd_op 0x9al
| Binary (V128 V128Op.(I16x8 ExtMulHighS)) -> simd_op 0x9dl
| Binary (V128 V128Op.(I16x8 ExtMulLowU)) -> simd_op 0x9el
| Binary (V128 V128Op.(I16x8 ExtMulHighU)) -> simd_op 0x9fl
| Binary (V128 V128Op.(I32x4 Add)) -> simd_op 0xael
| Binary (V128 V128Op.(I32x4 Sub)) -> simd_op 0xb1l
| Binary (V128 V128Op.(I32x4 MinS)) -> simd_op 0xb6l
Expand All @@ -485,11 +489,19 @@ let encode m =
| Binary (V128 V128Op.(I32x4 LeU)) -> simd_op 0x3el
| Binary (V128 V128Op.(I32x4 GeS)) -> simd_op 0x3fl
| Binary (V128 V128Op.(I32x4 GeU)) -> simd_op 0x40l
| Binary (V128 V128Op.(I32x4 ExtMulLowS)) -> simd_op 0xbbl
| Binary (V128 V128Op.(I32x4 ExtMulHighS)) -> simd_op 0xbdl
| Binary (V128 V128Op.(I32x4 ExtMulLowU)) -> simd_op 0xbel
| Binary (V128 V128Op.(I32x4 ExtMulHighU)) -> simd_op 0xbfl
| Binary (V128 V128Op.(I64x2 Add)) -> simd_op 0xcel
| Binary (V128 V128Op.(I64x2 Sub)) -> simd_op 0xd1l
| Binary (V128 V128Op.(I64x2 Mul)) -> simd_op 0xd5l
| Binary (V128 V128Op.(I64x2 Eq)) -> simd_op 0xc0l
| Binary (V128 V128Op.(I64x2 Ne)) -> simd_op 0xd0l
| Binary (V128 V128Op.(I64x2 ExtMulLowS)) -> simd_op 0xd2l
| Binary (V128 V128Op.(I64x2 ExtMulHighS)) -> simd_op 0xd3l
| Binary (V128 V128Op.(I64x2 ExtMulLowU)) -> simd_op 0xd6l
| Binary (V128 V128Op.(I64x2 ExtMulHighU)) -> simd_op 0xd7l
| Binary (V128 V128Op.(F32x4 Eq)) -> simd_op 0x41l
| Binary (V128 V128Op.(F32x4 Ne)) -> simd_op 0x42l
| Binary (V128 V128Op.(F32x4 Lt)) -> simd_op 0x43l
Expand Down
12 changes: 12 additions & 0 deletions interpreter/exec/eval_simd.ml
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,10 @@ module SimdOp (SXX : Simd.S) (Value : ValueType with type t = SXX.t) = struct
| I16x8 MaxS -> SXX.I16x8.max_s
| I16x8 MaxU -> SXX.I16x8.max_u
| I16x8 AvgrU -> SXX.I16x8.avgr_u
| I16x8 ExtMulLowS -> SXX.I16x8_convert.extmul_low_s
| I16x8 ExtMulHighS -> SXX.I16x8_convert.extmul_high_s
| I16x8 ExtMulLowU -> SXX.I16x8_convert.extmul_low_u
| I16x8 ExtMulHighU -> SXX.I16x8_convert.extmul_high_u
| I32x4 Add -> SXX.I32x4.add
| I32x4 Sub -> SXX.I32x4.sub
| I32x4 MinS -> SXX.I32x4.min_s
Expand All @@ -121,9 +125,17 @@ module SimdOp (SXX : Simd.S) (Value : ValueType with type t = SXX.t) = struct
| I32x4 DotI16x8S -> SXX.I32x4_convert.dot_i16x8_s
| I64x2 Eq -> SXX.I64x2.eq
| I64x2 Ne -> SXX.I64x2.ne
| I32x4 ExtMulLowS -> SXX.I32x4_convert.extmul_low_s
| I32x4 ExtMulHighS -> SXX.I32x4_convert.extmul_high_s
| I32x4 ExtMulLowU -> SXX.I32x4_convert.extmul_low_u
| I32x4 ExtMulHighU -> SXX.I32x4_convert.extmul_high_u
| I64x2 Add -> SXX.I64x2.add
| I64x2 Sub -> SXX.I64x2.sub
| I64x2 Mul -> SXX.I64x2.mul
| I64x2 ExtMulLowS -> SXX.I64x2_convert.extmul_low_s
| I64x2 ExtMulHighS -> SXX.I64x2_convert.extmul_high_s
| I64x2 ExtMulLowU -> SXX.I64x2_convert.extmul_low_u
| I64x2 ExtMulHighU -> SXX.I64x2_convert.extmul_high_u
| F32x4 Eq -> SXX.F32x4.eq
| F32x4 Ne -> SXX.F32x4.ne
| F32x4 Lt -> SXX.F32x4.lt
Expand Down
38 changes: 34 additions & 4 deletions interpreter/exec/simd.ml
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,10 @@ sig
val widen_high_s : t -> t
val widen_low_u : t -> t
val widen_high_u : t -> t
val extmul_low_s : t -> t -> t
val extmul_high_s : t -> t -> t
val extmul_low_u : t -> t -> t
val extmul_high_u : t -> t -> t
end
module I32x4_convert : sig
val trunc_sat_f32x4_s : t -> t
Expand All @@ -186,10 +190,20 @@ sig
val widen_low_u : t -> t
val widen_high_u : t -> t
val dot_i16x8_s : t -> t -> t
val extmul_low_s : t -> t -> t
val extmul_high_s : t -> t -> t
val extmul_low_u : t -> t -> t
val extmul_high_u : t -> t -> t
end
module I64x2_convert : sig
(* Operations that take i32x4 operands and produce an i64x2 result. *)

(* Widen one half of the i32 lanes to i64: [low]/[high] selects the half,
   [_s]/[_u] selects signed or unsigned extension. *)
val widen_low_s : t -> t
val widen_high_s : t -> t
val widen_low_u : t -> t
val widen_high_u : t -> t

(* Extended multiply: both operands are widened with the matching
   [widen_*] operation and the widened vectors are multiplied as i64x2. *)
val extmul_low_s : t -> t -> t
val extmul_high_s : t -> t -> t
val extmul_low_u : t -> t -> t
val extmul_high_u : t -> t -> t
end
module F32x4_convert : sig
val convert_i32x4_s : t -> t
Expand Down Expand Up @@ -417,6 +431,10 @@ struct
let widen_low_u = widen Lib.List.take 0xffl
let widen_high_u = widen Lib.List.drop 0xffl

let extmul_low_s x y = I16x8.mul (widen_low_s x) (widen_low_s y)
let extmul_high_s x y = I16x8.mul (widen_high_s x) (widen_high_s y)
let extmul_low_u x y = I16x8.mul (widen_low_u x) (widen_low_u y)
let extmul_high_u x y = I16x8.mul (widen_high_u x) (widen_high_u y)
end

module I32x4_convert = struct
Expand All @@ -441,16 +459,28 @@ struct
| [], [] -> []
| _, _ -> assert false
in Rep.of_i32x4 (dot xs ys)

let extmul_low_s x y = I32x4.mul (widen_low_s x) (widen_low_s y)
let extmul_high_s x y = I32x4.mul (widen_high_s x) (widen_high_s y)
let extmul_low_u x y = I32x4.mul (widen_low_u x) (widen_low_u y)
let extmul_high_u x y = I32x4.mul (widen_high_u x) (widen_high_u y)
end

module I64x2_convert = struct
  (* [widen take_or_drop mask x] reads [x] as i32 lanes, keeps the lanes
     selected by [take_or_drop 2], converts each one to an i64 with
     [Int64.of_int32] (which sign-extends) and ANDs the result with [mask].
     An all-ones mask leaves the sign extension intact (signed widening);
     [0xffffffffL] clears the upper 32 bits (unsigned widening). *)
  let widen take_or_drop mask x =
    Rep.of_i64x2
      (List.map
        (fun i32 -> Int64.(logand mask (of_int32 i32)))
        (take_or_drop 2 (Rep.to_i32x4 x)))

  (* [Lib.List.take 2] is used for the low half and [Lib.List.drop 2] for
     the high half (presumably: keep / skip the first two lanes). *)
  let widen_low_s = widen Lib.List.take 0xffffffffffffffffL
  let widen_high_s = widen Lib.List.drop 0xffffffffffffffffL
  let widen_low_u = widen Lib.List.take 0xffffffffL
  let widen_high_u = widen Lib.List.drop 0xffffffffL

  (* Extended multiply: widen the same half of both operands with the same
     signedness, then multiply the widened vectors lane-wise as i64x2. *)
  let extmul_low_s x y = I64x2.mul (widen_low_s x) (widen_low_s y)
  let extmul_high_s x y = I64x2.mul (widen_high_s x) (widen_high_s y)
  let extmul_low_u x y = I64x2.mul (widen_low_u x) (widen_low_u y)
  let extmul_high_u x y = I64x2.mul (widen_high_u x) (widen_high_u y)
end

module F32x4_convert = struct
Expand Down
1 change: 1 addition & 0 deletions interpreter/syntax/ast.ml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ struct
| Swizzle | Shuffle of int list | NarrowS | NarrowU
| AddSatS | AddSatU | SubSatS | SubSatU
| DotI16x8S
| ExtMulLowS | ExtMulHighS | ExtMulLowU | ExtMulHighU
type funop = Abs | Neg | Sqrt
| Ceil | Floor | Trunc | Nearest
| ConvertI32x4S | ConvertI32x4U
Expand Down
12 changes: 12 additions & 0 deletions interpreter/syntax/operators.ml
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,10 @@ let i16x8_min_u = Binary (V128 V128Op.(I16x8 MinU))
let i16x8_max_s = Binary (V128 V128Op.(I16x8 MaxS))
let i16x8_max_u = Binary (V128 V128Op.(I16x8 MaxU))
let i16x8_avgr_u = Binary (V128 V128Op.(I16x8 AvgrU))
let i16x8_extmul_low_i8x16_s = Binary (V128 V128Op.(I16x8 ExtMulLowS))
let i16x8_extmul_high_i8x16_s = Binary (V128 V128Op.(I16x8 ExtMulHighS))
let i16x8_extmul_low_i8x16_u = Binary (V128 V128Op.(I16x8 ExtMulLowU))
let i16x8_extmul_high_i8x16_u = Binary (V128 V128Op.(I16x8 ExtMulHighU))

let i32x4_splat = Convert (V128 V128Op.(I32x4 Splat))
let i32x4_extract_lane imm = SimdExtract (V128Op.I32x4 (ZX, imm))
Expand Down Expand Up @@ -375,6 +379,10 @@ let i32x4_mul = Binary (V128 V128Op.(I32x4 Mul))
let i32x4_trunc_sat_f32x4_s = Unary (V128 V128Op.(I32x4 TruncSatF32x4S))
let i32x4_trunc_sat_f32x4_u = Unary (V128 V128Op.(I32x4 TruncSatF32x4U))
let i32x4_dot_i16x8_s = Binary (V128 V128Op.(I32x4 DotI16x8S))
let i32x4_extmul_low_i16x8_s = Binary (V128 V128Op.(I32x4 ExtMulLowS))
let i32x4_extmul_high_i16x8_s = Binary (V128 V128Op.(I32x4 ExtMulHighS))
let i32x4_extmul_low_i16x8_u = Binary (V128 V128Op.(I32x4 ExtMulLowU))
let i32x4_extmul_high_i16x8_u = Binary (V128 V128Op.(I32x4 ExtMulHighU))

let i64x2_splat = Convert (V128 V128Op.(I64x2 Splat))
let i64x2_extract_lane imm = SimdExtract (V128Op.I64x2 (ZX, imm))
Expand All @@ -388,6 +396,10 @@ let i64x2_mul = Binary (V128 V128Op.(I64x2 Mul))
let i64x2_shl = SimdShift V128Op.(I64x2 Shl)
let i64x2_shr_s = SimdShift V128Op.(I64x2 ShrS)
let i64x2_shr_u = SimdShift V128Op.(I64x2 ShrU)
let i64x2_extmul_low_i32x4_s = Binary (V128 V128Op.(I64x2 ExtMulLowS))
let i64x2_extmul_high_i32x4_s = Binary (V128 V128Op.(I64x2 ExtMulHighS))
let i64x2_extmul_low_i32x4_u = Binary (V128 V128Op.(I64x2 ExtMulLowU))
let i64x2_extmul_high_i32x4_u = Binary (V128 V128Op.(I64x2 ExtMulHighU))

let f32x4_splat = Convert (V128 V128Op.(F32x4 Splat))
let f32x4_extract_lane imm = SimdExtract (V128Op.F32x4 (ZX, imm))
Expand Down
12 changes: 12 additions & 0 deletions interpreter/text/arrange.ml
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,10 @@ struct
| I16x8 MaxS -> "i16x8.max_s"
| I16x8 MaxU -> "i16x8.max_u"
| I16x8 AvgrU -> "i16x8.avgr_u"
| I16x8 ExtMulLowS -> "i16x8.extmul_low_i8x16_s"
| I16x8 ExtMulHighS -> "i16x8.extmul_high_i8x16_s"
| I16x8 ExtMulLowU -> "i16x8.extmul_low_i8x16_u"
| I16x8 ExtMulHighU -> "i16x8.extmul_high_i8x16_u"
| I32x4 Add -> "i32x4.add"
| I32x4 Sub -> "i32x4.sub"
| I32x4 Mul -> "i32x4.mul"
Expand All @@ -306,9 +310,17 @@ struct
| I32x4 MaxS -> "i32x4.max_s"
| I32x4 MaxU -> "i32x4.max_u"
| I32x4 DotI16x8S -> "i32x4.dot_i16x8_s"
| I32x4 ExtMulLowS -> "i32x4.extmul_low_i16x8_s"
| I32x4 ExtMulHighS -> "i32x4.extmul_high_i16x8_s"
| I32x4 ExtMulLowU -> "i32x4.extmul_low_i16x8_u"
| I32x4 ExtMulHighU -> "i32x4.extmul_high_i16x8_u"
| I64x2 Add -> "i64x2.add"
| I64x2 Sub -> "i64x2.sub"
| I64x2 Mul -> "i64x2.mul"
| I64x2 ExtMulLowS -> "i64x2.extmul_low_i32x4_s"
| I64x2 ExtMulHighS -> "i64x2.extmul_high_i32x4_s"
| I64x2 ExtMulLowU -> "i64x2.extmul_low_i32x4_u"
| I64x2 ExtMulHighU -> "i64x2.extmul_high_i32x4_u"
| F32x4 Eq -> "f32x4.eq"
| F32x4 Ne -> "f32x4.ne"
| F32x4 Lt -> "f32x4.lt"
Expand Down
13 changes: 13 additions & 0 deletions interpreter/text/lexer.mll
Original file line number Diff line number Diff line change
Expand Up @@ -576,6 +576,19 @@ rule token = parse
| "i32x4.dot_i16x8_s"
{ BINARY i32x4_dot_i16x8_s }

| "i16x8.extmul_low_i8x16_"(sign as s)
{ BINARY (ext s i16x8_extmul_low_i8x16_s i16x8_extmul_low_i8x16_u) }
| "i16x8.extmul_high_i8x16_"(sign as s)
{ BINARY (ext s i16x8_extmul_high_i8x16_s i16x8_extmul_high_i8x16_u) }
| "i32x4.extmul_low_i16x8_"(sign as s)
{ BINARY (ext s i32x4_extmul_low_i16x8_s i32x4_extmul_low_i16x8_u) }
| "i32x4.extmul_high_i16x8_"(sign as s)
{ BINARY (ext s i32x4_extmul_high_i16x8_s i32x4_extmul_high_i16x8_u) }
| "i64x2.extmul_low_i32x4_"(sign as s)
{ BINARY (ext s i64x2_extmul_low_i32x4_s i64x2_extmul_low_i32x4_u) }
| "i64x2.extmul_high_i32x4_"(sign as s)
{ BINARY (ext s i64x2_extmul_high_i32x4_s i64x2_extmul_high_i32x4_u) }

| (simd_shape as s) { SIMD_SHAPE (simd_shape s) }

| name as s { VAR s }
Expand Down
1 change: 1 addition & 0 deletions test/core/simd/meta/gen_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
'simd_f64x2_pmin_pmax',
'simd_i32x4_dot_i16x8',
'simd_load_lane',
'simd_ext_mul',
)


Expand Down
21 changes: 17 additions & 4 deletions test/core/simd/meta/simd_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,27 @@ def __str__(self):
def lane(self):
return self.LANE_VALUE.get(self.LANE_TYPE)

@property
def dst_lane(self):
    """Lane description of the result shape; identical to ``self.lane``."""
    result_lane = self.lane
    return result_lane

@property
def src_lane(self):
    """Lane description of the operand shape.

    Cases whose operands are narrower than the result (e.g. an extended
    multiply taking i16x8 operands and producing i32x4) declare
    ``SRC_LANE_TYPE``; that type is looked up in ``LANE_VALUE``. Cases
    without ``SRC_LANE_TYPE`` use the same lane for operands and result.
    """
    if not hasattr(self, 'SRC_LANE_TYPE'):
        return self.lane
    return self.LANE_VALUE.get(self.SRC_LANE_TYPE)

@property
def normal_unary_op_test_data(self):
    """Boundary operands for unary-op tests, taken from the source lane.

    Returns a list of eight values: 0, 1, -1, and the source lane's
    ``max - 1``, ``min + 1``, ``min``, ``max`` and ``mask``.
    """
    # Operands are drawn from the source (operand) lane; the earlier
    # `lane = self.lane` assignment was dead code and has been removed.
    lane = self.src_lane
    return [0, 1, -1, lane.max - 1, lane.min + 1, lane.min, lane.max, lane.mask]

@property
def normal_binary_op_test_data(self):
lane = self.lane
lane = self.src_lane
return [
(0, 0),
(0, 1),
Expand Down Expand Up @@ -170,7 +183,7 @@ def get_case_data(self):
for data_group, v128_forms in self.bin_test_data:
for data in data_group:
case_data.append([op_name, [str(data[0]), str(data[1])],
str(o.binary_op(data[0], data[1], self.lane)),
str(o.binary_op(data[0], data[1], self.src_lane, self.dst_lane)),
v128_forms])
for data_group in self.full_bin_test_data:
for data in data_group.get(op_name):
Expand All @@ -183,7 +196,7 @@ def get_case_data(self):
for data_group, v128_forms in self.unary_test_data:
for data in data_group:
case_data.append([op_name, [str(data)],
str(o.unary_op(data, self.lane)),
str(o.unary_op(data, self.dst_lane)),
v128_forms])

return case_data
Expand Down
75 changes: 75 additions & 0 deletions test/core/simd/meta/simd_ext_mul.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#!/usr/bin/env python3

""" Base class for generating extended multiply instructions. These
instructions take 2 inputs of the same (narrower) lane shape, multiply
corresponding lanes with extension (no overflow/wraparound), and produce 1
output of a (wider) shape. These instructions can choose to work on the low or
high halves of the inputs, and perform signed or unsigned multiply.
Subclasses need to define 3 attributes:
- LANE_TYPE (this is the output shape)
- SRC_LANE_TYPE (this is the input (narrower) shape)
- BINARY_OPS (list of operations)
"""

from simd_arithmetic import SimdArithmeticCase


class SimdExtMulCase(SimdArithmeticCase):
    """Shared generator logic for the extended-multiply test files.

    Concrete subclasses supply LANE_TYPE (result shape), SRC_LANE_TYPE
    (operand shape) and BINARY_OPS (instruction names).
    """

    # Extended multiplies are binary only, so no unary ops are listed.
    UNARY_OPS = ()

    @property
    def bin_test_data(self):
        """One data group: the normal binary operands, tagged with the
        lane forms [operand, operand, result]."""
        operand_shape = self.SRC_LANE_TYPE
        result_shape = self.LANE_TYPE
        return [(self.normal_binary_op_test_data,
                 [operand_shape, operand_shape, result_shape])]

    @property
    def full_bin_test_data(self):
        """Always an empty list."""
        return list()

    @property
    def hex_binary_op_test_data(self):
        """Always an empty list."""
        return list()

    def get_combine_cases(self):
        """Always an empty string."""
        return str()

    def gen_test_cases(self):
        """Write get_all_cases() to ../simd_<result>_extmul_<operand>.wast."""
        target_path = '../simd_{}_extmul_{}.wast'.format(
            self.LANE_TYPE, self.SRC_LANE_TYPE)
        with open(target_path, 'w') as out:
            out.write(self.get_all_cases())


class SimdI16x8ExtMulCase(SimdExtMulCase):
    """Extended multiply of i8x16 operands into an i16x8 result."""

    LANE_TYPE = 'i16x8'      # result shape
    SRC_LANE_TYPE = 'i8x16'  # operand shape
    BINARY_OPS = (
        'extmul_low_i8x16_s',
        'extmul_high_i8x16_s',
        'extmul_low_i8x16_u',
        'extmul_high_i8x16_u',
    )


class SimdI32x4ExtMulCase(SimdExtMulCase):
    """Extended multiply of i16x8 operands into an i32x4 result."""

    LANE_TYPE = 'i32x4'      # result shape
    SRC_LANE_TYPE = 'i16x8'  # operand shape
    BINARY_OPS = (
        'extmul_low_i16x8_s',
        'extmul_high_i16x8_s',
        'extmul_low_i16x8_u',
        'extmul_high_i16x8_u',
    )


class SimdI64x2ExtMulCase(SimdExtMulCase):
    """Extended multiply of i32x4 operands into an i64x2 result."""

    LANE_TYPE = 'i64x2'      # result shape
    SRC_LANE_TYPE = 'i32x4'  # operand shape
    BINARY_OPS = (
        'extmul_low_i32x4_s',
        'extmul_high_i32x4_s',
        'extmul_low_i32x4_u',
        'extmul_high_i32x4_u',
    )


def gen_test_cases():
    """Generate the .wast file for each extended-multiply shape in turn
    (i16x8, then i32x4, then i64x2)."""
    for case_class in (SimdI16x8ExtMulCase,
                       SimdI32x4ExtMulCase,
                       SimdI64x2ExtMulCase):
        case_class().gen_test_cases()


# Allow the generator to be run directly as a script.
if __name__ == '__main__':
    gen_test_cases()
Loading

0 comments on commit 7c37165

Please sign in to comment.