Skip to content

Commit

Permalink
cranelift-interpreter: Fix SIMD shifts and rotates (#6939)
Browse files Browse the repository at this point in the history
* cranelift-interpreter: Fix SIMD `ishl`/`{s,u}`shr

* fuzzgen: Enable a few more ops

* cranelift: Fix tests for {u,s}shr

* fuzzgen: Change pattern matching arms for shifts

Co-Authored-By: Jamey Sharp <jsharp@fastly.com>

---------

Co-authored-by: Jamey Sharp <jsharp@fastly.com>
  • Loading branch information
afonso360 and jameysharp authored Aug 31, 2023
1 parent 326837d commit 6a5bddf
Show file tree
Hide file tree
Showing 5 changed files with 57 additions and 129 deletions.
1 change: 1 addition & 0 deletions cranelift/filetests/filetests/runtests/simd-ishl.clif
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
test interpret
test run
target aarch64
target s390x
Expand Down
3 changes: 2 additions & 1 deletion cranelift/filetests/filetests/runtests/simd-sshr.clif
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
test interpret
test run
target aarch64
target s390x
Expand Down Expand Up @@ -70,7 +71,7 @@ block0(v0: i8x16):
v2 = sshr v0, v1
return v2
}
; run: %i8x16_shl_const([0x01 0x02 0x04 0x08 0x10 0x20 0x40 0x80 0 0 0 0 0 0 0 0]) == [0 0 0x01 0x02 0x04 0x08 0x10 0xe0 0 0 0 0 0 0 0 0]
; run: %i8x16_sshr_const([0x01 0x02 0x04 0x08 0x10 0x20 0x40 0x80 0 0 0 0 0 0 0 0]) == [0 0 0x01 0x02 0x04 0x08 0x10 0xe0 0 0 0 0 0 0 0 0]

function %i16x8_sshr_const(i16x8) -> i16x8 {
block0(v0: i16x8):
Expand Down
3 changes: 2 additions & 1 deletion cranelift/filetests/filetests/runtests/simd-ushr.clif
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
test interpret
test run
target aarch64
target s390x
Expand Down Expand Up @@ -54,7 +55,7 @@ block0(v0: i8x16):
v2 = ushr v0, v1
return v2
}
; run: %i8x16_shl_const([0x01 0x02 0x04 0x08 0x10 0x20 0x40 0x80 0 0 0 0 0 0 0 0]) == [0 0 0x01 0x02 0x04 0x08 0x10 0x20 0 0 0 0 0 0 0 0]
; run: %i8x16_ushr_const([0x01 0x02 0x04 0x08 0x10 0x20 0x40 0x80 0 0 0 0 0 0 0 0]) == [0 0 0x01 0x02 0x04 0x08 0x10 0x20 0 0 0 0 0 0 0 0]

function %i16x8_ushr_const(i16x8) -> i16x8 {
block0(v0: i16x8):
Expand Down
137 changes: 27 additions & 110 deletions cranelift/fuzzgen/src/function_generator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -525,6 +525,8 @@ fn valid_for_target(triple: &Triple, op: Opcode, args: &[Type], rets: &[Type]) -
(Opcode::Cls, &[I32], &[I32]),
(Opcode::Cls, &[I64], &[I64]),
(Opcode::Cls, &[I128], &[I128]),
// TODO
(Opcode::Bitselect, &[_, _, _], &[F32 | F64]),
// https://github.com/bytecodealliance/wasmtime/issues/4897
// https://github.com/bytecodealliance/wasmtime/issues/4899
(
Expand Down Expand Up @@ -589,6 +591,15 @@ fn valid_for_target(triple: &Triple, op: Opcode, args: &[Type], rets: &[Type]) -
&[],
&[I8X16 | I16X8 | I32X4 | I64X2 | F32X4 | F64X2]
),
// TODO
(
Opcode::Sshr | Opcode::Ushr | Opcode::Ishl,
&[I8X16 | I16X8 | I32X4 | I64X2, I128]
),
(
Opcode::Rotr | Opcode::Rotl,
&[I8X16 | I16X8 | I32X4 | I64X2, _]
),
)
}

Expand Down Expand Up @@ -647,6 +658,17 @@ fn valid_for_target(triple: &Triple, op: Opcode, args: &[Type], rets: &[Type]) -
// https://github.com/bytecodealliance/wasmtime/issues/6104
(Opcode::Bitcast, &[I128], &[_]),
(Opcode::Bitcast, &[_], &[I128]),
// TODO
(
Opcode::Sshr | Opcode::Ushr | Opcode::Ishl,
&[I8X16 | I16X8 | I32X4 | I64X2, I128]
),
(
Opcode::Rotr | Opcode::Rotl,
&[I8X16 | I16X8 | I32X4 | I64X2, _]
),
// TODO
(Opcode::Bitselect, &[_, _, _], &[F32 | F64]),
)
}

Expand Down Expand Up @@ -689,6 +711,8 @@ fn valid_for_target(triple: &Triple, op: Opcode, args: &[Type], rets: &[Type]) -
// https://github.com/bytecodealliance/wasmtime/issues/6104
(Opcode::Bitcast, &[I128], &[_]),
(Opcode::Bitcast, &[_], &[I128]),
// TODO
(Opcode::Bitselect, &[_, _, _], &[F32 | F64]),
)
}

Expand Down Expand Up @@ -737,8 +761,9 @@ fn valid_for_target(triple: &Triple, op: Opcode, args: &[Type], rets: &[Type]) -
(Opcode::Bitcast, &[I128], &[_]),
(Opcode::Bitcast, &[_], &[I128]),
// TODO
(Opcode::SelectSpectreGuard, &[_, _, _], &[F32]),
(Opcode::SelectSpectreGuard, &[_, _, _], &[F64]),
(Opcode::SelectSpectreGuard, &[_, _, _], &[F32 | F64]),
// TODO
(Opcode::Bitselect, &[_, _, _], &[F32 | F64]),
)
}

Expand Down Expand Up @@ -896,10 +921,6 @@ static OPCODE_SIGNATURES: Lazy<Vec<OpcodeSignature>> = Lazy::new(|| {
(Opcode::ScalarToVector),
(Opcode::X86Pmaddubsw),
(Opcode::X86Cvtt2dq),
(Opcode::Bitselect, &[F32, F32, F32], &[F32]),
(Opcode::Bitselect, &[F64, F64, F64], &[F64]),
(Opcode::Bitselect, &[F32X4, F32X4, F32X4], &[F32X4]),
(Opcode::Bitselect, &[F64X2, F64X2, F64X2], &[F64X2]),
(Opcode::VanyTrue, &[F32X4], &[I8]),
(Opcode::VanyTrue, &[F64X2], &[I8]),
(Opcode::VhighBits, &[F32X4], &[I8]),
Expand Down Expand Up @@ -952,10 +973,6 @@ static OPCODE_SIGNATURES: Lazy<Vec<OpcodeSignature>> = Lazy::new(|| {
(Opcode::VhighBits, &[I64X2], &[I64X2]),
(Opcode::VhighBits, &[F32X4], &[I64X2]),
(Opcode::VhighBits, &[F64X2], &[I64X2]),
(Opcode::Ineg, &[I8X16], &[I8X16]),
(Opcode::Ineg, &[I16X8], &[I16X8]),
(Opcode::Ineg, &[I32X4], &[I32X4]),
(Opcode::Ineg, &[I64X2], &[I64X2]),
(Opcode::Umulhi, &[I128, I128], &[I128]),
(Opcode::Smulhi, &[I128, I128], &[I128]),
// https://github.com/bytecodealliance/wasmtime/issues/6073
Expand All @@ -966,106 +983,6 @@ static OPCODE_SIGNATURES: Lazy<Vec<OpcodeSignature>> = Lazy::new(|| {
(Opcode::Isplit, &[I64], &[I32, I32]),
(Opcode::Isplit, &[I32], &[I16, I16]),
(Opcode::Isplit, &[I16], &[I8, I8]),
(Opcode::Rotl, &[I8X16, I8], &[I8X16]),
(Opcode::Rotl, &[I8X16, I16], &[I8X16]),
(Opcode::Rotl, &[I8X16, I32], &[I8X16]),
(Opcode::Rotl, &[I8X16, I64], &[I8X16]),
(Opcode::Rotl, &[I8X16, I128], &[I8X16]),
(Opcode::Rotl, &[I16X8, I8], &[I16X8]),
(Opcode::Rotl, &[I16X8, I16], &[I16X8]),
(Opcode::Rotl, &[I16X8, I32], &[I16X8]),
(Opcode::Rotl, &[I16X8, I64], &[I16X8]),
(Opcode::Rotl, &[I16X8, I128], &[I16X8]),
(Opcode::Rotl, &[I32X4, I8], &[I32X4]),
(Opcode::Rotl, &[I32X4, I16], &[I32X4]),
(Opcode::Rotl, &[I32X4, I32], &[I32X4]),
(Opcode::Rotl, &[I32X4, I64], &[I32X4]),
(Opcode::Rotl, &[I32X4, I128], &[I32X4]),
(Opcode::Rotl, &[I64X2, I8], &[I64X2]),
(Opcode::Rotl, &[I64X2, I16], &[I64X2]),
(Opcode::Rotl, &[I64X2, I32], &[I64X2]),
(Opcode::Rotl, &[I64X2, I64], &[I64X2]),
(Opcode::Rotl, &[I64X2, I128], &[I64X2]),
(Opcode::Rotr, &[I8X16, I8], &[I8X16]),
(Opcode::Rotr, &[I8X16, I16], &[I8X16]),
(Opcode::Rotr, &[I8X16, I32], &[I8X16]),
(Opcode::Rotr, &[I8X16, I64], &[I8X16]),
(Opcode::Rotr, &[I8X16, I128], &[I8X16]),
(Opcode::Rotr, &[I16X8, I8], &[I16X8]),
(Opcode::Rotr, &[I16X8, I16], &[I16X8]),
(Opcode::Rotr, &[I16X8, I32], &[I16X8]),
(Opcode::Rotr, &[I16X8, I64], &[I16X8]),
(Opcode::Rotr, &[I16X8, I128], &[I16X8]),
(Opcode::Rotr, &[I32X4, I8], &[I32X4]),
(Opcode::Rotr, &[I32X4, I16], &[I32X4]),
(Opcode::Rotr, &[I32X4, I32], &[I32X4]),
(Opcode::Rotr, &[I32X4, I64], &[I32X4]),
(Opcode::Rotr, &[I32X4, I128], &[I32X4]),
(Opcode::Rotr, &[I64X2, I8], &[I64X2]),
(Opcode::Rotr, &[I64X2, I16], &[I64X2]),
(Opcode::Rotr, &[I64X2, I32], &[I64X2]),
(Opcode::Rotr, &[I64X2, I64], &[I64X2]),
(Opcode::Rotr, &[I64X2, I128], &[I64X2]),
(Opcode::Ishl, &[I8X16, I8], &[I8X16]),
(Opcode::Ishl, &[I8X16, I16], &[I8X16]),
(Opcode::Ishl, &[I8X16, I32], &[I8X16]),
(Opcode::Ishl, &[I8X16, I64], &[I8X16]),
(Opcode::Ishl, &[I8X16, I128], &[I8X16]),
(Opcode::Ishl, &[I16X8, I8], &[I16X8]),
(Opcode::Ishl, &[I16X8, I16], &[I16X8]),
(Opcode::Ishl, &[I16X8, I32], &[I16X8]),
(Opcode::Ishl, &[I16X8, I64], &[I16X8]),
(Opcode::Ishl, &[I16X8, I128], &[I16X8]),
(Opcode::Ishl, &[I32X4, I8], &[I32X4]),
(Opcode::Ishl, &[I32X4, I16], &[I32X4]),
(Opcode::Ishl, &[I32X4, I32], &[I32X4]),
(Opcode::Ishl, &[I32X4, I64], &[I32X4]),
(Opcode::Ishl, &[I32X4, I128], &[I32X4]),
(Opcode::Ishl, &[I64X2, I8], &[I64X2]),
(Opcode::Ishl, &[I64X2, I16], &[I64X2]),
(Opcode::Ishl, &[I64X2, I32], &[I64X2]),
(Opcode::Ishl, &[I64X2, I64], &[I64X2]),
(Opcode::Ishl, &[I64X2, I128], &[I64X2]),
(Opcode::Ushr, &[I8X16, I8], &[I8X16]),
(Opcode::Ushr, &[I8X16, I16], &[I8X16]),
(Opcode::Ushr, &[I8X16, I32], &[I8X16]),
(Opcode::Ushr, &[I8X16, I64], &[I8X16]),
(Opcode::Ushr, &[I8X16, I128], &[I8X16]),
(Opcode::Ushr, &[I16X8, I8], &[I16X8]),
(Opcode::Ushr, &[I16X8, I16], &[I16X8]),
(Opcode::Ushr, &[I16X8, I32], &[I16X8]),
(Opcode::Ushr, &[I16X8, I64], &[I16X8]),
(Opcode::Ushr, &[I16X8, I128], &[I16X8]),
(Opcode::Ushr, &[I32X4, I8], &[I32X4]),
(Opcode::Ushr, &[I32X4, I16], &[I32X4]),
(Opcode::Ushr, &[I32X4, I32], &[I32X4]),
(Opcode::Ushr, &[I32X4, I64], &[I32X4]),
(Opcode::Ushr, &[I32X4, I128], &[I32X4]),
(Opcode::Ushr, &[I64X2, I8], &[I64X2]),
(Opcode::Ushr, &[I64X2, I16], &[I64X2]),
(Opcode::Ushr, &[I64X2, I32], &[I64X2]),
(Opcode::Ushr, &[I64X2, I64], &[I64X2]),
(Opcode::Ushr, &[I64X2, I128], &[I64X2]),
(Opcode::Sshr, &[I8X16, I8], &[I8X16]),
(Opcode::Sshr, &[I8X16, I16], &[I8X16]),
(Opcode::Sshr, &[I8X16, I32], &[I8X16]),
(Opcode::Sshr, &[I8X16, I64], &[I8X16]),
(Opcode::Sshr, &[I8X16, I128], &[I8X16]),
(Opcode::Sshr, &[I16X8, I8], &[I16X8]),
(Opcode::Sshr, &[I16X8, I16], &[I16X8]),
(Opcode::Sshr, &[I16X8, I32], &[I16X8]),
(Opcode::Sshr, &[I16X8, I64], &[I16X8]),
(Opcode::Sshr, &[I16X8, I128], &[I16X8]),
(Opcode::Sshr, &[I32X4, I8], &[I32X4]),
(Opcode::Sshr, &[I32X4, I16], &[I32X4]),
(Opcode::Sshr, &[I32X4, I32], &[I32X4]),
(Opcode::Sshr, &[I32X4, I64], &[I32X4]),
(Opcode::Sshr, &[I32X4, I128], &[I32X4]),
(Opcode::Sshr, &[I64X2, I8], &[I64X2]),
(Opcode::Sshr, &[I64X2, I16], &[I64X2]),
(Opcode::Sshr, &[I64X2, I32], &[I64X2]),
(Opcode::Sshr, &[I64X2, I64], &[I64X2]),
(Opcode::Sshr, &[I64X2, I128], &[I64X2]),
(Opcode::Fmin, &[F32X4, F32X4], &[F32X4]),
(Opcode::Fmin, &[F64X2, F64X2], &[F64X2]),
(Opcode::Fmax, &[F32X4, F32X4], &[F32X4]),
Expand Down
42 changes: 25 additions & 17 deletions cranelift/interpreter/src/step.rs
Original file line number Diff line number Diff line change
Expand Up @@ -795,16 +795,16 @@ where
Opcode::BandImm => binary(DataValueExt::and, arg(0), imm_as_ctrl_ty()?)?,
Opcode::BorImm => binary(DataValueExt::or, arg(0), imm_as_ctrl_ty()?)?,
Opcode::BxorImm => binary(DataValueExt::xor, arg(0), imm_as_ctrl_ty()?)?,
Opcode::Rotl => binary(DataValueExt::rotl, arg(0), arg(1))?,
Opcode::Rotr => binary(DataValueExt::rotr, arg(0), arg(1))?,
Opcode::RotlImm => binary(DataValueExt::rotl, arg(0), imm_as_ctrl_ty()?)?,
Opcode::RotrImm => binary(DataValueExt::rotr, arg(0), imm_as_ctrl_ty()?)?,
Opcode::Ishl => binary(DataValueExt::shl, arg(0), arg(1))?,
Opcode::Ushr => binary(DataValueExt::ushr, arg(0), arg(1))?,
Opcode::Sshr => binary(DataValueExt::sshr, arg(0), arg(1))?,
Opcode::IshlImm => binary(DataValueExt::shl, arg(0), imm_as_ctrl_ty()?)?,
Opcode::UshrImm => binary(DataValueExt::ushr, arg(0), imm_as_ctrl_ty()?)?,
Opcode::SshrImm => binary(DataValueExt::sshr, arg(0), imm_as_ctrl_ty()?)?,
Opcode::Rotl => binary(DataValueExt::rotl, arg(0), shift_amt(ctrl_ty, arg(1))?)?,
Opcode::Rotr => binary(DataValueExt::rotr, arg(0), shift_amt(ctrl_ty, arg(1))?)?,
Opcode::RotlImm => binary(DataValueExt::rotl, arg(0), shift_amt(ctrl_ty, imm())?)?,
Opcode::RotrImm => binary(DataValueExt::rotr, arg(0), shift_amt(ctrl_ty, imm())?)?,
Opcode::Ishl => binary(DataValueExt::shl, arg(0), shift_amt(ctrl_ty, arg(1))?)?,
Opcode::Ushr => binary(DataValueExt::ushr, arg(0), shift_amt(ctrl_ty, arg(1))?)?,
Opcode::Sshr => binary(DataValueExt::sshr, arg(0), shift_amt(ctrl_ty, arg(1))?)?,
Opcode::IshlImm => binary(DataValueExt::shl, arg(0), shift_amt(ctrl_ty, imm())?)?,
Opcode::UshrImm => binary(DataValueExt::ushr, arg(0), shift_amt(ctrl_ty, imm())?)?,
Opcode::SshrImm => binary(DataValueExt::sshr, arg(0), shift_amt(ctrl_ty, imm())?)?,
Opcode::Bitrev => unary(DataValueExt::reverse_bits, arg(0))?,
Opcode::Bswap => unary(DataValueExt::swap_bytes, arg(0))?,
Opcode::Clz => unary(DataValueExt::leading_zeros, arg(0))?,
Expand Down Expand Up @@ -994,13 +994,7 @@ where
}
assign(DataValueExt::vector(new, types::I8X16)?)
}
Opcode::Splat => {
let mut new_vector = SimdVec::new();
for _ in 0..ctrl_ty.lane_count() {
new_vector.push(arg(0));
}
assign(vectorizelanes(&new_vector, ctrl_ty)?)
}
Opcode::Splat => assign(splat(ctrl_ty, arg(0))?),
Opcode::Insertlane => {
let idx = imm().into_int_unsigned()? as usize;
let mut vector = extractlanes(&arg(0), ctrl_ty)?;
Expand Down Expand Up @@ -1575,3 +1569,17 @@ fn bitselect(c: DataValue, x: DataValue, y: DataValue) -> ValueResult<DataValue>
let mask_y = DataValueExt::and(DataValueExt::not(c)?, y)?;
DataValueExt::or(mask_x, mask_y)
}

fn splat(ty: Type, val: DataValue) -> ValueResult<DataValue> {
let mut new_vector = SimdVec::new();
for _ in 0..ty.lane_count() {
new_vector.push(val.clone());
}
vectorizelanes(&new_vector, ty)
}

// Prepares the shift amount for a shift/rotate operation.
// The shift amount must be the same type and have the same number of lanes as the vector.
fn shift_amt(ty: Type, val: DataValue) -> ValueResult<DataValue> {
splat(ty, val.convert(ValueConversionKind::Exact(ty.lane_type()))?)
}

0 comments on commit 6a5bddf

Please sign in to comment.