Skip to content

Commit

Permalink
Add extend-add-pairwise instructions x64
Browse files Browse the repository at this point in the history
  • Loading branch information
jlb6740 committed Jun 24, 2021
1 parent 5737558 commit 4c97443
Show file tree
Hide file tree
Showing 9 changed files with 135 additions and 10 deletions.
4 changes: 0 additions & 4 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -192,10 +192,8 @@ fn x64_should_panic(testsuite: &str, testname: &str, strategy: &str) -> bool {

match (testsuite, testname) {
("simd", "simd_conversions") => return true, // unknown operator or unexpected token: tests/spec_testsuite/proposals/simd/simd_conversions.wast:724:6
("simd", "simd_i16x8_extadd_pairwise_i8x16") => return true,
("simd", "simd_i16x8_extmul_i8x16") => return true,
("simd", "simd_i16x8_q15mulr_sat_s") => return true,
("simd", "simd_i32x4_extadd_pairwise_i16x8") => return true,
("simd", "simd_i32x4_extmul_i16x8") => return true,
("simd", "simd_i32x4_trunc_sat_f64x2") => return true,
("simd", "simd_i64x2_extmul_i32x4") => return true,
Expand Down Expand Up @@ -232,10 +230,8 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
// These are new instructions that are not really implemented in any backend.
("simd", "simd_i8x16_arith2")
| ("simd", "simd_conversions")
| ("simd", "simd_i16x8_extadd_pairwise_i8x16")
| ("simd", "simd_i16x8_extmul_i8x16")
| ("simd", "simd_i16x8_q15mulr_sat_s")
| ("simd", "simd_i32x4_extadd_pairwise_i16x8")
| ("simd", "simd_i32x4_extmul_i16x8")
| ("simd", "simd_i32x4_trunc_sat_f64x2")
| ("simd", "simd_i64x2_extmul_i32x4") => return true,
Expand Down
29 changes: 28 additions & 1 deletion cranelift/codegen/meta/src/shared/instructions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4068,7 +4068,34 @@ pub(crate) fn define(
Inst::new(
"uwiden_high",
r#"
Widen the high lanes of `x` using unsigned extension.
Lane-wise integer extended pairwise addition producing extended results
(twice wider results than the input)
"#,
&formats.unary,
)
.operands_in(vec![x])
.operands_out(vec![a]),
);

// Register the signed extended pairwise addition instruction (unary: one vector in, one out).
ig.push(
    Inst::new(
        "extended_pairwise_add_signed",
        r#"
        Lane-wise integer extended pairwise addition producing extended results
        (twice wider results than the input), using signed extension.

        Each pair of adjacent lanes of `x` is sign-extended and summed.
        This will double the lane width and halve the number of lanes.
        "#,
        &formats.unary,
    )
    .operands_in(vec![x])
    .operands_out(vec![a]),
);

ig.push(
Inst::new(
"extended_pairwise_add_unsigned",
r#"
Widen the high lanes of `x` extending with zeros.
This will double the lane width and halve the number of lanes.
"#,
Expand Down
2 changes: 2 additions & 0 deletions cranelift/codegen/src/isa/aarch64/lower_inst.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3359,6 +3359,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
Opcode::FcvtLowFromSint => unimplemented!("FcvtLowFromSint"),
Opcode::FvpromoteLow => unimplemented!("FvpromoteLow"),
Opcode::Fvdemote => unimplemented!("Fvdemote"),
Opcode::ExtendedPairwiseAddSigned => unimplemented!("ExtendedPairwiseAddSigned"),
Opcode::ExtendedPairwiseAddUnsigned => unimplemented!("ExtendedPairwiseAddUnsigned"),
}

Ok(())
Expand Down
4 changes: 3 additions & 1 deletion cranelift/codegen/src/isa/s390x/lower.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2550,7 +2550,9 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
| Opcode::UwidenHigh
| Opcode::WideningPairwiseDotProductS
| Opcode::FvpromoteLow
| Opcode::Fvdemote => {
| Opcode::Fvdemote
| Opcode::ExtendedPairwiseAddSigned
| Opcode::ExtendedPairwiseAddUnsigned => {
// TODO
panic!("Vector ops not implemented.");
}
Expand Down
3 changes: 3 additions & 0 deletions cranelift/codegen/src/isa/x64/inst/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -567,6 +567,7 @@ pub enum SseOpcode {
Pinsrb,
Pinsrw,
Pinsrd,
Pmaddubsw,
Pmaddwd,
Pmaxsb,
Pmaxsw,
Expand Down Expand Up @@ -734,6 +735,7 @@ impl SseOpcode {
| SseOpcode::Pcmpgtd
| SseOpcode::Pextrw
| SseOpcode::Pinsrw
| SseOpcode::Pmaddubsw
| SseOpcode::Pmaddwd
| SseOpcode::Pmaxsw
| SseOpcode::Pmaxub
Expand Down Expand Up @@ -925,6 +927,7 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Pinsrb => "pinsrb",
SseOpcode::Pinsrw => "pinsrw",
SseOpcode::Pinsrd => "pinsrd",
SseOpcode::Pmaddubsw => "pmaddubsw",
SseOpcode::Pmaddwd => "pmaddwd",
SseOpcode::Pmaxsb => "pmaxsb",
SseOpcode::Pmaxsw => "pmaxsw",
Expand Down
1 change: 1 addition & 0 deletions cranelift/codegen/src/isa/x64/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1483,6 +1483,7 @@ pub(crate) fn emit(
SseOpcode::Paddsw => (LegacyPrefixes::_66, 0x0FED, 2),
SseOpcode::Paddusb => (LegacyPrefixes::_66, 0x0FDC, 2),
SseOpcode::Paddusw => (LegacyPrefixes::_66, 0x0FDD, 2),
SseOpcode::Pmaddubsw => (LegacyPrefixes::_66, 0x0F3804, 3),
SseOpcode::Pand => (LegacyPrefixes::_66, 0x0FDB, 2),
SseOpcode::Pandn => (LegacyPrefixes::_66, 0x0FDF, 2),
SseOpcode::Pavgb => (LegacyPrefixes::_66, 0x0FE0, 2),
Expand Down
80 changes: 80 additions & 0 deletions cranelift/codegen/src/isa/x64/lower.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4472,6 +4472,86 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
}
}
Opcode::ExtendedPairwiseAddSigned | Opcode::ExtendedPairwiseAddUnsigned => {
    // Extended pairwise addition widens every lane of the input and sums each pair of
    // adjacent lanes, producing a vector with half as many lanes, each twice as wide.
    // The instruction sequences follow the SIMD proposal PR:
    // https://github.com/WebAssembly/simd/pull/380

    // All constants below are 128-bit vectors spelled out as little-endian bytes.
    // 16 x i8 lanes, each holding 1.
    static ONES_I8X16: [u8; 16] = [0x01; 16];
    // 8 x i16 lanes, each holding 1.
    static ONES_I16X8: [u8; 16] = [
        0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00,
        0x01, 0x00,
    ];
    // 8 x i16 lanes, each holding 0x8000 (the i16 sign bit).
    static SIGN_BITS_I16X8: [u8; 16] = [
        0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
        0x00, 0x80,
    ];
    // 4 x i32 lanes, each holding 0x0001_0000 (65536).
    static BIAS_I32X4: [u8; 16] = [
        0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
        0x01, 0x00,
    ];

    let input_ty = ctx.input_ty(insn, 0);
    let output_ty = ctx.output_ty(insn, 0);
    let src = put_input_in_reg(ctx, inputs[0]);
    let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();

    match (op, input_ty, output_ty) {
        (Opcode::ExtendedPairwiseAddSigned, types::I8X16, types::I16X8) => {
            // PMADDUBSW reads its destination bytes as unsigned and its source bytes as
            // signed, so the ones go in `dst` and the (signed) input is the source operand.
            let ones = ctx.use_constant(VCodeConstantData::WellKnown(&ONES_I8X16));
            let ones_reg = ctx.alloc_tmp(types::I8X16).only_reg().unwrap();
            ctx.emit(Inst::xmm_load_const(ones, ones_reg, types::I8X16));
            ctx.emit(Inst::xmm_mov(
                SseOpcode::Movdqa,
                RegMem::reg(ones_reg.to_reg()),
                dst,
            ));
            ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmaddubsw, RegMem::reg(src), dst));
        }
        (Opcode::ExtendedPairwiseAddSigned, types::I16X8, types::I32X4) => {
            // PMADDWD multiplies signed words and adds adjacent products; multiplying by
            // one leaves just the widened pairwise sum.
            let ones = ctx.use_constant(VCodeConstantData::WellKnown(&ONES_I16X8));
            let ones_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
            ctx.emit(Inst::xmm_load_const(ones, ones_reg, types::I16X8));
            ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src), dst));
            ctx.emit(Inst::xmm_rm_r(
                SseOpcode::Pmaddwd,
                RegMem::reg(ones_reg.to_reg()),
                dst,
            ));
        }
        (Opcode::ExtendedPairwiseAddUnsigned, types::I8X16, types::I16X8) => {
            // Operands are swapped relative to the signed case: the input sits in `dst`
            // (read as unsigned bytes) and the ones are the signed source operand.
            let ones = ctx.use_constant(VCodeConstantData::WellKnown(&ONES_I8X16));
            let ones_reg = ctx.alloc_tmp(types::I8X16).only_reg().unwrap();
            ctx.emit(Inst::xmm_load_const(ones, ones_reg, types::I8X16));
            ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src), dst));
            ctx.emit(Inst::xmm_rm_r(
                SseOpcode::Pmaddubsw,
                RegMem::reg(ones_reg.to_reg()),
                dst,
            ));
        }
        (Opcode::ExtendedPairwiseAddUnsigned, types::I16X8, types::I32X4) => {
            // There is no unsigned PMADDWD, so bias into the signed range first:
            // flipping the sign bit maps each unsigned lane x to the signed value x - 32768.
            let sign_bits =
                ctx.use_constant(VCodeConstantData::WellKnown(&SIGN_BITS_I16X8));
            let sign_bits_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
            ctx.emit(Inst::xmm_load_const(sign_bits, sign_bits_reg, types::I16X8));
            ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src), dst));
            ctx.emit(Inst::xmm_rm_r(
                SseOpcode::Pxor,
                RegMem::reg(sign_bits_reg.to_reg()),
                dst,
            ));

            // Signed pairwise sum of the biased lanes: (x0 - 32768) + (x1 - 32768).
            let ones = ctx.use_constant(VCodeConstantData::WellKnown(&ONES_I16X8));
            let ones_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
            ctx.emit(Inst::xmm_load_const(ones, ones_reg, types::I16X8));
            ctx.emit(Inst::xmm_rm_r(
                SseOpcode::Pmaddwd,
                RegMem::reg(ones_reg.to_reg()),
                dst,
            ));

            // Add 65536 to every 32-bit lane to undo the two 32768 biases.
            let bias = ctx.use_constant(VCodeConstantData::WellKnown(&BIAS_I32X4));
            let bias_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
            ctx.emit(Inst::xmm_load_const(bias, bias_reg, types::I16X8));
            ctx.emit(Inst::xmm_rm_r(
                SseOpcode::Paddd,
                RegMem::reg(bias_reg.to_reg()),
                dst,
            ));
        }
        _ => unreachable!(
            "Type pattern {:?}-{:?} not supported for {:?}.",
            input_ty, output_ty, op
        ),
    }
}
Opcode::UwidenHigh | Opcode::UwidenLow | Opcode::SwidenHigh | Opcode::SwidenLow => {
let input_ty = ctx.input_ty(insn, 0);
let output_ty = ctx.output_ty(insn, 0);
Expand Down
2 changes: 2 additions & 0 deletions cranelift/interpreter/src/step.rs
Original file line number Diff line number Diff line change
Expand Up @@ -574,6 +574,8 @@ where
Opcode::AtomicStore => unimplemented!("AtomicStore"),
Opcode::Fence => unimplemented!("Fence"),
Opcode::WideningPairwiseDotProductS => unimplemented!("WideningPairwiseDotProductS"),
Opcode::ExtendedPairwiseAddSigned => unimplemented!("ExtendedPairwiseAddSigned"),
Opcode::ExtendedPairwiseAddUnsigned => unimplemented!("ExtendedPairwiseAddUnsigned"),

// TODO: these instructions should be removed once the new backend makes these obsolete
// (see https://github.com/bytecodealliance/wasmtime/issues/1936); additionally, the
Expand Down
20 changes: 16 additions & 4 deletions cranelift/wasm/src/code_translator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1859,6 +1859,22 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
let a = pop1_with_bitcast(state, I32X4, builder);
state.push1(builder.ins().uwiden_high(a))
}
// Extended pairwise additions: the popped operand is bitcast to the lane shape named by the
// Wasm operator, then handed to the signed or unsigned Cranelift instruction.
Operator::I16x8ExtAddPairwiseI8x16S => {
    let lanes = pop1_with_bitcast(state, I8X16, builder);
    let summed = builder.ins().extended_pairwise_add_signed(lanes);
    state.push1(summed)
}
Operator::I32x4ExtAddPairwiseI16x8S => {
    let lanes = pop1_with_bitcast(state, I16X8, builder);
    let summed = builder.ins().extended_pairwise_add_signed(lanes);
    state.push1(summed)
}
Operator::I16x8ExtAddPairwiseI8x16U => {
    let lanes = pop1_with_bitcast(state, I8X16, builder);
    let summed = builder.ins().extended_pairwise_add_unsigned(lanes);
    state.push1(summed)
}
Operator::I32x4ExtAddPairwiseI16x8U => {
    let lanes = pop1_with_bitcast(state, I16X8, builder);
    let summed = builder.ins().extended_pairwise_add_unsigned(lanes);
    state.push1(summed)
}
Operator::F32x4Ceil | Operator::F64x2Ceil => {
// This is something of a misuse of `type_of`, because that produces the return type
// of `op`. In this case we want the arg type, but we know it's the same as the
Expand Down Expand Up @@ -1899,10 +1915,6 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
| Operator::I64x2ExtMulHighI32x4S
| Operator::I64x2ExtMulLowI32x4U
| Operator::I64x2ExtMulHighI32x4U
| Operator::I16x8ExtAddPairwiseI8x16S
| Operator::I16x8ExtAddPairwiseI8x16U
| Operator::I32x4ExtAddPairwiseI16x8S
| Operator::I32x4ExtAddPairwiseI16x8U
| Operator::F64x2ConvertLowI32x4U
| Operator::I32x4TruncSatF64x2SZero
| Operator::I32x4TruncSatF64x2UZero => {
Expand Down

0 comments on commit 4c97443

Please sign in to comment.