Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add extend-add-pairwise instructions x64 #3031

Merged
merged 2 commits into from
Aug 1, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 7 additions & 23 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -156,10 +156,8 @@ fn write_testsuite_tests(
let testname = extract_name(path);

writeln!(out, "#[test]")?;
if x64_should_panic(testsuite, &testname, strategy) {
writeln!(out, r#"#[should_panic]"#)?;
// Ignore when using QEMU for running tests (limited memory).
} else if ignore(testsuite, &testname, strategy) || (pooling && platform_is_emulated()) {
if ignore(testsuite, &testname, strategy) || (pooling && platform_is_emulated()) {
writeln!(out, "#[ignore]")?;
}

Expand All @@ -182,22 +180,6 @@ fn write_testsuite_tests(
Ok(())
}

/// For x64 backend features that are not supported yet, mark tests as panicking, so
/// they stop "passing" once the features are properly implemented.
fn x64_should_panic(testsuite: &str, testname: &str, strategy: &str) -> bool {
if !platform_is_x64() || strategy != "Cranelift" {
return false;
}

match (testsuite, testname) {
("simd", "simd_i16x8_extadd_pairwise_i8x16") => return true,
cfallin marked this conversation as resolved.
Show resolved Hide resolved
("simd", "simd_i32x4_extadd_pairwise_i16x8") => return true,
("simd", _) => return false,
_ => {}
}
false
}

/// Ignore tests that aren't supported yet.
fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
match strategy {
Expand All @@ -220,11 +202,13 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
("simd", _) if cfg!(feature = "old-x86-backend") => return true,
// No simd support yet for s390x.
("simd", _) if platform_is_s390x() => return true,

// These are new instructions that are not really implemented in any backend.
// These are new instructions that are only known to be supported for x64.
("simd", "simd_i16x8_extadd_pairwise_i8x16")
| ("simd", "simd_i32x4_extadd_pairwise_i16x8") => return true,

| ("simd", "simd_i32x4_extadd_pairwise_i16x8")
if !platform_is_x64() =>
{
return true
}
_ => {}
},
_ => panic!("unrecognized strategy"),
Expand Down
25 changes: 23 additions & 2 deletions cranelift/codegen/meta/src/shared/instructions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4114,16 +4114,37 @@ pub(crate) fn define(
Inst::new(
"uwiden_high",
r#"
Widen the high lanes of `x` using unsigned extension.
Widen the high lanes of `x` using unsigned extension.

This will double the lane width and halve the number of lanes.
This will double the lane width and halve the number of lanes.
"#,
&formats.unary,
)
.operands_in(vec![x])
.operands_out(vec![a]),
);

let x = &Operand::new("x", I8or16or32xN);
let y = &Operand::new("y", I8or16or32xN);
let a = &Operand::new("a", I8or16or32xN);

ig.push(
Inst::new(
"iadd_pairwise",
r#"
Does lane-wise integer pairwise addition on two operands, putting the
combined results into a single vector result. Here a pair refers to adjacent
lanes in a vector, i.e. lanes i*2 and i*2+1 for each i in 0..num_lanes/2. The
pairwise sums of the first operand make up the low half of the resulting
vector, while the pairwise sums of the second operand make up the upper half
of the resulting vector.
"#,
&formats.binary,
)
.operands_in(vec![x, y])
.operands_out(vec![a]),
);

let I16x8 = &TypeVar::new(
"I16x8",
"A SIMD vector type containing 8 integer lanes each 16 bits wide.",
Expand Down
4 changes: 3 additions & 1 deletion cranelift/codegen/src/isa/aarch64/lower_inst.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3519,7 +3519,9 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
});
}

Opcode::ConstAddr | Opcode::Vconcat | Opcode::Vsplit => unimplemented!("lowering {}", op),
Opcode::IaddPairwise | Opcode::ConstAddr | Opcode::Vconcat | Opcode::Vsplit => {
unimplemented!("lowering {}", op)
}
}

Ok(())
Expand Down
3 changes: 2 additions & 1 deletion cranelift/codegen/src/isa/s390x/lower.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2868,7 +2868,8 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
| Opcode::WideningPairwiseDotProductS
| Opcode::SqmulRoundSat
| Opcode::FvpromoteLow
| Opcode::Fvdemote => {
| Opcode::Fvdemote
| Opcode::IaddPairwise => {
// TODO
unimplemented!("Vector ops not implemented.");
}
Expand Down
3 changes: 3 additions & 0 deletions cranelift/codegen/src/isa/x64/inst/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -568,6 +568,7 @@ pub enum SseOpcode {
Pinsrb,
Pinsrw,
Pinsrd,
Pmaddubsw,
Pmaddwd,
Pmaxsb,
Pmaxsw,
Expand Down Expand Up @@ -746,6 +747,7 @@ impl SseOpcode {
| SseOpcode::Pcmpgtd
| SseOpcode::Pextrw
| SseOpcode::Pinsrw
| SseOpcode::Pmaddubsw
| SseOpcode::Pmaddwd
| SseOpcode::Pmaxsw
| SseOpcode::Pmaxub
Expand Down Expand Up @@ -944,6 +946,7 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Pinsrb => "pinsrb",
SseOpcode::Pinsrw => "pinsrw",
SseOpcode::Pinsrd => "pinsrd",
SseOpcode::Pmaddubsw => "pmaddubsw",
SseOpcode::Pmaddwd => "pmaddwd",
SseOpcode::Pmaxsb => "pmaxsb",
SseOpcode::Pmaxsw => "pmaxsw",
Expand Down
1 change: 1 addition & 0 deletions cranelift/codegen/src/isa/x64/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1483,6 +1483,7 @@ pub(crate) fn emit(
SseOpcode::Paddsw => (LegacyPrefixes::_66, 0x0FED, 2),
SseOpcode::Paddusb => (LegacyPrefixes::_66, 0x0FDC, 2),
SseOpcode::Paddusw => (LegacyPrefixes::_66, 0x0FDD, 2),
SseOpcode::Pmaddubsw => (LegacyPrefixes::_66, 0x0F3804, 3),
SseOpcode::Pand => (LegacyPrefixes::_66, 0x0FDB, 2),
SseOpcode::Pandn => (LegacyPrefixes::_66, 0x0FDF, 2),
SseOpcode::Pavgb => (LegacyPrefixes::_66, 0x0FE0, 2),
Expand Down
159 changes: 159 additions & 0 deletions cranelift/codegen/src/isa/x64/lower.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4927,6 +4927,165 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
}
}
Opcode::IaddPairwise => {
if let (Some(swiden_low), Some(swiden_high)) = (
matches_input(ctx, inputs[0], Opcode::SwidenLow),
matches_input(ctx, inputs[1], Opcode::SwidenHigh),
) {
let swiden_input = &[
InsnInput {
insn: swiden_low,
input: 0,
},
InsnInput {
insn: swiden_high,
input: 0,
},
];

let input_ty = ctx.input_ty(swiden_low, 0);
let output_ty = ctx.output_ty(insn, 0);
let src0 = put_input_in_reg(ctx, swiden_input[0]);
let src1 = put_input_in_reg(ctx, swiden_input[1]);
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
if src0 != src1 {
unimplemented!(
"iadd_pairwise not implemented for general case with different inputs"
);
}
match (input_ty, output_ty) {
(types::I8X16, types::I16X8) => {
static MUL_CONST: [u8; 16] = [0x01; 16];
let mul_const = ctx.use_constant(VCodeConstantData::WellKnown(&MUL_CONST));
let mul_const_reg = ctx.alloc_tmp(types::I8X16).only_reg().unwrap();
ctx.emit(Inst::xmm_load_const(mul_const, mul_const_reg, types::I8X16));
ctx.emit(Inst::xmm_mov(
SseOpcode::Movdqa,
RegMem::reg(mul_const_reg.to_reg()),
dst,
));
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmaddubsw, RegMem::reg(src0), dst));
}
(types::I16X8, types::I32X4) => {
static MUL_CONST: [u8; 16] = [
0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00,
0x01, 0x00, 0x01, 0x00,
];
let mul_const = ctx.use_constant(VCodeConstantData::WellKnown(&MUL_CONST));
let mul_const_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
ctx.emit(Inst::xmm_load_const(mul_const, mul_const_reg, types::I16X8));
ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src0), dst));
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Pmaddwd,
RegMem::reg(mul_const_reg.to_reg()),
dst,
));
}
_ => {
unimplemented!("Type not supported for {:?}", op);
}
}
} else if let (Some(uwiden_low), Some(uwiden_high)) = (
matches_input(ctx, inputs[0], Opcode::UwidenLow),
matches_input(ctx, inputs[1], Opcode::UwidenHigh),
) {
let uwiden_input = &[
InsnInput {
insn: uwiden_low,
input: 0,
},
InsnInput {
insn: uwiden_high,
input: 0,
},
];

let input_ty = ctx.input_ty(uwiden_low, 0);
let output_ty = ctx.output_ty(insn, 0);
let src0 = put_input_in_reg(ctx, uwiden_input[0]);
let src1 = put_input_in_reg(ctx, uwiden_input[1]);
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
if src0 != src1 {
unimplemented!(
"iadd_pairwise not implemented for general case with different inputs"
);
}
match (input_ty, output_ty) {
(types::I8X16, types::I16X8) => {
static MUL_CONST: [u8; 16] = [0x01; 16];
let mul_const = ctx.use_constant(VCodeConstantData::WellKnown(&MUL_CONST));
let mul_const_reg = ctx.alloc_tmp(types::I8X16).only_reg().unwrap();
ctx.emit(Inst::xmm_load_const(mul_const, mul_const_reg, types::I8X16));
ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src0), dst));
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Pmaddubsw,
RegMem::reg(mul_const_reg.to_reg()),
dst,
));
}
(types::I16X8, types::I32X4) => {
static PXOR_CONST: [u8; 16] = [
0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
0x00, 0x80, 0x00, 0x80,
];
let pxor_const =
ctx.use_constant(VCodeConstantData::WellKnown(&PXOR_CONST));
let pxor_const_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
ctx.emit(Inst::xmm_load_const(
pxor_const,
pxor_const_reg,
types::I16X8,
));
ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src0), dst));
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Pxor,
RegMem::reg(pxor_const_reg.to_reg()),
dst,
));

static MADD_CONST: [u8; 16] = [
0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00,
0x01, 0x00, 0x01, 0x00,
];
let madd_const =
ctx.use_constant(VCodeConstantData::WellKnown(&MADD_CONST));
let madd_const_reg = ctx.alloc_tmp(types::I8X16).only_reg().unwrap();
ctx.emit(Inst::xmm_load_const(
madd_const,
madd_const_reg,
types::I16X8,
));
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Pmaddwd,
RegMem::reg(madd_const_reg.to_reg()),
dst,
));
static ADDD_CONST2: [u8; 16] = [
0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00,
0x00, 0x00, 0x01, 0x00,
];
let addd_const2 =
ctx.use_constant(VCodeConstantData::WellKnown(&ADDD_CONST2));
let addd_const2_reg = ctx.alloc_tmp(types::I8X16).only_reg().unwrap();
ctx.emit(Inst::xmm_load_const(
addd_const2,
addd_const2_reg,
types::I16X8,
));
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Paddd,
RegMem::reg(addd_const2_reg.to_reg()),
dst,
));
}
_ => {
unimplemented!("Type not supported for {:?}", op);
}
}
} else {
unimplemented!("Operands not supported for {:?}", op);
}
cfallin marked this conversation as resolved.
Show resolved Hide resolved
}
Opcode::UwidenHigh | Opcode::UwidenLow | Opcode::SwidenHigh | Opcode::SwidenLow => {
let input_ty = ctx.input_ty(insn, 0);
let output_ty = ctx.output_ty(insn, 0);
Expand Down
Binary file modified cranelift/codegen/src/preopt.serialized
Binary file not shown.
1 change: 1 addition & 0 deletions cranelift/interpreter/src/step.rs
Original file line number Diff line number Diff line change
Expand Up @@ -630,6 +630,7 @@ where
Opcode::Fence => unimplemented!("Fence"),
Opcode::WideningPairwiseDotProductS => unimplemented!("WideningPairwiseDotProductS"),
Opcode::SqmulRoundSat => unimplemented!("SqmulRoundSat"),
Opcode::IaddPairwise => unimplemented!("IaddPairwise"),

// TODO: these instructions should be removed once the new backend makes these obsolete
// (see https://github.com/bytecodealliance/wasmtime/issues/1936); additionally, the
Expand Down
30 changes: 24 additions & 6 deletions cranelift/wasm/src/code_translator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1879,6 +1879,30 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
let a = pop1_with_bitcast(state, I32X4, builder);
state.push1(builder.ins().uwiden_high(a))
}
Operator::I16x8ExtAddPairwiseI8x16S => {
let a = pop1_with_bitcast(state, I8X16, builder);
let widen_low = builder.ins().swiden_low(a);
let widen_high = builder.ins().swiden_high(a);
state.push1(builder.ins().iadd_pairwise(widen_low, widen_high));
}
Operator::I32x4ExtAddPairwiseI16x8S => {
let a = pop1_with_bitcast(state, I16X8, builder);
let widen_low = builder.ins().swiden_low(a);
let widen_high = builder.ins().swiden_high(a);
state.push1(builder.ins().iadd_pairwise(widen_low, widen_high));
}
Operator::I16x8ExtAddPairwiseI8x16U => {
let a = pop1_with_bitcast(state, I8X16, builder);
let widen_low = builder.ins().uwiden_low(a);
let widen_high = builder.ins().uwiden_high(a);
state.push1(builder.ins().iadd_pairwise(widen_low, widen_high));
}
Operator::I32x4ExtAddPairwiseI16x8U => {
let a = pop1_with_bitcast(state, I16X8, builder);
let widen_low = builder.ins().uwiden_low(a);
let widen_high = builder.ins().uwiden_high(a);
state.push1(builder.ins().iadd_pairwise(widen_low, widen_high));
}
Operator::F32x4Ceil | Operator::F64x2Ceil => {
// This is something of a misuse of `type_of`, because that produces the return type
// of `op`. In this case we want the arg type, but we know it's the same as the
Expand Down Expand Up @@ -1982,12 +2006,6 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
let b_high = builder.ins().uwiden_high(b);
state.push1(builder.ins().imul(a_high, b_high));
}
Operator::I16x8ExtAddPairwiseI8x16S
| Operator::I16x8ExtAddPairwiseI8x16U
| Operator::I32x4ExtAddPairwiseI16x8S
| Operator::I32x4ExtAddPairwiseI16x8U => {
return Err(wasm_unsupported!("proposed simd operator {:?}", op));
}
Operator::ReturnCall { .. } | Operator::ReturnCallIndirect { .. } => {
return Err(wasm_unsupported!("proposed tail-call operator {:?}", op));
}
Expand Down