From e519fca61c905423dcab39bb93f9944d8c5d66fd Mon Sep 17 00:00:00 2001 From: Johnnie Birch Date: Thu, 29 Jul 2021 20:19:38 -0700 Subject: [PATCH] Refactor and turn on lowering for extend-add-pairwise --- build.rs | 24 ++--- .../codegen/meta/src/shared/instructions.rs | 36 +++---- .../codegen/src/isa/aarch64/lower_inst.rs | 8 +- cranelift/codegen/src/isa/s390x/lower.rs | 3 +- cranelift/codegen/src/isa/x64/lower.rs | 97 ++++++++++++------ cranelift/codegen/src/preopt.serialized | Bin 5511 -> 5511 bytes cranelift/interpreter/src/step.rs | 3 +- cranelift/wasm/src/code_translator.rs | 16 ++- 8 files changed, 107 insertions(+), 80 deletions(-) diff --git a/build.rs b/build.rs index 20de13c6e2ea..edf1d3e290cd 100644 --- a/build.rs +++ b/build.rs @@ -156,10 +156,8 @@ fn write_testsuite_tests( let testname = extract_name(path); writeln!(out, "#[test]")?; - if x64_should_panic(testsuite, &testname, strategy) { - writeln!(out, r#"#[should_panic]"#)?; // Ignore when using QEMU for running tests (limited memory). - } else if ignore(testsuite, &testname, strategy) || (pooling && platform_is_emulated()) { + if ignore(testsuite, &testname, strategy) || (pooling && platform_is_emulated()) { writeln!(out, "#[ignore]")?; } @@ -182,19 +180,6 @@ fn write_testsuite_tests( Ok(()) } -/// For x64 backend features that are not supported yet, mark tests as panicking, so -/// they stop "passing" once the features are properly implemented. -fn x64_should_panic(testsuite: &str, testname: &str, strategy: &str) -> bool { - if !platform_is_x64() || strategy != "Cranelift" { - return false; - } - - match (testsuite, testname) { - _ => {} - } - false -} - /// Ignore tests that aren't supported yet. fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool { match strategy { @@ -217,6 +202,13 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool { ("simd", _) if cfg!(feature = "old-x86-backend") => return true, // No simd support yet for s390x. ("simd", _) if platform_is_s390x() => return true, + // These are new instructions that are only known to be supported for x64. + ("simd", "simd_i16x8_extadd_pairwise_i8x16") + | ("simd", "simd_i32x4_extadd_pairwise_i16x8") + if !platform_is_x64() => + { + return true + } _ => {} }, _ => panic!("unrecognized strategy"), diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs index 9603ecc7f134..ffded34f9505 100644 --- a/cranelift/codegen/meta/src/shared/instructions.rs +++ b/cranelift/codegen/meta/src/shared/instructions.rs @@ -4114,22 +4114,9 @@ pub(crate) fn define( Inst::new( "uwiden_high", r#" - Lane-wise integer extended pairwise addition producing extended results - (twice wider results than the input) - "#, - &formats.unary, - ) - .operands_in(vec![x]) - .operands_out(vec![a]), - ); - - ig.push( - Inst::new( - "extended_pairwise_add_signed", - r#" - Widen the high lanes of `x` using signed extension. + Widen the high lanes of `x` using unsigned extension. - This will double the lane width and halve the number of lanes. + This will double the lane width and halve the number of lanes. "#, &formats.unary, ) @@ -4137,17 +4124,24 @@ pub(crate) fn define( .operands_out(vec![a]), ); + let x = &Operand::new("x", I8or16or32xN); + let y = &Operand::new("y", I8or16or32xN); + let a = &Operand::new("a", I8or16or32xN); + ig.push( Inst::new( - "extended_pairwise_add_unsigned", + "iadd_pairwise", r#" - Widen the high lanes of `x` extending with zeros. - - This will double the lane width and halve the number of lanes. + Does lane-wise integer pairwise addition on two operands, putting the + combined results into a single vector result. Here a pair refers to adjacent + lanes in a vector, i.e. i*2 + (i*2+1) for i == num_lanes/2. The first operand + pairwise add results will make up the low half of the resulting vector while + the second operand pairwise add results will make up the upper half of the + resulting vector. "#, - &formats.unary, + &formats.binary, ) - .operands_in(vec![x]) + .operands_in(vec![x, y]) .operands_out(vec![a]), ); diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index a3670e823f38..c07fb9259618 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -3519,11 +3519,9 @@ pub(crate) fn lower_insn_to_regs>( }); } - Opcode::ExtendedPairwiseAddSigned - | Opcode::ExtendedPairwiseAddUnsigned - | Opcode::ConstAddr - | Opcode::Vconcat - | Opcode::Vsplit => unimplemented!("lowering {}", op), + Opcode::IaddPairwise | Opcode::ConstAddr | Opcode::Vconcat | Opcode::Vsplit => { + unimplemented!("lowering {}", op) + } } Ok(()) diff --git a/cranelift/codegen/src/isa/s390x/lower.rs b/cranelift/codegen/src/isa/s390x/lower.rs index 8ae6b32ecaad..b13edc4bb2f7 100644 --- a/cranelift/codegen/src/isa/s390x/lower.rs +++ b/cranelift/codegen/src/isa/s390x/lower.rs @@ -2869,8 +2869,7 @@ fn lower_insn_to_regs>( | Opcode::SqmulRoundSat | Opcode::FvpromoteLow | Opcode::Fvdemote - | Opcode::ExtendedPairwiseAddSigned - | Opcode::ExtendedPairwiseAddUnsigned => { + | Opcode::IaddPairwise => { // TODO unimplemented!("Vector ops not implemented."); } diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index f9f824cc23dc..b4c05cee8fc2 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -4927,18 +4927,33 @@ fn lower_insn_to_regs>( } } } - Opcode::ExtendedPairwiseAddSigned | Opcode::ExtendedPairwiseAddUnsigned => { - // Extended pairwise addition instructions computes extended sums within adjacent - // pairs of lanes of a SIMD vector, producing a SIMD vector with half as many lanes. - // Instruction sequences taken from instruction SPEC PR https://github.com/WebAssembly/simd/pull/380 - /* - let input_ty = ctx.input_ty(insn, 0); - let output_ty = ctx.output_ty(insn, 0); - let src = put_input_in_reg(ctx, inputs[0]); - let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - unreachable!(); - match op { - Opcode::ExtendedPairwiseAddSigned => match (input_ty, output_ty) { + Opcode::IaddPairwise => { + if let (Some(swiden_low), Some(swiden_high)) = ( + matches_input(ctx, inputs[0], Opcode::SwidenLow), + matches_input(ctx, inputs[1], Opcode::SwidenHigh), + ) { + let swiden_input = &[ + InsnInput { + insn: swiden_low, + input: 0, + }, + InsnInput { + insn: swiden_high, + input: 0, + }, + ]; + + let input_ty = ctx.input_ty(swiden_low, 0); + let output_ty = ctx.output_ty(insn, 0); + let src0 = put_input_in_reg(ctx, swiden_input[0]); + let src1 = put_input_in_reg(ctx, swiden_input[1]); + let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + if src0 != src1 { + unimplemented!( + "iadd_pairwise not implemented for general case with different inputs" + ); + } + match (input_ty, output_ty) { (types::I8X16, types::I16X8) => { static MUL_CONST: [u8; 16] = [0x01; 16]; let mul_const = ctx.use_constant(VCodeConstantData::WellKnown(&MUL_CONST)); @@ -4949,7 +4964,7 @@ fn lower_insn_to_regs>( RegMem::reg(mul_const_reg.to_reg()), dst, )); - ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmaddubsw, RegMem::reg(src), dst)); + ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmaddubsw, RegMem::reg(src0), dst)); } (types::I16X8, types::I32X4) => { static MUL_CONST: [u8; 16] = [ @@ -4959,25 +4974,49 @@ fn lower_insn_to_regs>( let mul_const = ctx.use_constant(VCodeConstantData::WellKnown(&MUL_CONST)); let mul_const_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap(); ctx.emit(Inst::xmm_load_const(mul_const, mul_const_reg, types::I16X8)); - ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src), dst)); + ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src0), dst)); ctx.emit(Inst::xmm_rm_r( SseOpcode::Pmaddwd, RegMem::reg(mul_const_reg.to_reg()), dst, )); } - _ => unreachable!( - "Type pattern not supported {:?}-{:?} not supported for {:?}.", - input_ty, output_ty, op - ), - }, - Opcode::ExtendedPairwiseAddUnsigned => match (input_ty, output_ty) { + _ => { + unimplemented!("Type not supported for {:?}", op); + } + } + } else if let (Some(uwiden_low), Some(uwiden_high)) = ( + matches_input(ctx, inputs[0], Opcode::UwidenLow), + matches_input(ctx, inputs[1], Opcode::UwidenHigh), + ) { + let uwiden_input = &[ + InsnInput { + insn: uwiden_low, + input: 0, + }, + InsnInput { + insn: uwiden_high, + input: 0, + }, + ]; + + let input_ty = ctx.input_ty(uwiden_low, 0); + let output_ty = ctx.output_ty(insn, 0); + let src0 = put_input_in_reg(ctx, uwiden_input[0]); + let src1 = put_input_in_reg(ctx, uwiden_input[1]); + let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + if src0 != src1 { + unimplemented!( + "iadd_pairwise not implemented for general case with different inputs" + ); + } + match (input_ty, output_ty) { (types::I8X16, types::I16X8) => { static MUL_CONST: [u8; 16] = [0x01; 16]; let mul_const = ctx.use_constant(VCodeConstantData::WellKnown(&MUL_CONST)); let mul_const_reg = ctx.alloc_tmp(types::I8X16).only_reg().unwrap(); ctx.emit(Inst::xmm_load_const(mul_const, mul_const_reg, types::I8X16)); - ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src), dst)); + ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src0), dst)); ctx.emit(Inst::xmm_rm_r( SseOpcode::Pmaddubsw, RegMem::reg(mul_const_reg.to_reg()), @@ -4997,7 +5036,7 @@ fn lower_insn_to_regs>( pxor_const_reg, types::I16X8, )); - ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src), dst)); + ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src0), dst)); ctx.emit(Inst::xmm_rm_r( SseOpcode::Pxor, RegMem::reg(pxor_const_reg.to_reg()), @@ -5021,7 +5060,6 @@ fn lower_insn_to_regs>( RegMem::reg(madd_const_reg.to_reg()), dst, )); - static ADDD_CONST2: [u8; 16] = [ 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, @@ -5040,14 +5078,13 @@ fn lower_insn_to_regs>( dst, )); } - _ => unreachable!( - "Type pattern not supported {:?}-{:?} not supported for {:?}.", - input_ty, output_ty, op - ), - }, - _ => unreachable!("{:?} not supported.", op), + _ => { + unimplemented!("Type not supported for {:?}", op); + } + } + } else { + unimplemented!("Operands not supported for {:?}", op); } - */ } Opcode::UwidenHigh | Opcode::UwidenLow | Opcode::SwidenHigh | Opcode::SwidenLow => { let input_ty = ctx.input_ty(insn, 0); diff --git a/cranelift/codegen/src/preopt.serialized b/cranelift/codegen/src/preopt.serialized index 10e3d5c36eda6f685ab53df7ef77d4e978f68c84..353373491a3b2e704f137c67f4e115f2b251b001 100644 GIT binary patch delta 68 zcmZqIZr9$R$+CGrs}+|`T$AJ3 L#Wvfrd+`APNE8;} delta 68 zcmZqIZr9$R$-;PR@&Q&`Ao&kW+OUDxRbX-lm`q@e->k`E#W>lKUuL2J$K-leuF3K2 KVw-K*z4!o1vKCbU diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs index 0e60ed089a22..c9c03729809b 100644 --- a/cranelift/interpreter/src/step.rs +++ b/cranelift/interpreter/src/step.rs @@ -630,8 +630,7 @@ where Opcode::Fence => unimplemented!("Fence"), Opcode::WideningPairwiseDotProductS => unimplemented!("WideningPairwiseDotProductS"), Opcode::SqmulRoundSat => unimplemented!("SqmulRoundSat"), - Opcode::ExtendedPairwiseAddSigned => unimplemented!("ExtendedPairwiseAddSigned"), - Opcode::ExtendedPairwiseAddUnsigned => unimplemented!("ExtendedPairwiseAddUnsigned"), + Opcode::IaddPairwise => unimplemented!("IaddPairwise"), // TODO: these instructions should be removed once the new backend makes these obsolete // (see https://github.com/bytecodealliance/wasmtime/issues/1936); additionally, the diff --git a/cranelift/wasm/src/code_translator.rs b/cranelift/wasm/src/code_translator.rs index 21a9e9eef05e..31efed58da42 100644 --- a/cranelift/wasm/src/code_translator.rs +++ b/cranelift/wasm/src/code_translator.rs @@ -1881,19 +1881,27 @@ pub fn translate_operator( } Operator::I16x8ExtAddPairwiseI8x16S => { let a = pop1_with_bitcast(state, I8X16, builder); - state.push1(builder.ins().extended_pairwise_add_signed(a)) + let widen_low = builder.ins().swiden_low(a); + let widen_high = builder.ins().swiden_high(a); + state.push1(builder.ins().iadd_pairwise(widen_low, widen_high)); } Operator::I32x4ExtAddPairwiseI16x8S => { let a = pop1_with_bitcast(state, I16X8, builder); - state.push1(builder.ins().extended_pairwise_add_signed(a)) + let widen_low = builder.ins().swiden_low(a); + let widen_high = builder.ins().swiden_high(a); + state.push1(builder.ins().iadd_pairwise(widen_low, widen_high)); } Operator::I16x8ExtAddPairwiseI8x16U => { let a = pop1_with_bitcast(state, I8X16, builder); - state.push1(builder.ins().extended_pairwise_add_unsigned(a)) + let widen_low = builder.ins().uwiden_low(a); + let widen_high = builder.ins().uwiden_high(a); + state.push1(builder.ins().iadd_pairwise(widen_low, widen_high)); } Operator::I32x4ExtAddPairwiseI16x8U => { let a = pop1_with_bitcast(state, I16X8, builder); - state.push1(builder.ins().extended_pairwise_add_unsigned(a)) + let widen_low = builder.ins().uwiden_low(a); + let widen_high = builder.ins().uwiden_high(a); + state.push1(builder.ins().iadd_pairwise(widen_low, widen_high)); } Operator::F32x4Ceil | Operator::F64x2Ceil => { // This is something of a misuse of `type_of`, because that produces the return type