Skip to content

Commit

Permalink
Enable the simd_i16x8_q15mulr_sat_s test on AArch64
Browse files Browse the repository at this point in the history
Copyright (c) 2021, Arm Limited.
  • Loading branch information
akirilov-arm committed Jun 25, 2021
1 parent 1047c4e commit bd879ce
Show file tree
Hide file tree
Showing 9 changed files with 96 additions and 176 deletions.
1 change: 0 additions & 1 deletion build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,6 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
("simd", "simd_conversions")
| ("simd", "simd_i16x8_extadd_pairwise_i8x16")
| ("simd", "simd_i16x8_extmul_i8x16")
| ("simd", "simd_i16x8_q15mulr_sat_s")
| ("simd", "simd_i32x4_extadd_pairwise_i16x8")
| ("simd", "simd_i32x4_extmul_i16x8")
| ("simd", "simd_i32x4_trunc_sat_f64x2")
Expand Down
27 changes: 27 additions & 0 deletions cranelift/codegen/meta/src/shared/instructions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2479,6 +2479,33 @@ pub(crate) fn define(
.operands_out(vec![a]),
);

// Type variable for the fixed-point multiply defined below: integer lanes that
// are 16 or 32 bits wide.
// NOTE(review): the description string mentions scalars, but `simd_lanes(4..8)`
// looks like it only admits 4- and 8-lane vectors — confirm against
// `TypeSetBuilder` before relying on scalar support.
let I16or32 = &TypeVar::new(
    "I16or32",
    "A scalar or vector integer type with 16- or 32-bit numbers",
    TypeSetBuilder::new().ints(16..32).simd_lanes(4..8).build(),
);

// Both inputs and the result share the same type variable, so the instruction
// is type-preserving: `(I16or32, I16or32) -> I16or32`.
let qx = &Operand::new("x", I16or32);
let qy = &Operand::new("y", I16or32);
let qa = &Operand::new("a", I16or32);

// The rounding constant is parenthesised explicitly and the formula uses `N`
// (as introduced in the first sentence) so the documented semantics cannot be
// misread as `(x * y + 1) << (N - 1)`.
ig.push(
    Inst::new(
        "sqmul_round_sat",
        r#"
        Fixed-point multiplication of numbers in the QN format, where N + 1
        is the number bitwidth:
        `a := signed_saturate((x * y + (1 << (N - 1))) >> N)`
        Polymorphic over all integer types (scalar and vector) with 16- or
        32-bit numbers.
        "#,
        &formats.binary,
    )
    .operands_in(vec![qx, qy])
    .operands_out(vec![qa]),
);

ig.push(
Inst::new(
"udiv",
Expand Down
8 changes: 8 additions & 0 deletions cranelift/codegen/src/isa/aarch64/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2161,6 +2161,14 @@ impl MachInstEmit for Inst {
VecALUOp::Zip1 => (0b01001110_00_0 | enc_size << 1, 0b001110),
VecALUOp::Smull => (0b000_01110_00_1 | enc_size << 1, 0b110000),
VecALUOp::Smull2 => (0b010_01110_00_1 | enc_size << 1, 0b110000),
// Signed saturating rounding doubling multiply returning high half.
// The assertion below documents that this arm is only expected to be reached
// with 16- or 32-bit lanes; it is a debug-only check, so release builds do not
// enforce it.
VecALUOp::Sqrdmulh => {
debug_assert!(
size.lane_size() == ScalarSize::Size16
|| size.lane_size() == ScalarSize::Size32
);

// First element: opcode bits with the lane-size field folded in via
// `enc_size << 1`; second element: the fixed six-bit opcode field.
(0b001_01110_00_1 | enc_size << 1, 0b101101)
}
};
let top11 = match alu_op {
VecALUOp::Smull | VecALUOp::Smull2 => top11,
Expand Down
24 changes: 24 additions & 0 deletions cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3610,6 +3610,30 @@ fn test_aarch64_binemit() {
"smull2 v8.2d, v12.4s, v14.4s",
));

// `sqrdmulh` on a vector of eight 16-bit lanes. The instruction is paired with
// what appear to be its expected encoding (hex string) and its expected
// printed form.
insns.push((
Inst::VecRRR {
alu_op: VecALUOp::Sqrdmulh,
rd: writable_vreg(31),
rn: vreg(0),
rm: vreg(31),
size: VectorSize::Size16x8,
},
"1FB47F6E",
"sqrdmulh v31.8h, v0.8h, v31.8h",
));

// `sqrdmulh` on a vector of two 32-bit lanes, covering the other lane width
// accepted by the emitter's assertion; here the destination register is also
// the first source register.
insns.push((
Inst::VecRRR {
alu_op: VecALUOp::Sqrdmulh,
rd: writable_vreg(7),
rn: vreg(7),
rm: vreg(23),
size: VectorSize::Size32x2,
},
"E7B4B72E",
"sqrdmulh v7.2s, v7.2s, v23.2s",
));

insns.push((
Inst::VecMisc {
op: VecMisc2::Not,
Expand Down
3 changes: 3 additions & 0 deletions cranelift/codegen/src/isa/aarch64/inst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,8 @@ pub enum VecALUOp {
Smull,
/// Signed multiply long (high halves)
Smull2,
/// Signed saturating rounding doubling multiply returning high half
Sqrdmulh,
}

/// A Vector miscellaneous operation with two registers.
Expand Down Expand Up @@ -3980,6 +3982,7 @@ impl Inst {
VecALUOp::Zip1 => ("zip1", size),
VecALUOp::Smull => ("smull", size),
VecALUOp::Smull2 => ("smull2", size),
VecALUOp::Sqrdmulh => ("sqrdmulh", size),
};
let rd_size = match alu_op {
VecALUOp::Umlal | VecALUOp::Smull | VecALUOp::Smull2 => size.widen(),
Expand Down
34 changes: 24 additions & 10 deletions cranelift/codegen/src/isa/aarch64/lower_inst.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1650,8 +1650,6 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
panic!("table_addr should have been removed by legalization!");
}

Opcode::ConstAddr => unimplemented!(),

Opcode::Nop => {
// Nothing.
}
Expand Down Expand Up @@ -2684,11 +2682,6 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
});
}

Opcode::Vsplit | Opcode::Vconcat => {
// TODO
panic!("Vector ops not implemented.");
}

Opcode::Isplit => {
assert_eq!(
ctx.input_ty(insn, 0),
Expand Down Expand Up @@ -3524,9 +3517,30 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
},

Opcode::FcvtLowFromSint => unimplemented!("FcvtLowFromSint"),
Opcode::FvpromoteLow => unimplemented!("FvpromoteLow"),
Opcode::Fvdemote => unimplemented!("Fvdemote"),
Opcode::SqmulRoundSat => {
    // Lower the fixed-point saturating multiply to a single vector
    // `sqrdmulh`. Only vectors whose lanes are I16 or I32 are accepted;
    // every other type is reported as unsupported rather than panicking.
    let ty = ty.unwrap();
    let lane_ty = ty.lane_type();
    let supported = ty.is_vector() && (lane_ty == I16 || lane_ty == I32);

    if !supported {
        return Err(CodegenError::Unsupported(format!(
            "Unsupported type: {:?}",
            ty
        )));
    }

    // Destination first, then the two sources, each used as-is
    // (no narrowing or extension of the inputs).
    let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
    let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
    let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
    let size = VectorSize::from_ty(ty);

    ctx.emit(Inst::VecRRR {
        alu_op: VecALUOp::Sqrdmulh,
        rd,
        rn,
        rm,
        size,
    });
}

_ => unimplemented!("lowering {}", op),
}

Ok(())
Expand Down
67 changes: 2 additions & 65 deletions cranelift/codegen/src/isa/s390x/lower.rs
Original file line number Diff line number Diff line change
Expand Up @@ -976,9 +976,6 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
ctx.emit(Inst::AluRUImm32 { alu_op, rd, imm });
}

Opcode::UaddSat | Opcode::SaddSat => unimplemented!(),
Opcode::UsubSat | Opcode::SsubSat => unimplemented!(),

Opcode::Iabs => {
let ty = ty.unwrap();
let op = choose_32_64(ty, UnaryOp::Abs32, UnaryOp::Abs64);
Expand Down Expand Up @@ -1891,8 +1888,6 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
});
}

Opcode::Bitrev => unimplemented!(),

Opcode::Popcnt => {
let ty = ty.unwrap();
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
Expand Down Expand Up @@ -2151,8 +2146,6 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
}

Opcode::FcvtLowFromSint => unimplemented!("FcvtLowFromSint"),

Opcode::Bitcast => {
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
Expand Down Expand Up @@ -2399,8 +2392,6 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
ctx.emit(inst);
}

Opcode::ConstAddr => unimplemented!(),

Opcode::FuncAddr => {
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let (extname, dist) = ctx.call_target(insn).unwrap();
Expand Down Expand Up @@ -2457,14 +2448,6 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
panic!("global_value should have been removed by legalization!");
}

Opcode::TlsValue => {
panic!("Thread-local storage support not implemented!");
}

Opcode::GetPinnedReg | Opcode::SetPinnedReg => {
panic!("Pinned register support not implemented!");
}

Opcode::Icmp => {
let condcode = ctx.data(insn).cond_code().unwrap();
let cond = Cond::from_intcc(condcode);
Expand Down Expand Up @@ -2825,54 +2808,6 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
ctx.emit(Inst::Fence);
}

Opcode::RawBitcast
| Opcode::Splat
| Opcode::Swizzle
| Opcode::Insertlane
| Opcode::Extractlane
| Opcode::Imin
| Opcode::Umin
| Opcode::Imax
| Opcode::Umax
| Opcode::AvgRound
| Opcode::FminPseudo
| Opcode::FmaxPseudo
| Opcode::Uload8x8
| Opcode::Uload8x8Complex
| Opcode::Sload8x8
| Opcode::Sload8x8Complex
| Opcode::Uload16x4
| Opcode::Uload16x4Complex
| Opcode::Sload16x4
| Opcode::Sload16x4Complex
| Opcode::Uload32x2
| Opcode::Uload32x2Complex
| Opcode::Sload32x2
| Opcode::Sload32x2Complex
| Opcode::Vconst
| Opcode::Shuffle
| Opcode::Vsplit
| Opcode::Vconcat
| Opcode::Vselect
| Opcode::VanyTrue
| Opcode::VallTrue
| Opcode::VhighBits
| Opcode::ScalarToVector
| Opcode::Snarrow
| Opcode::Unarrow
| Opcode::SwidenLow
| Opcode::SwidenHigh
| Opcode::UwidenLow
| Opcode::UwidenHigh
| Opcode::WideningPairwiseDotProductS
| Opcode::FvpromoteLow
| Opcode::Fvdemote => {
// TODO
panic!("Vector ops not implemented.");
}

Opcode::Isplit | Opcode::Iconcat => panic!("Wide integer ops not implemented."),

Opcode::Spill
| Opcode::Fill
| Opcode::FillNop
Expand Down Expand Up @@ -2988,6 +2923,8 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
| Opcode::X86MachoTlsGetAddr => {
panic!("x86-specific opcode in supposedly arch-neutral IR!");
}

_ => unimplemented!("lowering {}", op),
}

Ok(())
Expand Down
Loading

0 comments on commit bd879ce

Please sign in to comment.