Skip to content

Commit

Permalink
x64: lower i64x2.imul to VPMULLQ when possible
Browse files Browse the repository at this point in the history
This adds the machinery to encode the VPMULLQ instruction, which is
available in AVX512VL and AVX512DQ. When these feature sets are
available, we use this instruction instead of a lengthy 12-instruction
sequence.
  • Loading branch information
abrown committed May 13, 2021
1 parent f0509ee commit c42fe4e
Show file tree
Hide file tree
Showing 5 changed files with 196 additions and 92 deletions.
4 changes: 4 additions & 0 deletions cranelift/codegen/src/isa/x64/inst/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -462,6 +462,7 @@ pub(crate) enum InstructionSet {
BMI2,
AVX512F,
AVX512VL,
AVX512DQ,
}

/// Some SSE operations requiring 2 operands r/m and r.
Expand Down Expand Up @@ -994,13 +995,15 @@ impl fmt::Display for SseOpcode {
/// Opcodes of the AVX-512 instructions this backend knows about. The EVEX instruction formats
/// (`Inst::XmmUnaryRmREvex`, `Inst::XmmRmREvex`) carry one of these as their `op`.
#[derive(Clone)]
pub enum Avx512Opcode {
/// `vpabsq`; `available_from` lists AVX512F and AVX512VL for it.
Vpabsq,
/// `vpmullq`; `available_from` lists AVX512VL and AVX512DQ for it. Used to lower
/// `i64x2.imul`.
Vpmullq,
}

impl Avx512Opcode {
/// Which `InstructionSet`s support the opcode?
pub(crate) fn available_from(&self) -> SmallVec<[InstructionSet; 2]> {
match self {
Avx512Opcode::Vpabsq => smallvec![InstructionSet::AVX512F, InstructionSet::AVX512VL],
Avx512Opcode::Vpmullq => smallvec![InstructionSet::AVX512VL, InstructionSet::AVX512DQ],
}
}
}
Expand All @@ -1009,6 +1012,7 @@ impl fmt::Debug for Avx512Opcode {
/// Writes the lowercase mnemonic of the opcode (e.g. `vpmullq`) to the formatter.
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
    fmt.write_str(match self {
        Avx512Opcode::Vpabsq => "vpabsq",
        Avx512Opcode::Vpmullq => "vpmullq",
    })
}
Expand Down
27 changes: 27 additions & 0 deletions cranelift/codegen/src/isa/x64/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ pub(crate) fn emit(
InstructionSet::BMI2 => info.isa_flags.has_bmi2(),
InstructionSet::AVX512F => info.isa_flags.has_avx512f(),
InstructionSet::AVX512VL => info.isa_flags.has_avx512vl(),
InstructionSet::AVX512DQ => info.isa_flags.has_avx512dq(),
}
};

Expand Down Expand Up @@ -1409,6 +1410,7 @@ pub(crate) fn emit(
Inst::XmmUnaryRmREvex { op, src, dst } => {
let opcode = match op {
Avx512Opcode::Vpabsq => 0x1f,
_ => unimplemented!("Opcode {:?} not implemented", op),
};
match src {
RegMem::Reg { reg: src } => EvexInstruction::new()
Expand Down Expand Up @@ -1545,6 +1547,31 @@ pub(crate) fn emit(
}
}

// Three-operand EVEX instruction on XMM registers. `dst` is placed in the `reg` field, the
// register form of `src1` in `rm`, and `src2` in `vvvvv`.
Inst::XmmRmREvex {
op,
src1,
src2,
dst,
} => {
// Opcode byte for this instruction format; only `vpmullq` has one assigned so far.
let opcode = match op {
Avx512Opcode::Vpmullq => 0x40,
// Any other `Avx512Opcode` reaching this format panics here.
_ => unimplemented!("Opcode {:?} not implemented", op),
};
match src1 {
// Register-register form. The encoding parameters are fixed for every opcode handled
// above: 128-bit vector length, 0x66 prefix, 0F38 opcode map, W set.
RegMem::Reg { reg: src } => EvexInstruction::new()
.length(EvexVectorLength::V128)
.prefix(LegacyPrefixes::_66)
.map(OpcodeMap::_0F38)
.w(true)
.opcode(opcode)
.reg(dst.to_reg().get_hw_encoding())
.rm(src.get_hw_encoding())
.vvvvv(src2.get_hw_encoding())
.encode(sink),
// NOTE(review): any non-register `src1` (presumably a memory operand) is not encodable
// yet and panics via `todo!()`, although the instruction's `src1` field is typed `RegMem`.
_ => todo!(),
};
}

Inst::XmmMinMaxSeq {
size,
is_min,
Expand Down
7 changes: 7 additions & 0 deletions cranelift/codegen/src/isa/x64/inst/emit_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3555,6 +3555,12 @@ fn test_x64_emit() {
"pmullw %xmm14, %xmm1",
));

// `vpmullq` through the EVEX `XmmRmREvex` format with a register `src1`: the tuple holds the
// instruction, its expected encoding as a hex string, and its expected printed form.
insns.push((
Inst::xmm_rm_r_evex(Avx512Opcode::Vpmullq, RegMem::reg(xmm14), xmm10, w_xmm1),
"62D2AD0840CE",
"vpmullq %xmm14, %xmm10, %xmm1",
));

insns.push((
Inst::xmm_rm_r(SseOpcode::Pmuludq, RegMem::reg(xmm8), w_xmm9),
"66450FF4C8",
Expand Down Expand Up @@ -4283,6 +4289,7 @@ fn test_x64_emit() {
isa_flag_builder.enable("has_ssse3").unwrap();
isa_flag_builder.enable("has_sse41").unwrap();
isa_flag_builder.enable("has_avx512f").unwrap();
isa_flag_builder.enable("has_avx512dq").unwrap();
let isa_flags = x64::settings::Flags::new(&flags, isa_flag_builder);

let rru = regs::create_reg_universe_systemv(&flags);
Expand Down
57 changes: 56 additions & 1 deletion cranelift/codegen/src/isa/x64/inst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,13 @@ pub enum Inst {
dst: Writable<Reg>,
},

/// XMM binary op using an AVX-512 opcode (EVEX encoding), e.g. `vpmullq`: reads `src1` (a
/// `RegMem`) and `src2`, and writes `dst` as a pure definition.
XmmRmREvex {
op: Avx512Opcode,
src1: RegMem,
src2: Reg,
dst: Writable<Reg>,
},

/// XMM (scalar or vector) unary op: mov between XMM registers (32 64) (reg addr) reg, sqrt,
/// etc.
///
Expand Down Expand Up @@ -577,7 +584,7 @@ impl Inst {
| Inst::XmmToGpr { op, .. }
| Inst::XmmUnaryRmR { op, .. } => smallvec![op.available_from()],

Inst::XmmUnaryRmREvex { op, .. } => op.available_from(),
Inst::XmmUnaryRmREvex { op, .. } | Inst::XmmRmREvex { op, .. } => op.available_from(),
}
}
}
Expand Down Expand Up @@ -724,6 +731,23 @@ impl Inst {
Inst::XmmRmR { op, src, dst }
}

pub(crate) fn xmm_rm_r_evex(
op: Avx512Opcode,
src1: RegMem,
src2: Reg,
dst: Writable<Reg>,
) -> Self {
src1.assert_regclass_is(RegClass::V128);
debug_assert!(src2.get_class() == RegClass::V128);
debug_assert!(dst.to_reg().get_class() == RegClass::V128);
Inst::XmmRmREvex {
op,
src1,
src2,
dst,
}
}

pub(crate) fn xmm_uninit_value(dst: Writable<Reg>) -> Self {
debug_assert!(dst.to_reg().get_class() == RegClass::V128);
Inst::XmmUninitializedValue { dst }
Expand Down Expand Up @@ -1425,6 +1449,20 @@ impl PrettyPrint for Inst {
show_ireg_sized(dst.to_reg(), mb_rru, 8),
),

// Rendered as `<op> <src1>, <src2>, <dst>`, with every operand shown at size 8.
Inst::XmmRmREvex {
    op,
    src1,
    src2,
    dst,
} => {
    let mnemonic = ljustify(op.to_string());
    let first_src = src1.show_rru_sized(mb_rru, 8);
    let second_src = show_ireg_sized(*src2, mb_rru, 8);
    let dest = show_ireg_sized(dst.to_reg(), mb_rru, 8);
    format!("{} {}, {}, {}", mnemonic, first_src, second_src, dest)
}

Inst::XmmMinMaxSeq {
lhs,
rhs_dst,
Expand Down Expand Up @@ -1898,6 +1936,13 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
collector.add_mod(*dst);
}
}
// Both sources are recorded as uses. `dst` is recorded with `add_def` (a definition only),
// not with `add_mod` as the destinations of two-operand XMM instructions are.
Inst::XmmRmREvex {
src1, src2, dst, ..
} => {
src1.get_regs_as_uses(collector);
collector.add_use(*src2);
collector.add_def(*dst);
}
Inst::XmmRmRImm { op, src, dst, .. } => {
if inst.produces_const() {
// No need to account for src, since src == dst.
Expand Down Expand Up @@ -2283,6 +2328,16 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
map_mod(mapper, dst);
}
}
// Mirrors the register collection for this variant: `src1` and `src2` are mapped as uses,
// and `dst` as a definition.
Inst::XmmRmREvex {
ref mut src1,
ref mut src2,
ref mut dst,
..
} => {
src1.map_uses(mapper);
map_use(mapper, src2);
map_def(mapper, dst);
}
Inst::XmmRmiReg {
ref mut src,
ref mut dst,
Expand Down
Loading

0 comments on commit c42fe4e

Please sign in to comment.