Skip to content

Commit

Permalink
riscv64: Implement iadd_pairwise (#6568)
Browse files Browse the repository at this point in the history
* riscv64: Add Mov and VSlideUp

* riscv64: Implement `iadd_pairwise`

* riscv64: Use `late_use` in `VecAluRRRImm5`

* machinst: Add `OperandCollector::reg_fixed_late_use`
  • Loading branch information
afonso360 authored Jun 16, 2023
1 parent 9a67597 commit 62019b2
Show file tree
Hide file tree
Showing 13 changed files with 534 additions and 48 deletions.
3 changes: 0 additions & 3 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -248,12 +248,9 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
"simd_f64x2_rounding",
"simd_i16x8_arith2",
"simd_i16x8_cmp",
"simd_i16x8_extadd_pairwise_i8x16",
"simd_i16x8_q15mulr_sat_s",
"simd_i32x4_arith2",
"simd_i32x4_cmp",
"simd_i32x4_dot_i16x8",
"simd_i32x4_extadd_pairwise_i16x8",
"simd_i32x4_trunc_sat_f32x4",
"simd_i32x4_trunc_sat_f64x2",
"simd_i64x2_arith2",
Expand Down
9 changes: 9 additions & 0 deletions cranelift/codegen/src/isa/riscv64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,15 @@
(probe_count u32)
(tmp WritableReg))

(VecAluRRRImm5
(op VecAluOpRRRImm5)
(vd WritableReg)
(vd_src Reg)
(vs2 Reg)
(imm Imm5)
(mask VecOpMasking)
(vstate VState))

(VecAluRRR
(op VecAluOpRRR)
(vd WritableReg)
Expand Down
89 changes: 61 additions & 28 deletions cranelift/codegen/src/isa/riscv64/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -464,13 +464,18 @@ impl Inst {
| Inst::Cltz { .. }
| Inst::Brev8 { .. }
| Inst::StackProbeLoop { .. } => None,

// VecSetState does not expect any vstate, rather it updates it.
Inst::VecSetState { .. } => None,

// `vmv` instructions copy a set of registers and ignore vstate.
Inst::VecAluRRImm5 { op: VecAluOpRRImm5::VmvrV, .. } => None,

Inst::VecAluRR { vstate, .. } |
Inst::VecAluRRR { vstate, .. } |
Inst::VecAluRImm5 { vstate, .. } |
Inst::VecAluRRImm5 { vstate, .. } |
Inst::VecAluRRRImm5 { vstate, .. } |
// TODO: Unit-stride loads and stores only need the AVL to be correct, not
// the full vtype. A future optimization could be to decouple these two when
// updating vstate. This would allow us to avoid emitting a VecSetState in
Expand Down Expand Up @@ -951,34 +956,44 @@ impl MachInstEmit for Inst {
}

&Inst::Mov { rd, rm, ty } => {
debug_assert_ne!(rd.to_reg().class(), RegClass::Vector);
debug_assert_ne!(rm.class(), RegClass::Vector);
if rd.to_reg() != rm {
let rm = allocs.next(rm);
let rd = allocs.next_writable(rd);
if ty.is_float() {
Inst::FpuRRR {
alu_op: if ty == F32 {
FpuOPRRR::FsgnjS
} else {
FpuOPRRR::FsgnjD
},
frm: None,
rd: rd,
rs1: rm,
rs2: rm,
}
.emit(&[], sink, emit_info, state);
} else {
let x = Inst::AluRRImm12 {
alu_op: AluOPRRI::Ori,
rd: rd,
rs: rm,
imm12: Imm12::zero(),
};
x.emit(&[], sink, emit_info, state);
}
debug_assert_eq!(rd.to_reg().class(), rm.class());
if rd.to_reg() == rm {
return;
}

let rm = allocs.next(rm);
let rd = allocs.next_writable(rd);

match rm.class() {
RegClass::Int => Inst::AluRRImm12 {
alu_op: AluOPRRI::Ori,
rd: rd,
rs: rm,
imm12: Imm12::zero(),
},
RegClass::Float => Inst::FpuRRR {
alu_op: if ty == F32 {
FpuOPRRR::FsgnjS
} else {
FpuOPRRR::FsgnjD
},
frm: None,
rd: rd,
rs1: rm,
rs2: rm,
},
RegClass::Vector => Inst::VecAluRRImm5 {
op: VecAluOpRRImm5::VmvrV,
vd: rd,
vs2: rm,
// Imm 0 means copy 1 register.
imm: Imm5::maybe_from_i8(0).unwrap(),
mask: VecOpMasking::Disabled,
// Vstate for this instruction is ignored.
vstate: VState::from_type(ty),
},
}
.emit(&[], sink, emit_info, state);
}

&Inst::MovFromPReg { rd, rm } => {
Expand Down Expand Up @@ -2827,6 +2842,24 @@ impl MachInstEmit for Inst {
.emit(&[], sink, emit_info, state);
sink.bind_label(label_done, &mut state.ctrl_plane);
}
&Inst::VecAluRRRImm5 {
op,
vd,
vd_src,
imm,
vs2,
ref mask,
..
} => {
let vs2 = allocs.next(vs2);
let vd_src = allocs.next(vd_src);
let vd = allocs.next_writable(vd);
let mask = mask.with_allocs(&mut allocs);

debug_assert_eq!(vd.to_reg(), vd_src);

sink.put4(encode_valu_rrr_imm(op, vd, imm, vs2, mask));
}
&Inst::VecAluRRR {
op,
vd,
Expand Down Expand Up @@ -2854,7 +2887,7 @@ impl MachInstEmit for Inst {
let vd = allocs.next_writable(vd);
let mask = mask.with_allocs(&mut allocs);

sink.put4(encode_valu_imm(op, vd, imm, vs2, mask));
sink.put4(encode_valu_rr_imm(op, vd, imm, vs2, mask));
}
&Inst::VecAluRR {
op,
Expand Down
25 changes: 22 additions & 3 deletions cranelift/codegen/src/isa/riscv64/inst/encode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
use super::{Imm12, Imm5, UImm5, VType};
use crate::isa::riscv64::inst::reg_to_gpr_num;
use crate::isa::riscv64::lower::isle::generated_code::{
VecAluOpRImm5, VecAluOpRR, VecAluOpRRImm5, VecAluOpRRR, VecElementWidth, VecOpCategory,
VecOpMasking,
VecAluOpRImm5, VecAluOpRR, VecAluOpRRImm5, VecAluOpRRR, VecAluOpRRRImm5, VecElementWidth,
VecOpCategory, VecOpMasking,
};
use crate::machinst::isle::WritableReg;
use crate::Reg;
Expand Down Expand Up @@ -127,7 +127,7 @@ pub fn encode_valu(
/// - funct6 (6 bits)
///
/// See: https://github.com/riscv/riscv-v-spec/blob/master/valu-format.adoc
pub fn encode_valu_imm(
pub fn encode_valu_rr_imm(
op: VecAluOpRRImm5,
vd: WritableReg,
imm: Imm5,
Expand All @@ -146,6 +146,25 @@ pub fn encode_valu_imm(
)
}

/// Encodes a vector ALU instruction described by a `VecAluOpRRRImm5` opcode.
///
/// The 7-bit function field is built from the opcode's `funct6` shifted left
/// by one, with the masking encoding OR-ed into the low bit. The register
/// numbers of `vd` and `vs2`, the raw bits of `imm`, and the opcode's
/// `opcode`/`funct3` values are then handed to `encode_r_type_bits`.
///
/// NOTE(review): the immediate is passed in the argument slot that sits
/// between `funct3` and the `vs2` register number — presumably the `rs1`
/// field of the R-type layout; confirm against `encode_r_type_bits`.
///
/// See: https://github.com/riscv/riscv-v-spec/blob/master/valu-format.adoc
pub fn encode_valu_rrr_imm(
    op: VecAluOpRRRImm5,
    vd: WritableReg,
    imm: Imm5,
    vs2: Reg,
    masking: VecOpMasking,
) -> u32 {
    // funct6 occupies the upper six bits; the masking bit fills the lowest one.
    let funct7 = (op.funct6() << 1) | masking.encode();
    let imm_bits = imm.bits() as u32;
    let vd_num = reg_to_gpr_num(vd.to_reg());
    let vs2_num = reg_to_gpr_num(vs2);

    encode_r_type_bits(op.opcode(), vd_num, op.funct3(), imm_bits, vs2_num, funct7)
}

pub fn encode_valu_rr(op: VecAluOpRR, vd: WritableReg, vs: Reg, masking: VecOpMasking) -> u32 {
let funct7 = (op.funct6() << 1) | masking.encode();

Expand Down
75 changes: 68 additions & 7 deletions cranelift/codegen/src/isa/riscv64/inst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,17 @@ fn vec_mask_operands<F: Fn(VReg) -> VReg>(
VecOpMasking::Disabled => {}
}
}
/// Registers the operand required by a vector mask, as a late use.
///
/// When masking is enabled, the mask register is recorded on `collector` via
/// `reg_fixed_late_use`, pinned to `pv_reg(0)` (presumably the `v0` mask
/// register — confirm against `pv_reg`). When masking is disabled, no operand
/// is recorded.
fn vec_mask_late_operands<F: Fn(VReg) -> VReg>(
    mask: &VecOpMasking,
    collector: &mut OperandCollector<'_, F>,
) {
    // `VecOpMasking::Disabled` carries no register, so there is nothing to add.
    if let VecOpMasking::Enabled { reg } = mask {
        collector.reg_fixed_late_use(*reg, pv_reg(0).into());
    }
}

fn riscv64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCollector<'_, F>) {
match inst {
Expand Down Expand Up @@ -642,6 +653,32 @@ fn riscv64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
// gen_prologue is called at the emit stage,
// so there is no need to let regalloc know.
}
&Inst::VecAluRRRImm5 {
op,
vd,
vd_src,
vs2,
ref mask,
..
} => {
debug_assert_eq!(vd_src.class(), RegClass::Vector);
debug_assert_eq!(vd.to_reg().class(), RegClass::Vector);
debug_assert_eq!(vs2.class(), RegClass::Vector);

// If the operation forbids source/destination overlap we need to
// ensure that the source and destination registers are different.
if op.forbids_src_dst_overlaps() {
collector.reg_late_use(vs2);
collector.reg_use(vd_src);
collector.reg_reuse_def(vd, 1); // `vd` == `vd_src`.
vec_mask_late_operands(mask, collector);
} else {
collector.reg_use(vs2);
collector.reg_use(vd_src);
collector.reg_reuse_def(vd, 1); // `vd` == `vd_src`.
vec_mask_operands(mask, collector);
}
}
&Inst::VecAluRRR {
op,
vd,
Expand Down Expand Up @@ -1614,14 +1651,15 @@ impl Inst {
&MInst::Mov { rd, rm, ty } => {
let rd = format_reg(rd.to_reg(), allocs);
let rm = format_reg(rm, allocs);
let v = if ty == F32 {
"fmv.s"
} else if ty == F64 {
"fmv.d"
} else {
"mv"

let op = match ty {
F32 => "fmv.s",
F64 => "fmv.d",
ty if ty.is_vector() => "vmv1r.v",
_ => "mv",
};
format!("{} {},{}", v, rd, rm)

format!("{op} {rd},{rm}")
}
&MInst::MovFromPReg { rd, rm } => {
let rd = format_reg(rd.to_reg(), allocs);
Expand Down Expand Up @@ -1654,6 +1692,29 @@ impl Inst {
&MInst::Udf { trap_code } => format!("udf##trap_code={}", trap_code),
&MInst::EBreak {} => String::from("ebreak"),
&MInst::ECall {} => String::from("ecall"),
&Inst::VecAluRRRImm5 {
op,
vd,
imm,
vs2,
ref mask,
ref vstate,
..
} => {
let vs2_s = format_reg(vs2, allocs);
let vd_s = format_reg(vd.to_reg(), allocs);
let mask = format_mask(mask, allocs);

// Some opcodes interpret the immediate as unsigned; let's show the
// correct number here.
let imm_s = if op.imm_is_unsigned() {
format!("{}", imm.bits())
} else {
format!("{}", imm)
};

format!("{op} {vd_s},{vs2_s},{imm_s}{mask} {vstate}")
}
&Inst::VecAluRRR {
op,
vd,
Expand Down
Loading

0 comments on commit 62019b2

Please sign in to comment.