Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

riscv64: Implement iadd_pairwise #6568

Merged
merged 4 commits into from
Jun 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -248,12 +248,9 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
"simd_f64x2_rounding",
"simd_i16x8_arith2",
"simd_i16x8_cmp",
"simd_i16x8_extadd_pairwise_i8x16",
"simd_i16x8_q15mulr_sat_s",
"simd_i32x4_arith2",
"simd_i32x4_cmp",
"simd_i32x4_dot_i16x8",
"simd_i32x4_extadd_pairwise_i16x8",
"simd_i32x4_trunc_sat_f32x4",
"simd_i32x4_trunc_sat_f64x2",
"simd_i64x2_arith2",
Expand Down
9 changes: 9 additions & 0 deletions cranelift/codegen/src/isa/riscv64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,15 @@
(probe_count u32)
(tmp WritableReg))

(VecAluRRRImm5
(op VecAluOpRRRImm5)
(vd WritableReg)
(vd_src Reg)
(vs2 Reg)
(imm Imm5)
(mask VecOpMasking)
(vstate VState))

(VecAluRRR
(op VecAluOpRRR)
(vd WritableReg)
Expand Down
89 changes: 61 additions & 28 deletions cranelift/codegen/src/isa/riscv64/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -463,13 +463,18 @@ impl Inst {
| Inst::Cltz { .. }
| Inst::Brev8 { .. }
| Inst::StackProbeLoop { .. } => None,

// VecSetState does not expect any vstate, rather it updates it.
Inst::VecSetState { .. } => None,

// `vmv` instructions copy a set of registers and ignore vstate.
Inst::VecAluRRImm5 { op: VecAluOpRRImm5::VmvrV, .. } => None,

Inst::VecAluRR { vstate, .. } |
Inst::VecAluRRR { vstate, .. } |
Inst::VecAluRImm5 { vstate, .. } |
Inst::VecAluRRImm5 { vstate, .. } |
Inst::VecAluRRRImm5 { vstate, .. } |
// TODO: Unit-stride loads and stores only need the AVL to be correct, not
// the full vtype. A future optimization could be to decouple these two when
// updating vstate. This would allow us to avoid emitting a VecSetState in
Expand Down Expand Up @@ -936,34 +941,44 @@ impl MachInstEmit for Inst {
}

&Inst::Mov { rd, rm, ty } => {
debug_assert_ne!(rd.to_reg().class(), RegClass::Vector);
debug_assert_ne!(rm.class(), RegClass::Vector);
if rd.to_reg() != rm {
let rm = allocs.next(rm);
let rd = allocs.next_writable(rd);
if ty.is_float() {
Inst::FpuRRR {
alu_op: if ty == F32 {
FpuOPRRR::FsgnjS
} else {
FpuOPRRR::FsgnjD
},
frm: None,
rd: rd,
rs1: rm,
rs2: rm,
}
.emit(&[], sink, emit_info, state);
} else {
let x = Inst::AluRRImm12 {
alu_op: AluOPRRI::Ori,
rd: rd,
rs: rm,
imm12: Imm12::zero(),
};
x.emit(&[], sink, emit_info, state);
}
debug_assert_eq!(rd.to_reg().class(), rm.class());
if rd.to_reg() == rm {
return;
}

let rm = allocs.next(rm);
let rd = allocs.next_writable(rd);

match rm.class() {
RegClass::Int => Inst::AluRRImm12 {
alu_op: AluOPRRI::Ori,
rd: rd,
rs: rm,
imm12: Imm12::zero(),
},
RegClass::Float => Inst::FpuRRR {
alu_op: if ty == F32 {
FpuOPRRR::FsgnjS
} else {
FpuOPRRR::FsgnjD
},
frm: None,
rd: rd,
rs1: rm,
rs2: rm,
},
RegClass::Vector => Inst::VecAluRRImm5 {
op: VecAluOpRRImm5::VmvrV,
vd: rd,
vs2: rm,
// Imm 0 means copy 1 register.
imm: Imm5::maybe_from_i8(0).unwrap(),
mask: VecOpMasking::Disabled,
// Vstate for this instruction is ignored.
vstate: VState::from_type(ty),
},
}
.emit(&[], sink, emit_info, state);
}

&Inst::MovFromPReg { rd, rm } => {
Expand Down Expand Up @@ -2812,6 +2827,24 @@ impl MachInstEmit for Inst {
.emit(&[], sink, emit_info, state);
sink.bind_label(label_done, &mut state.ctrl_plane);
}
&Inst::VecAluRRRImm5 {
op,
vd,
vd_src,
imm,
vs2,
ref mask,
..
} => {
let vs2 = allocs.next(vs2);
let vd_src = allocs.next(vd_src);
let vd = allocs.next_writable(vd);
let mask = mask.with_allocs(&mut allocs);

debug_assert_eq!(vd.to_reg(), vd_src);

sink.put4(encode_valu_rrr_imm(op, vd, imm, vs2, mask));
}
&Inst::VecAluRRR {
op,
vd,
Expand Down Expand Up @@ -2839,7 +2872,7 @@ impl MachInstEmit for Inst {
let vd = allocs.next_writable(vd);
let mask = mask.with_allocs(&mut allocs);

sink.put4(encode_valu_imm(op, vd, imm, vs2, mask));
sink.put4(encode_valu_rr_imm(op, vd, imm, vs2, mask));
}
&Inst::VecAluRR {
op,
Expand Down
25 changes: 22 additions & 3 deletions cranelift/codegen/src/isa/riscv64/inst/encode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
use super::{Imm12, Imm5, UImm5, VType};
use crate::isa::riscv64::inst::reg_to_gpr_num;
use crate::isa::riscv64::lower::isle::generated_code::{
VecAluOpRImm5, VecAluOpRR, VecAluOpRRImm5, VecAluOpRRR, VecElementWidth, VecOpCategory,
VecOpMasking,
VecAluOpRImm5, VecAluOpRR, VecAluOpRRImm5, VecAluOpRRR, VecAluOpRRRImm5, VecElementWidth,
VecOpCategory, VecOpMasking,
};
use crate::machinst::isle::WritableReg;
use crate::Reg;
Expand Down Expand Up @@ -127,7 +127,7 @@ pub fn encode_valu(
/// - funct6 (6 bits)
///
/// See: https://github.com/riscv/riscv-v-spec/blob/master/valu-format.adoc
pub fn encode_valu_imm(
pub fn encode_valu_rr_imm(
op: VecAluOpRRImm5,
vd: WritableReg,
imm: Imm5,
Expand All @@ -146,6 +146,25 @@ pub fn encode_valu_imm(
)
}

/// Encodes a vector ALU instruction whose operands are a destination
/// register, one vector source register and a 5-bit immediate.
///
/// The `funct7` value handed to `encode_r_type_bits` is built from the
/// opcode's `funct6` shifted left by one, with the masking bit in the
/// lowest position. The immediate's raw bits are passed in the argument
/// slot between `funct3` and `vs2`.
///
/// See: https://github.com/riscv/riscv-v-spec/blob/master/valu-format.adoc
pub fn encode_valu_rrr_imm(
    op: VecAluOpRRRImm5,
    vd: WritableReg,
    imm: Imm5,
    vs2: Reg,
    masking: VecOpMasking,
) -> u32 {
    // funct6 occupies the upper six bits, the mask bit the lowest one.
    let funct6_and_vm = (op.funct6() << 1) | masking.encode();
    let imm_bits = imm.bits() as u32;

    let vd_num = reg_to_gpr_num(vd.to_reg());
    let vs2_num = reg_to_gpr_num(vs2);

    encode_r_type_bits(
        op.opcode(),
        vd_num,
        op.funct3(),
        imm_bits,
        vs2_num,
        funct6_and_vm,
    )
}

pub fn encode_valu_rr(op: VecAluOpRR, vd: WritableReg, vs: Reg, masking: VecOpMasking) -> u32 {
let funct7 = (op.funct6() << 1) | masking.encode();

Expand Down
75 changes: 68 additions & 7 deletions cranelift/codegen/src/isa/riscv64/inst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,17 @@ fn vec_mask_operands<F: Fn(VReg) -> VReg>(
VecOpMasking::Disabled => {}
}
}
/// Registers the mask operand of a vector instruction with the operand
/// collector as a *late* use.
///
/// When masking is enabled, the mask register is recorded as a fixed late
/// use pinned to `pv_reg(0)`. When masking is disabled nothing is
/// collected.
fn vec_mask_late_operands<F: Fn(VReg) -> VReg>(
    mask: &VecOpMasking,
    collector: &mut OperandCollector<'_, F>,
) {
    // Only the `Enabled` variant carries a register to collect.
    if let VecOpMasking::Enabled { reg } = mask {
        collector.reg_fixed_late_use(*reg, pv_reg(0).into());
    }
}

fn riscv64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCollector<'_, F>) {
match inst {
Expand Down Expand Up @@ -640,6 +651,32 @@ fn riscv64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
// gen_prologue is called at emit stage.
// no need let reg alloc know.
}
&Inst::VecAluRRRImm5 {
op,
vd,
vd_src,
vs2,
ref mask,
..
} => {
debug_assert_eq!(vd_src.class(), RegClass::Vector);
debug_assert_eq!(vd.to_reg().class(), RegClass::Vector);
debug_assert_eq!(vs2.class(), RegClass::Vector);

// If the operation forbids source/destination overlap we need to
// ensure that the source and destination registers are different.
if op.forbids_src_dst_overlaps() {
collector.reg_late_use(vs2);
collector.reg_use(vd_src);
collector.reg_reuse_def(vd, 1); // `vd` == `vd_src`.
vec_mask_late_operands(mask, collector);
} else {
collector.reg_use(vs2);
collector.reg_use(vd_src);
collector.reg_reuse_def(vd, 1); // `vd` == `vd_src`.
vec_mask_operands(mask, collector);
}
}
&Inst::VecAluRRR {
op,
vd,
Expand Down Expand Up @@ -1612,14 +1649,15 @@ impl Inst {
&MInst::Mov { rd, rm, ty } => {
let rd = format_reg(rd.to_reg(), allocs);
let rm = format_reg(rm, allocs);
let v = if ty == F32 {
"fmv.s"
} else if ty == F64 {
"fmv.d"
} else {
"mv"

let op = match ty {
F32 => "fmv.s",
F64 => "fmv.d",
ty if ty.is_vector() => "vmv1r.v",
_ => "mv",
};
format!("{} {},{}", v, rd, rm)

format!("{op} {rd},{rm}")
}
&MInst::MovFromPReg { rd, rm } => {
let rd = format_reg(rd.to_reg(), allocs);
Expand Down Expand Up @@ -1652,6 +1690,29 @@ impl Inst {
&MInst::Udf { trap_code } => format!("udf##trap_code={}", trap_code),
&MInst::EBreak {} => String::from("ebreak"),
&MInst::ECall {} => String::from("ecall"),
&Inst::VecAluRRRImm5 {
op,
vd,
imm,
vs2,
ref mask,
ref vstate,
..
} => {
let vs2_s = format_reg(vs2, allocs);
let vd_s = format_reg(vd.to_reg(), allocs);
let mask = format_mask(mask, allocs);

// Some opcodes interpret the immediate as unsigned, so let's show the
// correct number here.
let imm_s = if op.imm_is_unsigned() {
format!("{}", imm.bits())
} else {
format!("{}", imm)
};

format!("{op} {vd_s},{vs2_s},{imm_s}{mask} {vstate}")
}
&Inst::VecAluRRR {
op,
vd,
Expand Down
Loading