From 867551a6f9aa8c91f44cb9ccc006ca24258f332a Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Sun, 23 Apr 2023 23:01:45 +0100 Subject: [PATCH 01/14] riscv64: Use `LoadAddr` on `Load`/`Store` --- .../codegen/src/isa/riscv64/inst/emit.rs | 100 +++++++----------- 1 file changed, 41 insertions(+), 59 deletions(-) diff --git a/cranelift/codegen/src/isa/riscv64/inst/emit.rs b/cranelift/codegen/src/isa/riscv64/inst/emit.rs index ac5d72cc9d4c..e8d319c08db7 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/emit.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/emit.rs @@ -664,80 +664,62 @@ impl MachInstEmit for Inst { from, flags, } => { - let x; let base = from.get_base_register(); let base = allocs.next(base); let rd = allocs.next_writable(rd); let offset = from.get_offset_with_state(state); - if let Some(imm12) = Imm12::maybe_from_u64(offset as u64) { - let srcloc = state.cur_srcloc(); - if !srcloc.is_default() && !flags.notrap() { - // Register the offset at which the actual load instruction starts. - sink.add_trap(TrapCode::HeapOutOfBounds); - } - x = op.op_code() - | reg_to_gpr_num(rd.to_reg()) << 7 - | op.funct3() << 12 - | reg_to_gpr_num(base) << 15 - | (imm12.as_u32()) << 20; - sink.put4(x); + + let (addr, imm12) = if let Some(imm12) = Imm12::maybe_from_u64(offset as u64) { + // If the offset fits into an imm12 we can directly encode it. + (base, imm12) } else { + // Otherwise load the address it into a reg and load from it. let tmp = writable_spilltmp_reg(); - let mut insts = - LoadConstant::U64(offset as u64).load_constant_and_add(tmp, base); - let srcloc = state.cur_srcloc(); - if !srcloc.is_default() && !flags.notrap() { - // Register the offset at which the actual load instruction starts. 
- sink.add_trap(TrapCode::HeapOutOfBounds); - } - insts.push(Inst::Load { - op, - from: AMode::RegOffset(tmp.to_reg(), 0, I64), - rd, - flags, - }); - insts - .into_iter() - .for_each(|inst| inst.emit(&[], sink, emit_info, state)); + Inst::LoadAddr { rd: tmp, mem: from }.emit(&[], sink, emit_info, state); + (tmp.to_reg(), Imm12::zero()) + }; + + let srcloc = state.cur_srcloc(); + if !srcloc.is_default() && !flags.notrap() { + // Register the offset at which the actual load instruction starts. + sink.add_trap(TrapCode::HeapOutOfBounds); } + sink.put4( + op.op_code() + | reg_to_gpr_num(rd.to_reg()) << 7 + | op.funct3() << 12 + | reg_to_gpr_num(addr) << 15 + | (imm12.as_u32()) << 20, + ); } &Inst::Store { op, src, flags, to } => { let base = allocs.next(to.get_base_register()); let src = allocs.next(src); let offset = to.get_offset_with_state(state); - let x; - if let Some(imm12) = Imm12::maybe_from_u64(offset as u64) { - let srcloc = state.cur_srcloc(); - if !srcloc.is_default() && !flags.notrap() { - // Register the offset at which the actual load instruction starts. - sink.add_trap(TrapCode::HeapOutOfBounds); - } - x = op.op_code() - | (imm12.as_u32() & 0x1f) << 7 - | op.funct3() << 12 - | reg_to_gpr_num(base) << 15 - | reg_to_gpr_num(src) << 20 - | (imm12.as_u32() >> 5) << 25; - sink.put4(x); + + let (addr, imm12) = if let Some(imm12) = Imm12::maybe_from_u64(offset as u64) { + // If the offset fits into an imm12 we can directly encode it. + (base, imm12) } else { + // Otherwise load the address it into a reg and load from it. let tmp = writable_spilltmp_reg(); - let mut insts = - LoadConstant::U64(offset as u64).load_constant_and_add(tmp, base); - let srcloc = state.cur_srcloc(); - if !srcloc.is_default() && !flags.notrap() { - // Register the offset at which the actual load instruction starts. 
- sink.add_trap(TrapCode::HeapOutOfBounds); - } - insts.push(Inst::Store { - op, - to: AMode::RegOffset(tmp.to_reg(), 0, I64), - flags, - src, - }); - insts - .into_iter() - .for_each(|inst| inst.emit(&[], sink, emit_info, state)); + Inst::LoadAddr { rd: tmp, mem: to }.emit(&[], sink, emit_info, state); + (tmp.to_reg(), Imm12::zero()) + }; + + let srcloc = state.cur_srcloc(); + if !srcloc.is_default() && !flags.notrap() { + // Register the offset at which the actual load instruction starts. + sink.add_trap(TrapCode::HeapOutOfBounds); } + sink.put4( + op.op_code() + | (imm12.as_u32() & 0x1f) << 7 + | op.funct3() << 12 + | reg_to_gpr_num(addr) << 15 + | reg_to_gpr_num(src) << 20 + | (imm12.as_u32() >> 5) << 25, + ); } &Inst::Args { .. } => { // Nothing: this is a pseudoinstruction that serves From 9b9268dea789893b7119e7218773c388eb234fbc Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Sun, 23 Apr 2023 23:04:17 +0100 Subject: [PATCH 02/14] riscv64: Add I Type encoding --- cranelift/codegen/src/isa/riscv64/inst/emit.rs | 15 ++++++++------- .../codegen/src/isa/riscv64/inst/encode.rs | 17 ++++++++++++++++- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/cranelift/codegen/src/isa/riscv64/inst/emit.rs b/cranelift/codegen/src/isa/riscv64/inst/emit.rs index e8d319c08db7..2b79c16b9684 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/emit.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/emit.rs @@ -684,13 +684,14 @@ impl MachInstEmit for Inst { // Register the offset at which the actual load instruction starts. 
sink.add_trap(TrapCode::HeapOutOfBounds); } - sink.put4( - op.op_code() - | reg_to_gpr_num(rd.to_reg()) << 7 - | op.funct3() << 12 - | reg_to_gpr_num(addr) << 15 - | (imm12.as_u32()) << 20, - ); + + sink.put4(encode_i_type( + op.op_code(), + rd.to_reg(), + op.funct3(), + addr, + imm12, + )); } &Inst::Store { op, src, flags, to } => { let base = allocs.next(to.get_base_register()); diff --git a/cranelift/codegen/src/isa/riscv64/inst/encode.rs b/cranelift/codegen/src/isa/riscv64/inst/encode.rs index 69ddbafb9d4b..51a5edefbb6c 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/encode.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/encode.rs @@ -6,7 +6,7 @@ //! Some instructions especially in extensions have slight variations from //! the base RISC-V specification. -use super::{Imm5, UImm5, VType}; +use super::{Imm12, Imm5, UImm5, VType}; use crate::isa::riscv64::inst::reg_to_gpr_num; use crate::isa::riscv64::lower::isle::generated_code::{ VecAluOpRRImm5, VecAluOpRRR, VecElementWidth, VecOpCategory, VecOpMasking, @@ -52,6 +52,21 @@ pub fn encode_r_type( ) } +/// Encode an I-type instruction. +/// +/// Layout: +/// 0-------6-7-------11-12------14-15------19-20------------------31 +/// | Opcode | rd | width | rs1 | Offset[11:0] | +pub fn encode_i_type(opcode: u32, rd: Reg, width: u32, rs1: Reg, offset: Imm12) -> u32 { + let mut bits = 0; + bits |= opcode & 0b1111111; + bits |= reg_to_gpr_num(rd) << 7; + bits |= (width & 0b111) << 12; + bits |= reg_to_gpr_num(rs1) << 15; + bits |= (offset.as_u32() & 0b1111_1111_1111) << 20; + bits +} + /// Encodes a Vector ALU instruction. 
/// /// Fields: From e420731351ede8f724c37b6f30fbf71a33fcc195 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Sun, 23 Apr 2023 23:07:04 +0100 Subject: [PATCH 03/14] riscv64: Add S Type encoding --- cranelift/codegen/src/isa/riscv64/inst/emit.rs | 10 ++-------- cranelift/codegen/src/isa/riscv64/inst/encode.rs | 16 ++++++++++++++++ 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/cranelift/codegen/src/isa/riscv64/inst/emit.rs b/cranelift/codegen/src/isa/riscv64/inst/emit.rs index 2b79c16b9684..6f017a7c859b 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/emit.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/emit.rs @@ -713,14 +713,8 @@ impl MachInstEmit for Inst { // Register the offset at which the actual load instruction starts. sink.add_trap(TrapCode::HeapOutOfBounds); } - sink.put4( - op.op_code() - | (imm12.as_u32() & 0x1f) << 7 - | op.funct3() << 12 - | reg_to_gpr_num(addr) << 15 - | reg_to_gpr_num(src) << 20 - | (imm12.as_u32() >> 5) << 25, - ); + + sink.put4(encode_s_type(op.op_code(), op.funct3(), addr, src, imm12)); } &Inst::Args { .. } => { // Nothing: this is a pseudoinstruction that serves diff --git a/cranelift/codegen/src/isa/riscv64/inst/encode.rs b/cranelift/codegen/src/isa/riscv64/inst/encode.rs index 51a5edefbb6c..693518d7495d 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/encode.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/encode.rs @@ -67,6 +67,22 @@ pub fn encode_i_type(opcode: u32, rd: Reg, width: u32, rs1: Reg, offset: Imm12) bits } +/// Encode an S-type instruction. 
+/// +/// Layout: +/// 0-------6-7-------11-12------14-15------19-20---24-25-------------31 +/// | Opcode | imm[4:0] | width | base | src | imm[11:5] | +pub fn encode_s_type(opcode: u32, width: u32, base: Reg, src: Reg, offset: Imm12) -> u32 { + let mut bits = 0; + bits |= opcode & 0b1111111; + bits |= (offset.as_u32() & 0b11111) << 7; + bits |= (width & 0b111) << 12; + bits |= reg_to_gpr_num(base) << 15; + bits |= reg_to_gpr_num(src) << 20; + bits |= ((offset.as_u32() >> 5) & 0b1111111) << 25; + bits +} + /// Encodes a Vector ALU instruction. /// /// Fields: From ac5fbd3ccd2a59e0968b7561ca032bd692c3711d Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Sun, 23 Apr 2023 23:40:45 +0100 Subject: [PATCH 04/14] riscv64: Use `LoadAddr` on `VecLoad`/`VecStore` --- .../codegen/src/isa/riscv64/inst/args.rs | 8 + .../codegen/src/isa/riscv64/inst/emit.rs | 55 ++- .../codegen/src/isa/riscv64/inst/vector.rs | 9 + .../filetests/isa/riscv64/simd-abi.clif | 407 +++--------------- .../filetests/isa/riscv64/simd-loads.clif | 32 +- .../filetests/isa/riscv64/simd-stores.clif | 32 +- 6 files changed, 115 insertions(+), 428 deletions(-) diff --git a/cranelift/codegen/src/isa/riscv64/inst/args.rs b/cranelift/codegen/src/isa/riscv64/inst/args.rs index 89e980bd0256..585e1f2bfc11 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/args.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/args.rs @@ -42,6 +42,14 @@ impl AMode { AMode::RegOffset(reg, imm, ty) } + pub(crate) fn with_allocs(self, allocs: &mut AllocationConsumer<'_>) -> Self { + let reg = allocs.next(self.get_base_register()); + match self { + AMode::RegOffset(_, offset, ty) => AMode::RegOffset(reg, offset, ty), + AMode::SPOffset(..) | AMode::FPOffset(..) | AMode::NominalSPOffset(..) => self, + } + } + pub(crate) fn get_base_register(&self) -> Reg { match self { &AMode::RegOffset(reg, ..) 
=> reg, diff --git a/cranelift/codegen/src/isa/riscv64/inst/emit.rs b/cranelift/codegen/src/isa/riscv64/inst/emit.rs index 6f017a7c859b..c524ed277a18 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/emit.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/emit.rs @@ -664,10 +664,10 @@ impl MachInstEmit for Inst { from, flags, } => { + let from = from.clone().with_allocs(&mut allocs); let base = from.get_base_register(); - let base = allocs.next(base); - let rd = allocs.next_writable(rd); let offset = from.get_offset_with_state(state); + let rd = allocs.next_writable(rd); let (addr, imm12) = if let Some(imm12) = Imm12::maybe_from_u64(offset as u64) { // If the offset fits into an imm12 we can directly encode it. @@ -694,9 +694,10 @@ impl MachInstEmit for Inst { )); } &Inst::Store { op, src, flags, to } => { - let base = allocs.next(to.get_base_register()); - let src = allocs.next(src); + let to = to.clone().with_allocs(&mut allocs); + let base = to.get_base_register(); let offset = to.get_offset_with_state(state); + let src = allocs.next(src); let (addr, imm12) = if let Some(imm12) = Imm12::maybe_from_u64(offset as u64) { // If the offset fits into an imm12 we can directly encode it. @@ -1178,10 +1179,10 @@ impl MachInstEmit for Inst { } .emit(&[], sink, emit_info, state); } else { - let insts = LoadConstant::U64(offset as u64).load_constant_and_add(rd, base); - insts + LoadConstant::U64(offset as u64) + .load_constant_and_add(rd, base) .into_iter() - .for_each(|i| i.emit(&[], sink, emit_info, state)); + .for_each(|inst| inst.emit(&[], sink, emit_info, state)); } } @@ -2798,16 +2799,20 @@ impl MachInstEmit for Inst { flags, .. } => { - let offset = from.get_offset_with_state(state); - let from_reg = allocs.next(from.get_base_register()); + let from = from.clone().with_allocs(&mut allocs); let to = allocs.next_writable(to); // Vector Loads don't support immediate offsets, so we need to load it into a register. 
- let addr = writable_spilltmp_reg(); - LoadConstant::U64(offset as u64) - .load_constant_and_add(addr, from_reg) - .into_iter() - .for_each(|inst| inst.emit(&[], sink, emit_info, state)); + let addr = match from { + VecAMode::UnitStride { base } if base.get_offset_with_state(state) == 0 => { + base.get_base_register() + } + VecAMode::UnitStride { base } => { + let tmp = writable_spilltmp_reg(); + Inst::LoadAddr { rd: tmp, mem: base }.emit(&[], sink, emit_info, state); + tmp.to_reg() + } + }; let srcloc = state.cur_srcloc(); if !srcloc.is_default() && !flags.notrap() { @@ -2819,7 +2824,7 @@ impl MachInstEmit for Inst { 0x07, to.to_reg(), eew, - addr.to_reg(), + addr, from.lumop(), // We don't implement masking yet. VecOpMasking::Disabled, @@ -2835,16 +2840,20 @@ impl MachInstEmit for Inst { flags, .. } => { - let offset = to.get_offset_with_state(state); - let to_reg = allocs.next(to.get_base_register()); + let to = to.clone().with_allocs(&mut allocs); let from = allocs.next(from); // Vector Stores don't support immediate offsets, so we need to load it into a register. - let addr = writable_spilltmp_reg(); - LoadConstant::U64(offset as u64) - .load_constant_and_add(addr, to_reg) - .into_iter() - .for_each(|inst| inst.emit(&[], sink, emit_info, state)); + let addr = match to { + VecAMode::UnitStride { base } if base.get_offset_with_state(state) == 0 => { + base.get_base_register() + } + VecAMode::UnitStride { base } => { + let tmp = writable_spilltmp_reg(); + Inst::LoadAddr { rd: tmp, mem: base }.emit(&[], sink, emit_info, state); + tmp.to_reg() + } + }; let srcloc = state.cur_srcloc(); if !srcloc.is_default() && !flags.notrap() { @@ -2856,7 +2865,7 @@ impl MachInstEmit for Inst { 0x27, from, eew, - addr.to_reg(), + addr, to.sumop(), // We don't implement masking yet. 
VecOpMasking::Disabled, diff --git a/cranelift/codegen/src/isa/riscv64/inst/vector.rs b/cranelift/codegen/src/isa/riscv64/inst/vector.rs index 596000246f40..4b7330ab8d5e 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/vector.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/vector.rs @@ -1,3 +1,4 @@ +use crate::isa::riscv64::inst::AllocationConsumer; use crate::isa::riscv64::inst::EmitState; use crate::isa::riscv64::lower::isle::generated_code::{ VecAMode, VecAluOpRRImm5, VecAluOpRRR, VecAvl, VecElementWidth, VecLmul, VecMaskMode, @@ -310,6 +311,14 @@ impl VecAMode { } } + pub(crate) fn with_allocs(self, allocs: &mut AllocationConsumer<'_>) -> Self { + match self { + VecAMode::UnitStride { base } => VecAMode::UnitStride { + base: base.with_allocs(allocs), + }, + } + } + pub(crate) fn get_offset_with_state(&self, state: &EmitState) -> i64 { match self { VecAMode::UnitStride { base, .. } => base.get_offset_with_state(state), diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-abi.clif b/cranelift/filetests/filetests/isa/riscv64/simd-abi.clif index 787d8b0b6ade..daf0eda889ff 100644 --- a/cranelift/filetests/filetests/isa/riscv64/simd-abi.clif +++ b/cranelift/filetests/filetests/isa/riscv64/simd-abi.clif @@ -151,413 +151,122 @@ block0( ; fsd fa0, 0(sp) ; fsd fa1, 8(sp) ; .byte 0x57, 0x70, 0x08, 0xcc -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x10, 0x00, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, s0 +; addi t6, s0, 0x10 ; .byte 0x07, 0x8e, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x20, 0x00, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, s0 +; addi t6, s0, 0x20 ; .byte 0x87, 0x8e, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x30, 0x00, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, s0 +; addi t6, s0, 0x30 ; .byte 0x07, 0x8f, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x40, 0x00, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, s0 +; addi t6, s0, 0x40 
; .byte 0x87, 0x8f, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x50, 0x00, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, s0 +; addi t6, s0, 0x50 ; .byte 0x07, 0x80, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x60, 0x00, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, s0 +; addi t6, s0, 0x60 ; .byte 0x87, 0x80, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x70, 0x00, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, s0 +; addi t6, s0, 0x70 ; .byte 0x07, 0x81, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x80, 0x00, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, s0 +; addi t6, s0, 0x80 ; .byte 0x87, 0x81, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x90, 0x00, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, s0 +; addi t6, s0, 0x90 ; .byte 0x87, 0x82, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0xa0, 0x00, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, s0 +; addi t6, s0, 0xa0 ; .byte 0x87, 0x83, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0xb0, 0x00, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, s0 +; addi t6, s0, 0xb0 ; .byte 0x07, 0x82, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0xc0, 0x00, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, s0 +; addi t6, s0, 0xc0 ; .byte 0x07, 0x83, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0xd0, 0x00, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, s0 +; addi t6, s0, 0xd0 ; .byte 0x87, 0x8c, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0xe0, 0x00, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, s0 +; addi t6, s0, 0xe0 ; .byte 0x87, 0x8d, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0xf0, 0x00, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, s0 +; addi t6, s0, 0xf0 ; .byte 0x87, 0x84, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) 
-; j 0xc -; .byte 0x00, 0x01, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, s0 +; addi t6, s0, 0x100 ; .byte 0x87, 0x89, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x10, 0x01, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, s0 +; addi t6, s0, 0x110 ; .byte 0x87, 0x8a, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x20, 0x01, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, s0 +; addi t6, s0, 0x120 ; .byte 0x87, 0x8b, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x30, 0x01, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, s0 +; addi t6, s0, 0x130 ; .byte 0x07, 0x8d, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x40, 0x01, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, s0 +; addi t6, s0, 0x140 ; .byte 0x07, 0x84, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x50, 0x01, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, s0 +; addi t6, s0, 0x150 ; .byte 0x07, 0x89, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x60, 0x01, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, s0 +; addi t6, s0, 0x160 ; .byte 0x07, 0x8a, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x70, 0x01, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, s0 +; addi t6, s0, 0x170 ; .byte 0x07, 0x8b, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x80, 0x01, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, s0 +; addi t6, s0, 0x180 ; .byte 0x07, 0x8c, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x90, 0x01, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, s0 +; addi t6, s0, 0x190 ; .byte 0x87, 0x85, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0xa0, 0x01, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, s0 +; addi t6, s0, 0x1a0 ; .byte 0x07, 0x85, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x00, 0x00, 0x00, 0x00 -; .byte 
0x00, 0x00, 0x00, 0x00 -; add t6, t6, a0 -; .byte 0x27, 0x8c, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x10, 0x00, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, a0 +; .byte 0x27, 0x0c, 0x05, 0x02 +; addi t6, a0, 0x10 ; .byte 0x27, 0x8b, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x20, 0x00, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, a0 +; addi t6, a0, 0x20 ; .byte 0x27, 0x8a, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x30, 0x00, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, a0 +; addi t6, a0, 0x30 ; .byte 0x27, 0x89, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x40, 0x00, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, a0 +; addi t6, a0, 0x40 ; .byte 0x27, 0x84, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x50, 0x00, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, a0 +; addi t6, a0, 0x50 ; .byte 0x27, 0x8d, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x60, 0x00, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, a0 +; addi t6, a0, 0x60 ; .byte 0xa7, 0x8b, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x70, 0x00, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, a0 +; addi t6, a0, 0x70 ; .byte 0xa7, 0x8a, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x80, 0x00, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, a0 +; addi t6, a0, 0x80 ; .byte 0xa7, 0x89, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x90, 0x00, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, a0 +; addi t6, a0, 0x90 ; .byte 0xa7, 0x84, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0xa0, 0x00, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, a0 +; addi t6, a0, 0xa0 ; .byte 0xa7, 0x8d, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0xb0, 0x00, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, a0 +; addi 
t6, a0, 0xb0 ; .byte 0xa7, 0x8c, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0xc0, 0x00, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, a0 +; addi t6, a0, 0xc0 ; .byte 0x27, 0x83, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0xd0, 0x00, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, a0 +; addi t6, a0, 0xd0 ; .byte 0x27, 0x82, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0xe0, 0x00, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, a0 +; addi t6, a0, 0xe0 ; .byte 0xa7, 0x83, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0xf0, 0x00, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, a0 +; addi t6, a0, 0xf0 ; .byte 0xa7, 0x82, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x00, 0x01, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, a0 +; addi t6, a0, 0x100 ; .byte 0xa7, 0x81, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x10, 0x01, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, a0 +; addi t6, a0, 0x110 ; .byte 0x27, 0x81, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x20, 0x01, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, a0 +; addi t6, a0, 0x120 ; .byte 0xa7, 0x80, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x30, 0x01, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, a0 +; addi t6, a0, 0x130 ; .byte 0x27, 0x80, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x40, 0x01, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, a0 +; addi t6, a0, 0x140 ; .byte 0xa7, 0x8f, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x50, 0x01, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, a0 +; addi t6, a0, 0x150 ; .byte 0x27, 0x8f, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x60, 0x01, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, a0 +; addi t6, a0, 0x160 ; .byte 0xa7, 0x8e, 0x0f, 0x02 -; auipc t6, 
0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x70, 0x01, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, a0 +; addi t6, a0, 0x170 ; .byte 0x27, 0x8e, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x80, 0x01, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, a0 +; addi t6, a0, 0x180 ; .byte 0xa7, 0x88, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x90, 0x01, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, a0 +; addi t6, a0, 0x190 ; .byte 0x27, 0x88, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0xa0, 0x01, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, a0 +; addi t6, a0, 0x1a0 ; .byte 0xa7, 0x87, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0xb0, 0x01, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, a0 +; addi t6, a0, 0x1b0 ; .byte 0x27, 0x87, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0xc0, 0x01, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, a0 +; addi t6, a0, 0x1c0 ; .byte 0xa7, 0x86, 0x0f, 0x02 -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0xd0, 0x01, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, a0 +; addi t6, a0, 0x1d0 ; .byte 0x27, 0x86, 0x0f, 0x02 ; fld fa4, 8(sp) -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0xe0, 0x01, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, a0 +; addi t6, a0, 0x1e0 ; .byte 0x27, 0x87, 0x0f, 0x02 ; fld fa7, 0(sp) -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0xf0, 0x01, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, a0 +; addi t6, a0, 0x1f0 ; .byte 0xa7, 0x88, 0x0f, 0x02 ; addi sp, sp, 0x70 ; fld fs0, -8(sp) diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-loads.clif b/cranelift/filetests/filetests/isa/riscv64/simd-loads.clif index c0a8e8551267..54988d46260c 100644 --- a/cranelift/filetests/filetests/isa/riscv64/simd-loads.clif +++ b/cranelift/filetests/filetests/isa/riscv64/simd-loads.clif @@ -17,13 +17,7 @@ block0(v0: i64): ; 
Disassembled: ; block0: ; offset 0x0 ; .byte 0x57, 0x70, 0x08, 0xcc -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x00, 0x00, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, a0 -; .byte 0x07, 0x85, 0x0f, 0x02 +; .byte 0x07, 0x05, 0x05, 0x02 ; ret function %load_i16x8(i64) -> i16x8 { @@ -40,13 +34,7 @@ block0(v0: i64): ; Disassembled: ; block0: ; offset 0x0 ; .byte 0x57, 0x70, 0x84, 0xcc -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x00, 0x00, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, a0 -; .byte 0x07, 0xd5, 0x0f, 0x02 +; .byte 0x07, 0x55, 0x05, 0x02 ; ret function %load_i32x4(i64) -> i32x4 { @@ -63,13 +51,7 @@ block0(v0: i64): ; Disassembled: ; block0: ; offset 0x0 ; .byte 0x57, 0x70, 0x02, 0xcd -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x00, 0x00, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, a0 -; .byte 0x07, 0xe5, 0x0f, 0x02 +; .byte 0x07, 0x65, 0x05, 0x02 ; ret function %load_i64x2(i64) -> i64x2 { @@ -86,12 +68,6 @@ block0(v0: i64): ; Disassembled: ; block0: ; offset 0x0 ; .byte 0x57, 0x70, 0x81, 0xcd -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x00, 0x00, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, a0 -; .byte 0x07, 0xf5, 0x0f, 0x02 +; .byte 0x07, 0x75, 0x05, 0x02 ; ret diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-stores.clif b/cranelift/filetests/filetests/isa/riscv64/simd-stores.clif index f969243c1103..4c93773d290e 100644 --- a/cranelift/filetests/filetests/isa/riscv64/simd-stores.clif +++ b/cranelift/filetests/filetests/isa/riscv64/simd-stores.clif @@ -17,13 +17,7 @@ block0(v0: i64, v1: i8x16): ; Disassembled: ; block0: ; offset 0x0 ; .byte 0x57, 0x70, 0x08, 0xcc -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x00, 0x00, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, a0 -; .byte 0x27, 0x85, 0x0f, 0x02 +; .byte 0x27, 0x05, 0x05, 0x02 ; ret function %store_i16x8(i64, i16x8) { @@ -40,13 +34,7 @@ block0(v0: i64, v1: i16x8): ; Disassembled: ; block0: ; 
offset 0x0 ; .byte 0x57, 0x70, 0x84, 0xcc -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x00, 0x00, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, a0 -; .byte 0x27, 0xd5, 0x0f, 0x02 +; .byte 0x27, 0x55, 0x05, 0x02 ; ret function %store_i32x4(i64, i32x4) { @@ -63,13 +51,7 @@ block0(v0: i64, v1: i32x4): ; Disassembled: ; block0: ; offset 0x0 ; .byte 0x57, 0x70, 0x02, 0xcd -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x00, 0x00, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, a0 -; .byte 0x27, 0xe5, 0x0f, 0x02 +; .byte 0x27, 0x65, 0x05, 0x02 ; ret function %store_i64x2(i64, i64x2) { @@ -86,12 +68,6 @@ block0(v0: i64, v1: i64x2): ; Disassembled: ; block0: ; offset 0x0 ; .byte 0x57, 0x70, 0x81, 0xcd -; auipc t6, 0 -; ld t6, 0xc(t6) -; j 0xc -; .byte 0x00, 0x00, 0x00, 0x00 -; .byte 0x00, 0x00, 0x00, 0x00 -; add t6, t6, a0 -; .byte 0x27, 0xf5, 0x0f, 0x02 +; .byte 0x27, 0x75, 0x05, 0x02 ; ret From 16687fa1f76664908004621971930e5e41f1c543 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Mon, 24 Apr 2023 13:52:32 +0100 Subject: [PATCH 05/14] riscv64: Add Const/Lable AModes --- cranelift/codegen/src/isa/riscv64/inst.isle | 4 + .../codegen/src/isa/riscv64/inst/args.rs | 57 +++++---- .../codegen/src/isa/riscv64/inst/emit.rs | 114 ++++++++++++------ cranelift/codegen/src/isa/riscv64/inst/mod.rs | 22 +++- .../codegen/src/isa/riscv64/inst/vector.rs | 2 +- cranelift/codegen/src/isa/riscv64/lower.isle | 9 ++ .../codegen/src/isa/riscv64/lower/isle.rs | 6 + .../filetests/isa/riscv64/reftypes.clif | 4 +- .../filetests/isa/riscv64/stack.clif | 24 ++-- 9 files changed, 157 insertions(+), 85 deletions(-) diff --git a/cranelift/codegen/src/isa/riscv64/inst.isle b/cranelift/codegen/src/isa/riscv64/inst.isle index c3453661e5fb..e3019008a6c8 100644 --- a/cranelift/codegen/src/isa/riscv64/inst.isle +++ b/cranelift/codegen/src/isa/riscv64/inst.isle @@ -1988,6 +1988,10 @@ (decl gen_amode (Reg Offset32 Type) AMode) (extern constructor gen_amode gen_amode) +;; 
Generates a AMode that points to a constant in the constant pool. +(decl gen_const_amode (VCodeConstant) AMode) +(extern constructor gen_const_amode gen_const_amode) + (decl offset32_imm (i32) Offset32) (extern constructor offset32_imm offset32_imm) diff --git a/cranelift/codegen/src/isa/riscv64/inst/args.rs b/cranelift/codegen/src/isa/riscv64/inst/args.rs index 585e1f2bfc11..5b8309a8eac2 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/args.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/args.rs @@ -35,6 +35,13 @@ pub enum AMode { /// clobber pushes. See the diagram in the documentation for /// [crate::isa::riscv64::abi](the ABI module) for more details. NominalSPOffset(i64, Type), + + /// A reference to a constant which is placed outside of the function's + /// body, typically at the end. + Const(VCodeConstant), + + /// A reference to a label. + Label(MachLabel), } impl AMode { @@ -43,19 +50,23 @@ impl AMode { } pub(crate) fn with_allocs(self, allocs: &mut AllocationConsumer<'_>) -> Self { - let reg = allocs.next(self.get_base_register()); match self { - AMode::RegOffset(_, offset, ty) => AMode::RegOffset(reg, offset, ty), - AMode::SPOffset(..) | AMode::FPOffset(..) | AMode::NominalSPOffset(..) => self, + AMode::RegOffset(reg, offset, ty) => AMode::RegOffset(allocs.next(reg), offset, ty), + AMode::SPOffset(..) | AMode::FPOffset(..) | AMode::NominalSPOffset(..) => { + allocs.next(self.get_base_register().unwrap()); + self + } + AMode::Const(..) | AMode::Label(..) => self, } } - pub(crate) fn get_base_register(&self) -> Reg { + pub(crate) fn get_base_register(&self) -> Option { match self { - &AMode::RegOffset(reg, ..) => reg, - &AMode::SPOffset(..) => stack_reg(), - &AMode::FPOffset(..) => fp_reg(), - &AMode::NominalSPOffset(..) => stack_reg(), + &AMode::RegOffset(reg, ..) => Some(reg), + &AMode::SPOffset(..) => Some(stack_reg()), + &AMode::FPOffset(..) => Some(fp_reg()), + &AMode::NominalSPOffset(..) => Some(stack_reg()), + &AMode::Const(..) | AMode::Label(..) 
=> None, } } @@ -72,26 +83,18 @@ impl AMode { &AMode::SPOffset(offset, _) => offset, &AMode::FPOffset(offset, _) => offset, &AMode::NominalSPOffset(offset, _) => offset, + &AMode::Const(_) | &AMode::Label(_) => 0, } } pub(crate) fn to_string_with_alloc(&self, allocs: &mut AllocationConsumer<'_>) -> String { - let reg = self.get_base_register(); - let next = allocs.next(reg); - let offset = self.get_offset(); - match self { - &AMode::NominalSPOffset(..) => format!("{}", self), - _ => format!("{}({})", offset, reg_name(next),), - } - } - - pub(crate) fn to_addr(&self, allocs: &mut AllocationConsumer<'_>) -> String { - let reg = self.get_base_register(); - let next = allocs.next(reg); - let offset = self.get_offset(); - match self { - &AMode::NominalSPOffset(..) => format!("nsp{:+}", offset), - _ => format!("{}{:+}", reg_name(next), offset), + let reg = self.get_base_register().map(|r| allocs.next(r)); + match (self, reg) { + (&AMode::NominalSPOffset(..), _) => format!("{}", self), + (&AMode::Const(addr), _) => format!("{}(const)", addr.as_u32()), + (&AMode::Label(label), _) => format!("[label{}]", label.as_u32()), + (_, Some(reg)) => format!("{}({})", self.get_offset(), reg_name(reg)), + (_, None) => unreachable!(), } } } @@ -111,6 +114,12 @@ impl Display for AMode { &AMode::FPOffset(offset, ..) => { write!(f, "{}(fp)", offset) } + &AMode::Const(addr, ..) 
=> { + write!(f, "[const({})]", addr.as_u32()) + } + &AMode::Label(label) => { + write!(f, "[label{}]", label.as_u32()) + } } } } diff --git a/cranelift/codegen/src/isa/riscv64/inst/emit.rs b/cranelift/codegen/src/isa/riscv64/inst/emit.rs index c524ed277a18..0672f31823da 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/emit.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/emit.rs @@ -665,18 +665,21 @@ impl MachInstEmit for Inst { flags, } => { let from = from.clone().with_allocs(&mut allocs); + let rd = allocs.next_writable(rd); + let base = from.get_base_register(); let offset = from.get_offset_with_state(state); - let rd = allocs.next_writable(rd); + let offset_imm12 = Imm12::maybe_from_u64(offset as u64); - let (addr, imm12) = if let Some(imm12) = Imm12::maybe_from_u64(offset as u64) { + let (addr, imm12) = match (base, offset_imm12) { // If the offset fits into an imm12 we can directly encode it. - (base, imm12) - } else { + (Some(base), Some(imm12)) => (base, imm12), // Otherwise load the address it into a reg and load from it. - let tmp = writable_spilltmp_reg(); - Inst::LoadAddr { rd: tmp, mem: from }.emit(&[], sink, emit_info, state); - (tmp.to_reg(), Imm12::zero()) + _ => { + let tmp = writable_spilltmp_reg(); + Inst::LoadAddr { rd: tmp, mem: from }.emit(&[], sink, emit_info, state); + (tmp.to_reg(), Imm12::zero()) + } }; let srcloc = state.cur_srcloc(); @@ -695,18 +698,21 @@ impl MachInstEmit for Inst { } &Inst::Store { op, src, flags, to } => { let to = to.clone().with_allocs(&mut allocs); + let src = allocs.next(src); + let base = to.get_base_register(); let offset = to.get_offset_with_state(state); - let src = allocs.next(src); + let offset_imm12 = Imm12::maybe_from_u64(offset as u64); - let (addr, imm12) = if let Some(imm12) = Imm12::maybe_from_u64(offset as u64) { + let (addr, imm12) = match (base, offset_imm12) { // If the offset fits into an imm12 we can directly encode it. 
- (base, imm12) - } else { + (Some(base), Some(imm12)) => (base, imm12), // Otherwise load the address it into a reg and load from it. - let tmp = writable_spilltmp_reg(); - Inst::LoadAddr { rd: tmp, mem: to }.emit(&[], sink, emit_info, state); - (tmp.to_reg(), Imm12::zero()) + _ => { + let tmp = writable_spilltmp_reg(); + Inst::LoadAddr { rd: tmp, mem: to }.emit(&[], sink, emit_info, state); + (tmp.to_reg(), Imm12::zero()) + } }; let srcloc = state.cur_srcloc(); @@ -1166,24 +1172,38 @@ impl MachInstEmit for Inst { } &Inst::LoadAddr { rd, mem } => { - let base = mem.get_base_register(); - let base = allocs.next(base); + let mem = mem.with_allocs(&mut allocs); let rd = allocs.next_writable(rd); + + let base = mem.get_base_register(); let offset = mem.get_offset_with_state(state); - if let Some(offset) = Imm12::maybe_from_u64(offset as u64) { - Inst::AluRRImm12 { - alu_op: AluOPRRI::Addi, - rd, - rs: base, - imm12: offset, + let offset_imm12 = Imm12::maybe_from_u64(offset as u64); + + match (mem, base, offset_imm12) { + (_, Some(rs), Some(imm12)) => { + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd, + rs, + imm12, + } + .emit(&[], sink, emit_info, state); + } + (_, Some(rs), None) => { + LoadConstant::U64(offset as u64) + .load_constant_and_add(rd, rs) + .into_iter() + .for_each(|inst| inst.emit(&[], sink, emit_info, state)); + } + (AMode::Const(addr), None, _) => unimplemented!("LoadAddr: {:?}", addr), + (AMode::Label(label), None, _) => unimplemented!("LoadAddr: {:?}", label), + (amode, _, _) => { + unimplemented!("LoadAddr: {:?}", amode); } - .emit(&[], sink, emit_info, state); - } else { - LoadConstant::U64(offset as u64) - .load_constant_and_add(rd, base) - .into_iter() - .for_each(|inst| inst.emit(&[], sink, emit_info, state)); } + + // let label = sink.get_label_for_constant(*addr); + // let label = MemLabel::Mach(label); } &Inst::Select { @@ -2802,14 +2822,21 @@ impl MachInstEmit for Inst { let from = from.clone().with_allocs(&mut allocs); let to = 
allocs.next_writable(to); + let base = from.get_base_register(); + let offset = from.get_offset_with_state(state); + // Vector Loads don't support immediate offsets, so we need to load it into a register. - let addr = match from { - VecAMode::UnitStride { base } if base.get_offset_with_state(state) == 0 => { - base.get_base_register() - } - VecAMode::UnitStride { base } => { + let addr = match (&from, base, offset) { + // Reg+0 Offset can be directly encoded + (_, Some(base), 0) => base, + // Otherwise load the address it into a reg and load from it. + (VecAMode::UnitStride { base }, _, _) => { let tmp = writable_spilltmp_reg(); - Inst::LoadAddr { rd: tmp, mem: base }.emit(&[], sink, emit_info, state); + Inst::LoadAddr { + rd: tmp, + mem: base.clone(), + } + .emit(&[], sink, emit_info, state); tmp.to_reg() } }; @@ -2843,14 +2870,21 @@ impl MachInstEmit for Inst { let to = to.clone().with_allocs(&mut allocs); let from = allocs.next(from); + let base = to.get_base_register(); + let offset = to.get_offset_with_state(state); + // Vector Stores don't support immediate offsets, so we need to load it into a register. - let addr = match to { - VecAMode::UnitStride { base } if base.get_offset_with_state(state) == 0 => { - base.get_base_register() - } - VecAMode::UnitStride { base } => { + let addr = match (&to, base, offset) { + // Reg+0 Offset can be directly encoded + (_, Some(base), 0) => base, + // Otherwise load the address it into a reg and load from it. 
+ (VecAMode::UnitStride { base }, _, _) => { let tmp = writable_spilltmp_reg(); - Inst::LoadAddr { rd: tmp, mem: base }.emit(&[], sink, emit_info, state); + Inst::LoadAddr { + rd: tmp, + mem: base.clone(), + } + .emit(&[], sink, emit_info, state); tmp.to_reg() } }; diff --git a/cranelift/codegen/src/isa/riscv64/inst/mod.rs b/cranelift/codegen/src/isa/riscv64/inst/mod.rs index cdeb34de1a9c..be0ef4fab0ad 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/mod.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/mod.rs @@ -388,11 +388,15 @@ fn riscv64_get_operands VReg>(inst: &Inst, collector: &mut Operan collector.reg_def(rd); } &Inst::Load { rd, from, .. } => { - collector.reg_use(from.get_base_register()); + if let Some(r) = from.get_base_register() { + collector.reg_use(r); + } collector.reg_def(rd); } &Inst::Store { to, src, .. } => { - collector.reg_use(to.get_base_register()); + if let Some(r) = to.get_base_register() { + collector.reg_use(r); + } collector.reg_use(src); } @@ -443,7 +447,9 @@ fn riscv64_get_operands VReg>(inst: &Inst, collector: &mut Operan collector.reg_def(rd); } &Inst::LoadAddr { rd, mem } => { - collector.reg_use(mem.get_base_register()); + if let Some(r) = mem.get_base_register() { + collector.reg_use(r); + } collector.reg_early_def(rd); } @@ -654,11 +660,15 @@ fn riscv64_get_operands VReg>(inst: &Inst, collector: &mut Operan collector.reg_def(rd); } &Inst::VecLoad { to, ref from, .. } => { - collector.reg_use(from.get_base_register()); + if let Some(r) = from.get_base_register() { + collector.reg_use(r); + } collector.reg_def(to); } &Inst::VecStore { ref to, from, .. 
} => { - collector.reg_use(to.get_base_register()); + if let Some(r) = to.get_base_register() { + collector.reg_use(r); + } collector.reg_use(from); } } @@ -1524,7 +1534,7 @@ impl Inst { format!("load_sym {},{}{:+}", rd, name.display(None), offset) } &MInst::LoadAddr { ref rd, ref mem } => { - let rs = mem.to_addr(allocs); + let rs = mem.to_string_with_alloc(allocs); let rd = format_reg(rd.to_reg(), allocs); format!("load_addr {},{}", rd, rs) } diff --git a/cranelift/codegen/src/isa/riscv64/inst/vector.rs b/cranelift/codegen/src/isa/riscv64/inst/vector.rs index 4b7330ab8d5e..9df1832c73ca 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/vector.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/vector.rs @@ -305,7 +305,7 @@ impl fmt::Display for VecAluOpRRImm5 { } impl VecAMode { - pub fn get_base_register(&self) -> Reg { + pub fn get_base_register(&self) -> Option { match self { VecAMode::UnitStride { base, .. } => base.get_base_register(), } diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle index 96e608512dd6..7b15f1cd0bea 100644 --- a/cranelift/codegen/src/isa/riscv64/lower.isle +++ b/cranelift/codegen/src/isa/riscv64/lower.isle @@ -9,6 +9,15 @@ (rule (lower (has_type ty (iconst (u64_from_imm64 n)))) (imm ty n)) +;; ;;;; Rules for `vconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; (rule (lower (has_type (ty_vec_fits_in_register ty) (vconst n))) +;; (vec_load +;; (element_width_from_type ty) +;; (VecAMode.UnitStride (gen_const_amode (const_to_vconst n))) +;; (mem_flags_trusted) +;; ty)) + ;;;; Rules for `f32const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (f32const (u32_from_ieee32 n))) diff --git a/cranelift/codegen/src/isa/riscv64/lower/isle.rs b/cranelift/codegen/src/isa/riscv64/lower/isle.rs index 3b2fb0652cd1..345894583f44 100644 --- a/cranelift/codegen/src/isa/riscv64/lower/isle.rs +++ b/cranelift/codegen/src/isa/riscv64/lower/isle.rs @@ -379,9 +379,15 @@ 
impl generated_code::Context for RV64IsleContext<'_, '_, MInst, Riscv64Backend> fn int_convert_2_float_op(&mut self, from: Type, is_signed: bool, to: Type) -> FpuOPRR { FpuOPRR::int_convert_2_float_op(from, is_signed, to) } + fn gen_amode(&mut self, base: Reg, offset: Offset32, ty: Type) -> AMode { AMode::RegOffset(base, i64::from(offset), ty) } + + fn gen_const_amode(&mut self, c: VCodeConstant) -> AMode { + AMode::Const(c) + } + fn valid_atomic_transaction(&mut self, ty: Type) -> Option { if ty.is_int() && ty.bits() <= 64 { Some(ty) diff --git a/cranelift/filetests/filetests/isa/riscv64/reftypes.clif b/cranelift/filetests/filetests/isa/riscv64/reftypes.clif index 074bd4a3a53e..a1ee0ab49a8c 100644 --- a/cranelift/filetests/filetests/isa/riscv64/reftypes.clif +++ b/cranelift/filetests/filetests/isa/riscv64/reftypes.clif @@ -100,7 +100,7 @@ block3(v7: r64, v8: r64): ; sd a6,8(nominal_sp) ; load_sym t0,%f+0 ; callind t0 -; load_addr t4,nsp+0 +; load_addr t4,0(nominal_sp) ; ld a6,8(nominal_sp) ; sd a6,0(t4) ; andi t0,a0,255 @@ -114,7 +114,7 @@ block3(v7: r64, v8: r64): ; ld a1,16(nominal_sp) ; j label3 ; block3: -; load_addr a2,nsp+0 +; load_addr a2,0(nominal_sp) ; ld a2,0(a2) ; mv t3,s3 ; sd a2,0(t3) diff --git a/cranelift/filetests/filetests/isa/riscv64/stack.clif b/cranelift/filetests/filetests/isa/riscv64/stack.clif index 06458298b98a..86876201c248 100644 --- a/cranelift/filetests/filetests/isa/riscv64/stack.clif +++ b/cranelift/filetests/filetests/isa/riscv64/stack.clif @@ -18,7 +18,7 @@ block0: ; mv fp,sp ; add sp,-16 ; block0: -; load_addr a0,nsp+0 +; load_addr a0,0(nominal_sp) ; add sp,+16 ; ld ra,8(sp) ; ld fp,0(sp) @@ -59,7 +59,7 @@ block0: ; call %Probestack ; add sp,-100016 ; block0: -; load_addr a0,nsp+0 +; load_addr a0,0(nominal_sp) ; add sp,+100016 ; ld ra,8(sp) ; ld fp,0(sp) @@ -108,7 +108,7 @@ block0: ; mv fp,sp ; add sp,-16 ; block0: -; load_addr t1,nsp+0 +; load_addr t1,0(nominal_sp) ; ld a0,0(t1) ; add sp,+16 ; ld ra,8(sp) @@ -151,7 +151,7 @@ 
block0: ; call %Probestack ; add sp,-100016 ; block0: -; load_addr t1,nsp+0 +; load_addr t1,0(nominal_sp) ; ld a0,0(t1) ; add sp,+100016 ; ld ra,8(sp) @@ -202,7 +202,7 @@ block0(v0: i64): ; mv fp,sp ; add sp,-16 ; block0: -; load_addr t2,nsp+0 +; load_addr t2,0(nominal_sp) ; sd a0,0(t2) ; add sp,+16 ; ld ra,8(sp) @@ -245,7 +245,7 @@ block0(v0: i64): ; call %Probestack ; add sp,-100016 ; block0: -; load_addr t2,nsp+0 +; load_addr t2,0(nominal_sp) ; sd a0,0(t2) ; add sp,+100016 ; ld ra,8(sp) @@ -859,7 +859,7 @@ block0(v0: i128): ; add sp,-16 ; block0: ; mv a2,a0 -; load_addr a0,nsp+0 +; load_addr a0,0(nominal_sp) ; sd a2,0(a0) ; sd a1,8(a0) ; add sp,+16 @@ -903,7 +903,7 @@ block0(v0: i128): ; add sp,-32 ; block0: ; mv a2,a0 -; load_addr a0,nsp+32 +; load_addr a0,32(nominal_sp) ; sd a2,0(a0) ; sd a1,8(a0) ; add sp,+32 @@ -950,7 +950,7 @@ block0(v0: i128): ; add sp,-100016 ; block0: ; mv a2,a0 -; load_addr a0,nsp+0 +; load_addr a0,0(nominal_sp) ; sd a2,0(a0) ; sd a1,8(a0) ; add sp,+100016 @@ -1004,7 +1004,7 @@ block0: ; mv fp,sp ; add sp,-16 ; block0: -; load_addr t2,nsp+0 +; load_addr t2,0(nominal_sp) ; ld a0,0(t2) ; ld a1,8(t2) ; add sp,+16 @@ -1046,7 +1046,7 @@ block0: ; mv fp,sp ; add sp,-32 ; block0: -; load_addr t2,nsp+32 +; load_addr t2,32(nominal_sp) ; ld a0,0(t2) ; ld a1,8(t2) ; add sp,+32 @@ -1091,7 +1091,7 @@ block0: ; call %Probestack ; add sp,-100016 ; block0: -; load_addr t2,nsp+0 +; load_addr t2,0(nominal_sp) ; ld a0,0(t2) ; ld a1,8(t2) ; add sp,+100016 From 0105eb833f79fedd1f326631accb68af639d8d07 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Mon, 24 Apr 2023 14:31:15 +0100 Subject: [PATCH 06/14] riscv64: Add Label Address Generation --- .../codegen/src/isa/riscv64/inst/emit.rs | 34 +++++++++++++--- cranelift/codegen/src/isa/riscv64/inst/mod.rs | 40 +++++++++++++++---- 2 files changed, 62 insertions(+), 12 deletions(-) diff --git a/cranelift/codegen/src/isa/riscv64/inst/emit.rs b/cranelift/codegen/src/isa/riscv64/inst/emit.rs index 
0672f31823da..31247bd180ad 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/emit.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/emit.rs @@ -1195,15 +1195,39 @@ impl MachInstEmit for Inst { .into_iter() .for_each(|inst| inst.emit(&[], sink, emit_info, state)); } - (AMode::Const(addr), None, _) => unimplemented!("LoadAddr: {:?}", addr), - (AMode::Label(label), None, _) => unimplemented!("LoadAddr: {:?}", label), + (AMode::Const(addr), None, _) => { + // Get an address label for the constant and recurse. + let label = sink.get_label_for_constant(addr); + Inst::LoadAddr { + rd, + mem: AMode::Label(label), + } + .emit(&[], sink, emit_info, state); + } + (AMode::Label(label), None, _) => { + // Get the current PC. + sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelHi20); + let inst = Inst::Auipc { + rd, + imm: Imm20::from_bits(0), + }; + inst.emit(&[], sink, emit_info, state); + + // Emit an add to the address with a relocation. + // This later gets patched up with the correct offset. + sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelLo12I); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd, + rs: rd.to_reg(), + imm12: Imm12::zero(), + } + .emit(&[], sink, emit_info, state); + } (amode, _, _) => { unimplemented!("LoadAddr: {:?}", amode); } } - - // let label = sink.get_label_for_constant(*addr); - // let label = MemLabel::Mach(label); } &Inst::Select { diff --git a/cranelift/codegen/src/isa/riscv64/inst/mod.rs b/cranelift/codegen/src/isa/riscv64/inst/mod.rs index be0ef4fab0ad..1b943be38efc 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/mod.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/mod.rs @@ -1664,6 +1664,20 @@ pub enum LabelUse { /// is added to the current pc to give the target address. The /// conditional branch range is ±4 KiB. B12, + + /// Equivalent to the `R_RISCV_PCREL_HI20` relocation, Allows setting + /// the immediate field of an `auipc` instruction. 
+ /// + /// Since we currently don't support offsets in labels, this relocation has + /// an implicit offset of 4. + PCRelHi20, + + /// Equivalent to the `R_RISCV_PCREL_LO12_I` relocation, Allows setting + /// the immediate field of I Type instructions such as `addi` or `lw`. + /// + /// Since we currently don't support offsets in labels, this relocation has + /// an implicit offset of 4. + PCRelLo12I, } impl MachInstLabelUse for LabelUse { @@ -1675,7 +1689,9 @@ impl MachInstLabelUse for LabelUse { fn max_pos_range(self) -> CodeOffset { match self { LabelUse::Jal20 => ((1 << 19) - 1) * 2, - LabelUse::PCRel32 => Inst::imm_max() as CodeOffset, + LabelUse::PCRelLo12I | LabelUse::PCRelHi20 | LabelUse::PCRel32 => { + Inst::imm_max() as CodeOffset + } LabelUse::B12 => ((1 << 11) - 1) * 2, } } @@ -1691,9 +1707,8 @@ impl MachInstLabelUse for LabelUse { /// Size of window into code needed to do the patch. fn patch_size(self) -> CodeOffset { match self { - LabelUse::Jal20 => 4, + LabelUse::Jal20 | LabelUse::B12 | LabelUse::PCRelHi20 | LabelUse::PCRelLo12I => 4, LabelUse::PCRel32 => 8, - LabelUse::B12 => 4, } } @@ -1718,8 +1733,7 @@ impl MachInstLabelUse for LabelUse { /// Is a veneer supported for this label reference type? fn supports_veneer(self) -> bool { match self { - Self::B12 => true, - Self::Jal20 => true, + Self::Jal20 | Self::B12 => true, _ => false, } } @@ -1727,8 +1741,7 @@ impl MachInstLabelUse for LabelUse { /// How large is the veneer, if supported? 
fn veneer_size(self) -> CodeOffset { match self { - Self::B12 => 8, - Self::Jal20 => 8, + Self::B12 | Self::Jal20 => 8, _ => unreachable!(), } } @@ -1812,6 +1825,19 @@ impl LabelUse { | ((offset >> 12 & 0b1) << 31); buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn | v)); } + + LabelUse::PCRelHi20 => { + let offset = offset as u32 + 4; + let hi20 = offset & 0xFFFFF000; + let insn = (insn & 0xFFF) | hi20; + buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn)); + } + + LabelUse::PCRelLo12I => { + let offset = (offset as u32 + 4) & 0xFFF; + let insn = (insn & 0xFFFFF) | (offset << 20); + buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn)); + } } } } From df1bed7dc0788984bcfba123739e049182f9c856 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Mon, 24 Apr 2023 14:46:26 +0100 Subject: [PATCH 07/14] riscv64: Add `vconst` support --- .../codegen/src/isa/riscv64/inst/args.rs | 2 +- cranelift/codegen/src/isa/riscv64/lower.isle | 12 +- .../isa/riscv64/simd-vconst-64bit.clif | 114 +++++++++++++ .../filetests/isa/riscv64/simd-vconst.clif | 160 ++++++++++++++++++ .../filetests/runtests/simd-vconst-64bit.clif | 1 + .../filetests/runtests/simd-vconst.clif | 51 +++--- 6 files changed, 314 insertions(+), 26 deletions(-) create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-vconst-64bit.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-vconst.clif diff --git a/cranelift/codegen/src/isa/riscv64/inst/args.rs b/cranelift/codegen/src/isa/riscv64/inst/args.rs index 5b8309a8eac2..66a84abb868b 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/args.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/args.rs @@ -91,7 +91,7 @@ impl AMode { let reg = self.get_base_register().map(|r| allocs.next(r)); match (self, reg) { (&AMode::NominalSPOffset(..), _) => format!("{}", self), - (&AMode::Const(addr), _) => format!("{}(const)", addr.as_u32()), + (&AMode::Const(addr), _) => format!("[const({})]", addr.as_u32()), (&AMode::Label(label), _) => 
format!("[label{}]", label.as_u32()), (_, Some(reg)) => format!("{}({})", self.get_offset(), reg_name(reg)), (_, None) => unreachable!(), diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle index 7b15f1cd0bea..a0e48a52dfb7 100644 --- a/cranelift/codegen/src/isa/riscv64/lower.isle +++ b/cranelift/codegen/src/isa/riscv64/lower.isle @@ -11,12 +11,12 @@ ;; ;;;; Rules for `vconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; (rule (lower (has_type (ty_vec_fits_in_register ty) (vconst n))) -;; (vec_load -;; (element_width_from_type ty) -;; (VecAMode.UnitStride (gen_const_amode (const_to_vconst n))) -;; (mem_flags_trusted) -;; ty)) +(rule (lower (has_type (ty_vec_fits_in_register ty) (vconst n))) + (vec_load + (element_width_from_type ty) + (VecAMode.UnitStride (gen_const_amode (const_to_vconst n))) + (mem_flags_trusted) + ty)) ;;;; Rules for `f32const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-vconst-64bit.clif b/cranelift/filetests/filetests/isa/riscv64/simd-vconst-64bit.clif new file mode 100644 index 000000000000..7bf8b5b70c6b --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-vconst-64bit.clif @@ -0,0 +1,114 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %vconst_zeroes() -> i8x8 { +block0: + v0 = vconst.i8x8 0x00 + return v0 +} + +; VCode: +; block0: +; vle8.v v10,[const(0)] #avl=8, #vtype=(e8, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x04, 0xcc +; auipc t6, 0 +; addi t6, t6, 0x14 +; .byte 0x07, 0x85, 0x0f, 0x02 +; ret +; .byte 0x00, 0x00, 0x00, 0x00 +; .byte 0x00, 0x00, 0x00, 0x00 +; .byte 0x00, 0x00, 0x00, 0x00 + +function %vconst_ones() -> i8x8 { +block0: + v0 = vconst.i8x8 0xffffffffffffffff + return v0 +} + +; VCode: +; block0: +; vle8.v v10,[const(0)] #avl=8, #vtype=(e8, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; 
offset 0x0 +; .byte 0x57, 0x70, 0x04, 0xcc +; auipc t6, 0 +; addi t6, t6, 0x14 +; .byte 0x07, 0x85, 0x0f, 0x02 +; ret +; .byte 0x00, 0x00, 0x00, 0x00 +; .byte 0xff, 0xff, 0xff, 0xff +; .byte 0xff, 0xff, 0xff, 0xff + +function %vconst_i8x8() -> i8x8 { +block0: + v0 = vconst.i8x8 [0 31 63 95 127 159 191 255] + return v0 +} + +; VCode: +; block0: +; vle8.v v10,[const(0)] #avl=8, #vtype=(e8, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x04, 0xcc +; auipc t6, 0 +; addi t6, t6, 0x14 +; .byte 0x07, 0x85, 0x0f, 0x02 +; ret +; .byte 0x00, 0x00, 0x00, 0x00 +; .byte 0x00, 0x1f, 0x3f, 0x5f +; .byte 0x7f, 0x9f, 0xbf, 0xff + +function %vconst_i16x4() -> i16x4 { +block0: + v0 = vconst.i16x4 [0 255 32767 65535] + return v0 +} + +; VCode: +; block0: +; vle16.v v10,[const(0)] #avl=4, #vtype=(e16, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x82, 0xcc +; auipc t6, 0 +; addi t6, t6, 0x14 +; .byte 0x07, 0xd5, 0x0f, 0x02 +; ret +; .byte 0x00, 0x00, 0x00, 0x00 +; .byte 0x00, 0x00, 0xff, 0x00 +; .byte 0xff, 0x7f, 0xff, 0xff + +function %vconst_i32x2() -> i32x2 { +block0: + v0 = vconst.i32x2 [0 4294967295] + return v0 +} + +; VCode: +; block0: +; vle32.v v10,[const(0)] #avl=2, #vtype=(e32, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x01, 0xcd +; auipc t6, 0 +; addi t6, t6, 0x14 +; .byte 0x07, 0xe5, 0x0f, 0x02 +; ret +; .byte 0x00, 0x00, 0x00, 0x00 +; .byte 0x00, 0x00, 0x00, 0x00 +; .byte 0xff, 0xff, 0xff, 0xff + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-vconst.clif b/cranelift/filetests/filetests/isa/riscv64/simd-vconst.clif new file mode 100644 index 000000000000..0919aefed737 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-vconst.clif @@ -0,0 +1,160 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + +function %vconst_zeroes_i8x16() -> i8x16 { +block0: + v0 = vconst.i8x16 0x00 + return v0 +} + +; VCode: +; 
block0: +; vle8.v v10,[const(0)] #avl=16, #vtype=(e8, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x08, 0xcc +; auipc t6, 0 +; addi t6, t6, 0x1c +; .byte 0x07, 0x85, 0x0f, 0x02 +; ret +; .byte 0x00, 0x00, 0x00, 0x00 +; .byte 0x00, 0x00, 0x00, 0x00 +; .byte 0x00, 0x00, 0x00, 0x00 +; .byte 0x00, 0x00, 0x00, 0x00 +; .byte 0x00, 0x00, 0x00, 0x00 +; .byte 0x00, 0x00, 0x00, 0x00 +; .byte 0x00, 0x00, 0x00, 0x00 + +function %vconst_ones_i8x16() -> i8x16 { +block0: + v0 = vconst.i8x16 0xffffffffffffffff_ffffffffffffffff + return v0 +} + +; VCode: +; block0: +; vle8.v v10,[const(0)] #avl=16, #vtype=(e8, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x08, 0xcc +; auipc t6, 0 +; addi t6, t6, 0x1c +; .byte 0x07, 0x85, 0x0f, 0x02 +; ret +; .byte 0x00, 0x00, 0x00, 0x00 +; .byte 0x00, 0x00, 0x00, 0x00 +; .byte 0x00, 0x00, 0x00, 0x00 +; .byte 0xff, 0xff, 0xff, 0xff +; .byte 0xff, 0xff, 0xff, 0xff +; .byte 0xff, 0xff, 0xff, 0xff +; .byte 0xff, 0xff, 0xff, 0xff + +function %vconst_i8x16() -> i8x16 { +block0: + v0 = vconst.i8x16 [0 31 63 95 127 159 191 255 1 2 3 4 5 6 7 8] + return v0 +} + +; VCode: +; block0: +; vle8.v v10,[const(0)] #avl=16, #vtype=(e8, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x08, 0xcc +; auipc t6, 0 +; addi t6, t6, 0x1c +; .byte 0x07, 0x85, 0x0f, 0x02 +; ret +; .byte 0x00, 0x00, 0x00, 0x00 +; .byte 0x00, 0x00, 0x00, 0x00 +; .byte 0x00, 0x00, 0x00, 0x00 +; .byte 0x00, 0x1f, 0x3f, 0x5f +; .byte 0x7f, 0x9f, 0xbf, 0xff +; .byte 0x01, 0x02, 0x03, 0x04 +; .byte 0x05, 0x06, 0x07, 0x08 + +function %vconst_i16x8() -> i16x8 { +block0: + v0 = vconst.i16x8 [0 255 32767 65535 1 2 3 4] + return v0 +} + +; VCode: +; block0: +; vle16.v v10,[const(0)] #avl=8, #vtype=(e16, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x84, 0xcc +; auipc t6, 0 +; addi t6, t6, 0x1c +; .byte 0x07, 0xd5, 0x0f, 0x02 +; ret +; .byte 0x00, 0x00, 
0x00, 0x00 +; .byte 0x00, 0x00, 0x00, 0x00 +; .byte 0x00, 0x00, 0x00, 0x00 +; .byte 0x00, 0x00, 0xff, 0x00 +; .byte 0xff, 0x7f, 0xff, 0xff +; .byte 0x01, 0x00, 0x02, 0x00 +; lb zero, 0(s0) + +function %vconst_i32x4() -> i32x4 { +block0: + v0 = vconst.i32x4 [0 4294967295 1 2] + return v0 +} + +; VCode: +; block0: +; vle32.v v10,[const(0)] #avl=4, #vtype=(e32, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x02, 0xcd +; auipc t6, 0 +; addi t6, t6, 0x1c +; .byte 0x07, 0xe5, 0x0f, 0x02 +; ret +; .byte 0x00, 0x00, 0x00, 0x00 +; .byte 0x00, 0x00, 0x00, 0x00 +; .byte 0x00, 0x00, 0x00, 0x00 +; .byte 0x00, 0x00, 0x00, 0x00 +; .byte 0xff, 0xff, 0xff, 0xff +; .byte 0x01, 0x00, 0x00, 0x00 +; .byte 0x02, 0x00, 0x00, 0x00 + +function %vconst_i64x2() -> i64x2 { +block0: + v0 = vconst.i64x2 [0 -1] + return v0 +} + +; VCode: +; block0: +; vle64.v v10,[const(0)] #avl=2, #vtype=(e64, m1, ta, ma) +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; .byte 0x57, 0x70, 0x81, 0xcd +; auipc t6, 0 +; addi t6, t6, 0x1c +; .byte 0x07, 0xf5, 0x0f, 0x02 +; ret +; .byte 0x00, 0x00, 0x00, 0x00 +; .byte 0x00, 0x00, 0x00, 0x00 +; .byte 0x00, 0x00, 0x00, 0x00 +; .byte 0x00, 0x00, 0x00, 0x00 +; .byte 0x00, 0x00, 0x00, 0x00 +; .byte 0xff, 0xff, 0xff, 0xff +; .byte 0xff, 0xff, 0xff, 0xff + diff --git a/cranelift/filetests/filetests/runtests/simd-vconst-64bit.clif b/cranelift/filetests/filetests/runtests/simd-vconst-64bit.clif index d81aeaaecd73..c7e6752bb8a2 100644 --- a/cranelift/filetests/filetests/runtests/simd-vconst-64bit.clif +++ b/cranelift/filetests/filetests/runtests/simd-vconst-64bit.clif @@ -1,6 +1,7 @@ test interpret test run target aarch64 +target riscv64 has_v ; x86_64 and s390x do not support 64-bit vectors. 
function %vconst_zeroes() -> i8x8 { diff --git a/cranelift/filetests/filetests/runtests/simd-vconst.clif b/cranelift/filetests/filetests/runtests/simd-vconst.clif index a307f59268a3..7e153107fe24 100644 --- a/cranelift/filetests/filetests/runtests/simd-vconst.clif +++ b/cranelift/filetests/filetests/runtests/simd-vconst.clif @@ -4,34 +4,47 @@ target aarch64 set enable_simd target x86_64 has_sse3 has_ssse3 has_sse41 target x86_64 has_sse3 has_ssse3 has_sse41 has_avx +target riscv64 has_v -function %vconst_zeroes() -> i8 { +function %vconst_zeroes_i8x16() -> i8x16 { block0: v0 = vconst.i8x16 0x00 - v1 = extractlane v0, 4 - v2 = icmp_imm eq v1, 0 - return v2 + return v0 } -; run +; run: %vconst_zeroes_i8x16() == [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] -function %vconst_ones() -> i8 { +function %vconst_ones_i8x16() -> i8x16 { block0: - v0 = vconst.i8x16 0xffffffffffffffffffffffffffffffff - v1 = extractlane v0, 2 - v2 = icmp_imm eq v1, 0xff - return v2 + v0 = vconst.i8x16 0xffffffffffffffff_ffffffffffffffff + return v0 } -; run +; run: %vconst_ones_i8x16() == [255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255] +function %vconst_i8x16() -> i8x16 { +block0: + v0 = vconst.i8x16 [0 31 63 95 127 159 191 255 1 2 3 4 5 6 7 8] + return v0 +} +; run: %vconst_i8x16() == [0 31 63 95 127 159 191 255 1 2 3 4 5 6 7 8] + +function %vconst_i16x8() -> i16x8 { +block0: + v0 = vconst.i16x8 [0 255 32767 65535 1 2 3 4] + return v0 +} +; run: %vconst_i16x8() == [0 255 32767 65535 1 2 3 4] + +function %vconst_i32x4() -> i32x4 { +block0: + v0 = vconst.i32x4 [0 4294967295 1 2] + return v0 +} +; run: %vconst_i32x4() == [0 4294967295 1 2] -function %splat_i64x2() -> i8 { +function %vconst_i64x2() -> i64x2 { block0: - v0 = iconst.i64 -1 - v1 = splat.i64x2 v0 - v2 = vconst.i64x2 [-1 -1] - v3 = icmp eq v1, v2 - v8 = vall_true v3 - return v8 + v0 = vconst.i64x2 [0 -1] + return v0 } -; run +; run: %vconst_i64x2() == [0 -1] From 1048da32befaaae9f8098cb5d0c680175fca5ddd Mon Sep 17 00:00:00 2001 
From: Afonso Bordado Date: Fri, 5 May 2023 12:22:58 +0100 Subject: [PATCH 08/14] riscv64: Use `unsigned_field_width` in encode --- cranelift/codegen/src/isa/riscv64/inst/encode.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cranelift/codegen/src/isa/riscv64/inst/encode.rs b/cranelift/codegen/src/isa/riscv64/inst/encode.rs index 693518d7495d..bb3ab02b08c3 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/encode.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/encode.rs @@ -59,11 +59,11 @@ pub fn encode_r_type( /// | Opcode | rd | width | rs1 | Offset[11:0] | pub fn encode_i_type(opcode: u32, rd: Reg, width: u32, rs1: Reg, offset: Imm12) -> u32 { let mut bits = 0; - bits |= opcode & 0b1111111; + bits |= unsigned_field_width(opcode, 7); bits |= reg_to_gpr_num(rd) << 7; - bits |= (width & 0b111) << 12; + bits |= unsigned_field_width(width, 3) << 12; bits |= reg_to_gpr_num(rs1) << 15; - bits |= (offset.as_u32() & 0b1111_1111_1111) << 20; + bits |= unsigned_field_width(offset.as_u32(), 12) << 20; bits } @@ -74,12 +74,12 @@ pub fn encode_i_type(opcode: u32, rd: Reg, width: u32, rs1: Reg, offset: Imm12) /// | Opcode | imm[4:0] | width | base | src | imm[11:5] | pub fn encode_s_type(opcode: u32, width: u32, base: Reg, src: Reg, offset: Imm12) -> u32 { let mut bits = 0; - bits |= opcode & 0b1111111; + bits |= unsigned_field_width(opcode, 7); bits |= (offset.as_u32() & 0b11111) << 7; - bits |= (width & 0b111) << 12; + bits |= unsigned_field_width(width, 3) << 12; bits |= reg_to_gpr_num(base) << 15; bits |= reg_to_gpr_num(src) << 20; - bits |= ((offset.as_u32() >> 5) & 0b1111111) << 25; + bits |= unsigned_field_width(offset.as_u32() >> 5, 7) << 25; bits } From 5d1f8fe9ed30f48d48103411c87c8ca5fbaf592b Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Fri, 5 May 2023 12:25:07 +0100 Subject: [PATCH 09/14] riscv64: Use `WritableReg` in encode --- cranelift/codegen/src/isa/riscv64/inst/emit.rs | 8 +------- 
cranelift/codegen/src/isa/riscv64/inst/encode.rs | 4 ++-- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/cranelift/codegen/src/isa/riscv64/inst/emit.rs b/cranelift/codegen/src/isa/riscv64/inst/emit.rs index 31247bd180ad..cec242df357e 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/emit.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/emit.rs @@ -688,13 +688,7 @@ impl MachInstEmit for Inst { sink.add_trap(TrapCode::HeapOutOfBounds); } - sink.put4(encode_i_type( - op.op_code(), - rd.to_reg(), - op.funct3(), - addr, - imm12, - )); + sink.put4(encode_i_type(op.op_code(), rd, op.funct3(), addr, imm12)); } &Inst::Store { op, src, flags, to } => { let to = to.clone().with_allocs(&mut allocs); diff --git a/cranelift/codegen/src/isa/riscv64/inst/encode.rs b/cranelift/codegen/src/isa/riscv64/inst/encode.rs index bb3ab02b08c3..2a479867578c 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/encode.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/encode.rs @@ -57,10 +57,10 @@ pub fn encode_r_type( /// Layout: /// 0-------6-7-------11-12------14-15------19-20------------------31 /// | Opcode | rd | width | rs1 | Offset[11:0] | -pub fn encode_i_type(opcode: u32, rd: Reg, width: u32, rs1: Reg, offset: Imm12) -> u32 { +pub fn encode_i_type(opcode: u32, rd: WritableReg, width: u32, rs1: Reg, offset: Imm12) -> u32 { let mut bits = 0; bits |= unsigned_field_width(opcode, 7); - bits |= reg_to_gpr_num(rd) << 7; + bits |= reg_to_gpr_num(rd.to_reg()) << 7; bits |= unsigned_field_width(width, 3) << 12; bits |= reg_to_gpr_num(rs1) << 15; bits |= unsigned_field_width(offset.as_u32(), 12) << 20; From 59b098338ce97609639e8b73efd69fc024ac6365 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Fri, 5 May 2023 12:33:29 +0100 Subject: [PATCH 10/14] riscv64: Deduplicate AMode formatting --- cranelift/codegen/src/isa/riscv64/inst/args.rs | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/cranelift/codegen/src/isa/riscv64/inst/args.rs 
b/cranelift/codegen/src/isa/riscv64/inst/args.rs index 66a84abb868b..d6e8604bd9a0 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/args.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/args.rs @@ -88,14 +88,7 @@ impl AMode { } pub(crate) fn to_string_with_alloc(&self, allocs: &mut AllocationConsumer<'_>) -> String { - let reg = self.get_base_register().map(|r| allocs.next(r)); - match (self, reg) { - (&AMode::NominalSPOffset(..), _) => format!("{}", self), - (&AMode::Const(addr), _) => format!("[const({})]", addr.as_u32()), - (&AMode::Label(label), _) => format!("[label{}]", label.as_u32()), - (_, Some(reg)) => format!("{}({})", self.get_offset(), reg_name(reg)), - (_, None) => unreachable!(), - } + format!("{}", self.clone().with_allocs(allocs)) } } @@ -103,7 +96,7 @@ impl Display for AMode { fn fmt(&self, f: &mut Formatter<'_>) -> Result { match self { &AMode::RegOffset(r, offset, ..) => { - write!(f, "{}({:?})", offset, r) + write!(f, "{}({})", offset, reg_name(r)) } &AMode::SPOffset(offset, ..) => { write!(f, "{}(sp)", offset) From f3f2fd19916cd4a699bfddaa4b8dbab04813203f Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Fri, 5 May 2023 12:42:17 +0100 Subject: [PATCH 11/14] riscv64: Refactor VectorLoad/Store AMode Pattern matching --- .../codegen/src/isa/riscv64/inst/emit.rs | 62 ++++++++++--------- 1 file changed, 34 insertions(+), 28 deletions(-) diff --git a/cranelift/codegen/src/isa/riscv64/inst/emit.rs b/cranelift/codegen/src/isa/riscv64/inst/emit.rs index cec242df357e..c4f28f409ca7 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/emit.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/emit.rs @@ -2840,22 +2840,25 @@ impl MachInstEmit for Inst { let from = from.clone().with_allocs(&mut allocs); let to = allocs.next_writable(to); - let base = from.get_base_register(); - let offset = from.get_offset_with_state(state); - // Vector Loads don't support immediate offsets, so we need to load it into a register. 
- let addr = match (&from, base, offset) { - // Reg+0 Offset can be directly encoded - (_, Some(base), 0) => base, - // Otherwise load the address it into a reg and load from it. - (VecAMode::UnitStride { base }, _, _) => { - let tmp = writable_spilltmp_reg(); - Inst::LoadAddr { - rd: tmp, - mem: base.clone(), + let addr = match from { + VecAMode::UnitStride { base } => { + let base_reg = base.get_base_register(); + let offset = base.get_offset_with_state(state); + + // Reg+0 Offset can be directly encoded + if let (Some(base_reg), 0) = (base_reg, offset) { + base_reg + } else { + // Otherwise load the address it into a reg and load from it. + let tmp = writable_spilltmp_reg(); + Inst::LoadAddr { + rd: tmp, + mem: base.clone(), + } + .emit(&[], sink, emit_info, state); + tmp.to_reg() } - .emit(&[], sink, emit_info, state); - tmp.to_reg() } }; @@ -2888,22 +2891,25 @@ impl MachInstEmit for Inst { let to = to.clone().with_allocs(&mut allocs); let from = allocs.next(from); - let base = to.get_base_register(); - let offset = to.get_offset_with_state(state); - // Vector Stores don't support immediate offsets, so we need to load it into a register. - let addr = match (&to, base, offset) { - // Reg+0 Offset can be directly encoded - (_, Some(base), 0) => base, - // Otherwise load the address it into a reg and load from it. - (VecAMode::UnitStride { base }, _, _) => { - let tmp = writable_spilltmp_reg(); - Inst::LoadAddr { - rd: tmp, - mem: base.clone(), + let addr = match to { + VecAMode::UnitStride { base } => { + let base_reg = base.get_base_register(); + let offset = base.get_offset_with_state(state); + + // Reg+0 Offset can be directly encoded + if let (Some(base_reg), 0) = (base_reg, offset) { + base_reg + } else { + // Otherwise load the address it into a reg and load from it. 
+ let tmp = writable_spilltmp_reg(); + Inst::LoadAddr { + rd: tmp, + mem: base.clone(), + } + .emit(&[], sink, emit_info, state); + tmp.to_reg() } - .emit(&[], sink, emit_info, state); - tmp.to_reg() } }; From c2a89d55ee3d02d1372f822af65737e194b9cf60 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Fri, 5 May 2023 12:52:16 +0100 Subject: [PATCH 12/14] riscv64: Avoid passing `fp` and `sp` through the register allocator --- .../codegen/src/isa/riscv64/inst/args.rs | 27 ++++++++++++------- cranelift/codegen/src/isa/riscv64/inst/mod.rs | 10 +++---- .../codegen/src/isa/riscv64/inst/vector.rs | 6 +++++ 3 files changed, 29 insertions(+), 14 deletions(-) diff --git a/cranelift/codegen/src/isa/riscv64/inst/args.rs b/cranelift/codegen/src/isa/riscv64/inst/args.rs index d6e8604bd9a0..d1d835f97673 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/args.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/args.rs @@ -45,18 +45,27 @@ pub enum AMode { } impl AMode { - pub(crate) fn reg_offset(reg: Reg, imm: i64, ty: Type) -> AMode { - AMode::RegOffset(reg, imm, ty) - } - pub(crate) fn with_allocs(self, allocs: &mut AllocationConsumer<'_>) -> Self { match self { AMode::RegOffset(reg, offset, ty) => AMode::RegOffset(allocs.next(reg), offset, ty), - AMode::SPOffset(..) | AMode::FPOffset(..) | AMode::NominalSPOffset(..) => { - allocs.next(self.get_base_register().unwrap()); - self - } - AMode::Const(..) | AMode::Label(..) => self, + AMode::SPOffset(..) + | AMode::FPOffset(..) + | AMode::NominalSPOffset(..) + | AMode::Const(..) + | AMode::Label(..) => self, + } + } + + /// Returns the register that is known to the register allocator, if any. + /// Keep this in sync with `with_allocs`. + pub(crate) fn get_allocatable_register(&self) -> Option<Reg> { + match self { + AMode::RegOffset(reg, ..) => Some(*reg), + AMode::SPOffset(..) + | AMode::FPOffset(..) + | AMode::NominalSPOffset(..) + | AMode::Const(..) + | AMode::Label(..) 
=> None, } } diff --git a/cranelift/codegen/src/isa/riscv64/inst/mod.rs b/cranelift/codegen/src/isa/riscv64/inst/mod.rs index 1b943be38efc..08f7a2d1410e 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/mod.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/mod.rs @@ -388,13 +388,13 @@ fn riscv64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan collector.reg_def(rd); } &Inst::Load { rd, from, .. } => { - if let Some(r) = from.get_base_register() { + if let Some(r) = from.get_allocatable_register() { collector.reg_use(r); } collector.reg_def(rd); } &Inst::Store { to, src, .. } => { - if let Some(r) = to.get_base_register() { + if let Some(r) = to.get_allocatable_register() { collector.reg_use(r); } collector.reg_use(src); @@ -447,7 +447,7 @@ fn riscv64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan collector.reg_def(rd); } &Inst::LoadAddr { rd, mem } => { - if let Some(r) = mem.get_base_register() { + if let Some(r) = mem.get_allocatable_register() { collector.reg_use(r); } collector.reg_early_def(rd); @@ -660,13 +660,13 @@ fn riscv64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan collector.reg_def(rd); } &Inst::VecLoad { to, ref from, .. } => { - if let Some(r) = from.get_base_register() { + if let Some(r) = from.get_allocatable_register() { collector.reg_use(r); } collector.reg_def(to); } &Inst::VecStore { ref to, from, .. } => { - if let Some(r) = to.get_base_register() { + if let Some(r) = to.get_allocatable_register() { collector.reg_use(r); } collector.reg_use(from); diff --git a/cranelift/codegen/src/isa/riscv64/inst/vector.rs b/cranelift/codegen/src/isa/riscv64/inst/vector.rs index 9df1832c73ca..c6029fb2fdeb 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/vector.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/vector.rs @@ -311,6 +311,12 @@ impl VecAMode { } } + pub fn get_allocatable_register(&self) -> Option<Reg> { + match self { + VecAMode::UnitStride { base, .. 
} => base.get_allocatable_register(), + } + } + pub(crate) fn with_allocs(self, allocs: &mut AllocationConsumer<'_>) -> Self { match self { VecAMode::UnitStride { base } => VecAMode::UnitStride { From fabce61b2dae1b7f615b2ff7ebee46894543e9a9 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Fri, 5 May 2023 14:58:29 +0100 Subject: [PATCH 13/14] riscv64: Fix `PCRel{Hi20,Lo12I}` relocation --- cranelift/codegen/src/isa/riscv64/inst/mod.rs | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/cranelift/codegen/src/isa/riscv64/inst/mod.rs b/cranelift/codegen/src/isa/riscv64/inst/mod.rs index 08f7a2d1410e..f328d26e2cb7 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/mod.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/mod.rs @@ -1667,9 +1667,6 @@ pub enum LabelUse { /// Equivalent to the `R_RISCV_PCREL_HI20` relocation, Allows setting /// the immediate field of an `auipc` instruction. - /// - /// Since we currently don't support offsets in labels, this relocation has - /// an implicit offset of 4. PCRelHi20, /// Equivalent to the `R_RISCV_PCREL_LO12_I` relocation, Allows setting @@ -1827,15 +1824,22 @@ impl LabelUse { } LabelUse::PCRelHi20 => { - let offset = offset as u32 + 4; - let hi20 = offset & 0xFFFFF000; - let insn = (insn & 0xFFF) | hi20; + // See https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#pc-relative-symbol-addresses + // + // We need to add 0x800 to ensure that we land at the next page as soon as it goes out of range for the + // Lo12 relocation. That relocation is signed and has a maximum range of -2048..2047. So when we get an + // offset of 2048, we need to land at the next page and subtract instead. 
+ let offset = offset as u32; + let hi20 = offset.wrapping_add(0x800) >> 12; + let insn = (insn & 0xFFF) | (hi20 << 12); buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn)); } LabelUse::PCRelLo12I => { - let offset = (offset as u32 + 4) & 0xFFF; - let insn = (insn & 0xFFFFF) | (offset << 20); + // We add 4 here since this relocation usually follows a PCRelHi20 relocation, at the previous + // instruction. So we need to account for the 4 byte difference in offsets there. + let lo12 = (offset + 4) as u32 & 0xFFF; + let insn = (insn & 0xFFFFF) | (lo12 << 20); buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn)); } } From d8ae95001e60a2b4bd5de32d3c642795f896379e Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Mon, 8 May 2023 21:37:11 +0100 Subject: [PATCH 14/14] riscv64: Update PCRelLo12I Comment --- cranelift/codegen/src/isa/riscv64/inst/mod.rs | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/cranelift/codegen/src/isa/riscv64/inst/mod.rs b/cranelift/codegen/src/isa/riscv64/inst/mod.rs index f328d26e2cb7..15572da548b8 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/mod.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/mod.rs @@ -1669,7 +1669,8 @@ pub enum LabelUse { /// the immediate field of an `auipc` instruction. PCRelHi20, - /// Equivalent to the `R_RISCV_PCREL_LO12_I` relocation, Allows setting + /// Similar to the `R_RISCV_PCREL_LO12_I` relocation but pointing to + /// the final address, instead of the `PCREL_HI20` label. Allows setting /// the immediate field of I Type instructions such as `addi` or `lw`. /// /// Since we currently don't support offsets in labels, this relocation has @@ -1836,8 +1837,17 @@ impl LabelUse { } LabelUse::PCRelLo12I => { - // We add 4 here since this relocation usually follows a PCRelHi20 relocation, at the previous - // instruction. So we need to account for the 4 byte difference in offsets there. + // `offset` is the offset from the current instruction to the target address. 
+ // + // However we are trying to compute the offset to the target address from the previous instruction. + // The previous instruction should be the one that contains the PCRelHi20 relocation and + // stores/references the program counter (`auipc` usually). + // + // Since we are trying to compute the offset from the previous instruction, we can + // represent it as offset = target_address - (current_instruction_address - 4) + // which is equivalent to offset = target_address - current_instruction_address + 4. + // + // Thus we need to add 4 to the offset here. let lo12 = (offset + 4) as u32 & 0xFFF; let insn = (insn & 0xFFFFF) | (lo12 << 20); buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn));