Skip to content

Commit

Permalink
riscv64: Add support for load+extend patterns (bytecodealliance#8765)
Browse files Browse the repository at this point in the history
* riscv64: Add support for `load+extend` patterns

RISC-V doesn't have sinkable loads per se, but the regular load
instructions support sign / zero extending the loaded values.

We model those here as a sinkable load on the extend instruction.

* riscv64: Clarify sinkable loads on RISC-V
  • Loading branch information
afonso360 authored Jun 11, 2024
1 parent b7aacfc commit 9e34bde
Show file tree
Hide file tree
Showing 4 changed files with 263 additions and 2 deletions.
30 changes: 28 additions & 2 deletions cranelift/codegen/src/isa/riscv64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -2422,22 +2422,48 @@
(gen_stack_slot_amode ss combined_offset))


;; Helpers for sinkable loads ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; RISC-V doesn't really have sinkable loads. But the regular load instructions
;; sign / zero extend their results to 64 bits. So we can pretend they are
;; an extend instruction with a sinkable load. This allows us to have better
;; lowerings in these cases.

;; Extract a sinkable instruction from a value operand.
;;
;; This is an extern extractor: the matching logic is not written in ISLE but
;; provided by the lowering context's `sinkable_inst` method. The pattern only
;; matches when that method yields an `Inst` for the given `Value`.
(decl sinkable_inst (Inst) Value)
(extern extractor sinkable_inst sinkable_inst)

;; Matches a sinkable load.
;;
;; The pattern succeeds only when the value satisfies both arms of the `and`:
;;   * it is a `load`, which binds `flags`, `addr` and `offset`;
;;   * `sinkable_inst` accepts it, which binds `inst`, with `ty` bound through
;;     `has_type` (presumably the type of the loaded value -- confirm against
;;     the `has_type` definition).
(decl sinkable_load (Inst Type MemFlags Value Offset32) Value)
(extractor (sinkable_load inst ty flags addr offset)
(and
(load flags addr offset)
(sinkable_inst (has_type ty inst))))

;; Returns a canonical type for a LoadOP. We only return I64 or F64.
;;
;; The two priority-1 rules map the floating point loads (`Fld` and `Flw`) to
;; $F64. The priority-0 wildcard rule is the fallback and maps every other
;; load op to $I64.
(decl load_op_reg_type (LoadOP) Type)
(rule 1 (load_op_reg_type (LoadOP.Fld)) $F64)
(rule 1 (load_op_reg_type (LoadOP.Flw)) $F64)
(rule 0 (load_op_reg_type _) $I64)

;; Helper constructor to build a load instruction that reads from memory.
;;
;; Allocates a temporary destination register whose type is chosen by
;; `load_op_reg_type`, emits an `MInst.Load` of `op` from `amode` (with
;; `flags`) into it, and returns that register.
(decl gen_load (AMode LoadOP MemFlags) Reg)
(rule (gen_load amode op flags)
(let ((dst WritableReg (temp_writable_reg (load_op_reg_type op)))
(_ Unit (emit (MInst.Load dst op flags amode))))
dst))

;; Similar to `gen_load` but marks `Inst` as sunk at the current point.
;;
;; This is only useful for load ops that perform some additional computation
;; such as extending the loaded value.
;;
;; `sink_inst` is invoked on `inst` first; the load itself is then emitted by
;; delegating to `gen_load`, whose result register is returned.
(decl gen_sunk_load (Inst AMode LoadOP MemFlags) Reg)
(rule (gen_sunk_load inst amode op flags)
(let ((_ Unit (sink_inst inst)))
(gen_load amode op flags)))


;; Helper constructor to build a store instruction.
;;
;; This helper contains a special-case for zero constants stored to memory to
;; directly store the `zero` register to memory. See #7162 for some discussion
Expand Down
20 changes: 20 additions & 0 deletions cranelift/codegen/src/isa/riscv64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1115,16 +1115,36 @@
;;;; Rules for `uextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Generic case for results that fit in 64 bits: zero extend the input value
;; with `zext`.
(rule 0 (lower (has_type (fits_in_64 _) (uextend val)))
(zext val))

;; $I128 results are built from two registers: the first holds the
;; zero-extended input and the second is an immediate zero.
(rule 1 (lower (has_type $I128 (uextend val)))
(value_regs (zext val) (imm $I64 0)))

;; When the source of an `uextend` is a load, we can merge both ops.
;;
;; The load is sunk into the extend and replaced by a single load whose op is
;; chosen by `uextend_load_op` from the type of the loaded value. Priority 2
;; makes this rule take precedence over the generic priority-0 rule for
;; `fits_in_64` results.
(rule 2 (lower (has_type (fits_in_64 _) (uextend (sinkable_load inst ty flags addr offset))))
(gen_sunk_load inst (amode addr offset) (uextend_load_op ty) flags))

;; Selects the load op used when merging a load with an `uextend`, based on
;; the type of the loaded value: `Lbu` for $I8, `Lhu` for $I16 and `Lwu` for
;; $I32. No rule is defined for any other type.
(decl pure uextend_load_op (Type) LoadOP)
(rule (uextend_load_op $I8) (LoadOP.Lbu))
(rule (uextend_load_op $I16) (LoadOP.Lhu))
(rule (uextend_load_op $I32) (LoadOP.Lwu))

;;;; Rules for `sextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Generic case for results that fit in 64 bits: sign extend the input value
;; with `sext`. (`in_ty` is bound by the pattern but not used in the body.)
(rule 0 (lower (has_type (fits_in_64 _) (sextend val @ (value_type in_ty))))
(sext val))

;; $I128 results are built from two registers: the first is the sign-extended
;; input, the second is that same value arithmetically shifted right by 63,
;; which fills it with copies of the sign bit.
(rule 1 (lower (has_type $I128 (sextend val @ (value_type in_ty))))
(let ((lo XReg (sext val)))
(value_regs lo (rv_srai lo (imm12_const 63)))))

;; When the source of an `sextend` is a load, we can merge both ops.
;;
;; The load is sunk into the extend and replaced by a single load whose op is
;; chosen by `sextend_load_op` from the type of the loaded value. Priority 2
;; makes this rule take precedence over the generic priority-0 rule for
;; `fits_in_64` results.
(rule 2 (lower (has_type (fits_in_64 _) (sextend (sinkable_load inst ty flags addr offset))))
(gen_sunk_load inst (amode addr offset) (sextend_load_op ty) flags))

;; Selects the load op used when merging a load with an `sextend`, based on
;; the type of the loaded value: `Lb` for $I8, `Lh` for $I16 and `Lw` for
;; $I32. No rule is defined for any other type.
(decl pure sextend_load_op (Type) LoadOP)
(rule (sextend_load_op $I8) (LoadOP.Lb))
(rule (sextend_load_op $I16) (LoadOP.Lh))
(rule (sextend_load_op $I32) (LoadOP.Lw))

;;;; Rules for `popcnt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule 0 (lower (has_type (fits_in_64 _) (popcnt x)))
Expand Down
5 changes: 5 additions & 0 deletions cranelift/codegen/src/isa/riscv64/lower/isle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,11 @@ impl generated_code::Context for RV64IsleContext<'_, '_, MInst, Riscv64Backend>
_ => None,
}
}

/// Backing implementation of the ISLE `sinkable_inst` extern extractor.
///
/// Delegates to `is_sinkable_inst`; the extractor matches when this returns
/// `Some(inst)` and fails to match on `None`.
// NOTE(review): the exact conditions under which `is_sinkable_inst` accepts a
// value are defined elsewhere -- confirm there before relying on them.
fn sinkable_inst(&mut self, val: Value) -> Option<Inst> {
self.is_sinkable_inst(val)
}

/// Returns the `LoadOP` for a value of type `ty`, as chosen by
/// `LoadOP::from_type`.
fn load_op(&mut self, ty: Type) -> LoadOP {
LoadOP::from_type(ty)
}
Expand Down
210 changes: 210 additions & 0 deletions cranelift/filetests/filetests/isa/riscv64/load-extends.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
test compile precise-output
set unwind_info=false
target riscv64

function %load_uextend_i8_i16(i64) -> i16 {
block0(v0: i64):
v1 = load.i8 v0
v2 = uextend.i16 v1
return v2
}

; VCode:
; block0:
; lbu a0,0(a0)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; lbu a0, 0(a0) ; trap: heap_oob
; ret

function %load_uextend_i8_i32(i64) -> i32 {
block0(v0: i64):
v1 = load.i8 v0
v2 = uextend.i32 v1
return v2
}

; VCode:
; block0:
; lbu a0,0(a0)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; lbu a0, 0(a0) ; trap: heap_oob
; ret

function %load_uextend_i8_i64(i64) -> i64 {
block0(v0: i64):
v1 = load.i8 v0
v2 = uextend.i64 v1
return v2
}

; VCode:
; block0:
; lbu a0,0(a0)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; lbu a0, 0(a0) ; trap: heap_oob
; ret

function %load_uextend_i16_i32(i64) -> i32 {
block0(v0: i64):
v1 = load.i16 v0
v2 = uextend.i32 v1
return v2
}

; VCode:
; block0:
; lhu a0,0(a0)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; lhu a0, 0(a0) ; trap: heap_oob
; ret

function %load_uextend_i16_i64(i64) -> i64 {
block0(v0: i64):
v1 = load.i16 v0
v2 = uextend.i64 v1
return v2
}

; VCode:
; block0:
; lhu a0,0(a0)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; lhu a0, 0(a0) ; trap: heap_oob
; ret

function %load_uextend_i32_i64(i64) -> i64 {
block0(v0: i64):
v1 = load.i32 v0
v2 = uextend.i64 v1
return v2
}

; VCode:
; block0:
; lwu a0,0(a0)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; lwu a0, 0(a0) ; trap: heap_oob
; ret



function %load_sextend_i8_i16(i64) -> i16 {
block0(v0: i64):
v1 = load.i8 v0
v2 = sextend.i16 v1
return v2
}

; VCode:
; block0:
; lb a0,0(a0)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; lb a0, 0(a0) ; trap: heap_oob
; ret

function %load_sextend_i8_i32(i64) -> i32 {
block0(v0: i64):
v1 = load.i8 v0
v2 = sextend.i32 v1
return v2
}

; VCode:
; block0:
; lb a0,0(a0)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; lb a0, 0(a0) ; trap: heap_oob
; ret

function %load_sextend_i8_i64(i64) -> i64 {
block0(v0: i64):
v1 = load.i8 v0
v2 = sextend.i64 v1
return v2
}

; VCode:
; block0:
; lb a0,0(a0)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; lb a0, 0(a0) ; trap: heap_oob
; ret

function %load_sextend_i16_i32(i64) -> i32 {
block0(v0: i64):
v1 = load.i16 v0
v2 = sextend.i32 v1
return v2
}

; VCode:
; block0:
; lh a0,0(a0)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; lh a0, 0(a0) ; trap: heap_oob
; ret

function %load_sextend_i16_i64(i64) -> i64 {
block0(v0: i64):
v1 = load.i16 v0
v2 = sextend.i64 v1
return v2
}

; VCode:
; block0:
; lh a0,0(a0)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; lh a0, 0(a0) ; trap: heap_oob
; ret

function %load_sextend_i32_i64(i64) -> i64 {
block0(v0: i64):
v1 = load.i32 v0
v2 = sextend.i64 v1
return v2
}

; VCode:
; block0:
; lw a0,0(a0)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; lw a0, 0(a0) ; trap: heap_oob
; ret

0 comments on commit 9e34bde

Please sign in to comment.