aarch64: Use RegScaled* addressing modes #6945

Merged
26 changes: 24 additions & 2 deletions cranelift/codegen/src/isa/aarch64/inst.isle
@@ -3122,15 +3122,37 @@
(rule 5 (amode ty (iadd (sextend x @ (value_type $I32)) y) offset)
(AMode.RegExtended (amode_add y offset) x (ExtendOp.SXTW)))

;; `RegScaled*` rules, which match an addition of an "index register" to a
;; base register where the index register is shifted left by log2 of the byte
;; size of the loaded type (i.e. scaled by the type's size), which is what
;; makes these addressing modes applicable.
;;
;; Note that these rules can additionally bundle an extending operation, but
;; the extension must happen before the shift. The shift is pattern-matched
;; first; only if that succeeds do we then try to find an extend.
(rule 6 (amode ty (iadd x (ishl y (iconst (u64_from_imm64 n)))) offset)
(if-let $true (u64_eq (ty_bytes ty) (u64_shl 1 n)))
(amode_reg_scaled (amode_add x offset) y ty))
(rule 7 (amode ty (iadd (ishl y (iconst (u64_from_imm64 n))) x) offset)
(if-let $true (u64_eq (ty_bytes ty) (u64_shl 1 n)))
(amode_reg_scaled (amode_add x offset) y ty))

(decl amode_reg_scaled (Reg Value Type) AMode)
(rule 0 (amode_reg_scaled base index ty)
(AMode.RegScaled base index ty))
(rule 1 (amode_reg_scaled base (uextend index @ (value_type $I32)) ty)
(AMode.RegScaledExtended base index ty (ExtendOp.UXTW)))
(rule 2 (amode_reg_scaled base (sextend index @ (value_type $I32)) ty)
(AMode.RegScaledExtended base index ty (ExtendOp.SXTW)))

;; Small optimizations where constants found in `iadd` are folded into the
;; `offset` immediate.
;;
;; NB: this should probably be done by mid-end optimizations rather than here
;; in the backend, but currently Cranelift doesn't do that.
-(rule 6 (amode ty (iadd x (iconst (simm32 y))) offset)
+(rule 8 (amode ty (iadd x (iconst (simm32 y))) offset)
(if-let new_offset (s32_add_fallible y offset))
(amode ty x new_offset))
-(rule 7 (amode ty (iadd (iconst (simm32 x)) y) offset)
+(rule 9 (amode ty (iadd (iconst (simm32 x)) y) offset)
(if-let new_offset (s32_add_fallible x offset))
(amode ty y new_offset))
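
;; For illustration (hypothetical CLIF input, not part of this diff): with
;; these rules an address computation like
;;
;;   v1 = iconst.i64 16
;;   v2 = iadd v0, v1
;;   v3 = load.i64 v2
;;
;; folds the 16 into the load's immediate offset, e.g. `ldr x0, [x0, #16]`,
;; instead of materializing the constant into a register.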

1 change: 1 addition & 0 deletions cranelift/codegen/src/prelude.isle
@@ -87,6 +87,7 @@

(decl pure u16_as_u64 (u16) u64)
(extern constructor u16_as_u64 u16_as_u64)
(convert u16 u64 u16_as_u64)
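;; The `convert` above presumably lets ISLE coerce a `u16` to a `u64`
;; implicitly wherever a `u64` is expected. For example, if `ty_bytes`
;; returns a `u16`, its result can now flow directly into `u64_eq` in the
;; new aarch64 `amode` rules without an explicit `u16_as_u64` call.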

(decl pure u32_as_u64 (u32) u64)
(extern constructor u32_as_u64 u32_as_u64)
266 changes: 266 additions & 0 deletions cranelift/filetests/filetests/isa/aarch64/amodes.clif
@@ -519,3 +519,269 @@ block0(v0: i64, v1: i32):
; stp x0, x1, [x6]
; ret

function %load_scaled8(i64, i64) -> i8 {
block0(v0: i64, v1: i64):
v2 = ishl_imm v1, 0
v3 = iadd v0, v2
v4 = load.i8 v3
return v4
}

; VCode:
; block0:
; ldrb w0, [x0, x1, LSL #0]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldrb w0, [x0, x1, lsl #0]
; ret

function %load_scaled16(i64, i64) -> i16 {
block0(v0: i64, v1: i64):
v2 = ishl_imm v1, 1
v3 = iadd v0, v2
v4 = load.i16 v3
return v4
}

; VCode:
; block0:
; ldrh w0, [x0, x1, LSL #1]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldrh w0, [x0, x1, lsl #1]
; ret

function %load_scaled32(i64, i64) -> i32 {
block0(v0: i64, v1: i64):
v2 = ishl_imm v1, 2
v3 = iadd v0, v2
v4 = load.i32 v3
return v4
}

; VCode:
; block0:
; ldr w0, [x0, x1, LSL #2]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldr w0, [x0, x1, lsl #2]
; ret

function %load_scaled64(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = ishl_imm v1, 3
v3 = iadd v0, v2
v4 = load.i64 v3
return v4
}

; VCode:
; block0:
; ldr x0, [x0, x1, LSL #3]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldr x0, [x0, x1, lsl #3]
; ret

function %load_not_scaled64(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = ishl_imm v1, 2
v3 = iadd v0, v2
v4 = load.i64 v3
return v4
}

; VCode:
; block0:
; lsl x4, x1, #2
; ldr x0, [x0, x4]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; lsl x4, x1, #2
; ldr x0, [x0, x4]
; ret

function %load_uextend_scaled8(i64, i32) -> i8 {
block0(v0: i64, v1: i32):
v2 = uextend.i64 v1
v3 = ishl_imm v2, 0
v4 = iadd v0, v3
v5 = load.i8 v4
return v5
}

; VCode:
; block0:
; ldrb w0, [x0, w1, UXTW #0]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldrb w0, [x0, w1, uxtw #0]
; ret

function %load_uextend_scaled16(i64, i32) -> i16 {
block0(v0: i64, v1: i32):
v2 = uextend.i64 v1
v3 = ishl_imm v2, 1
v4 = iadd v0, v3
v5 = load.i16 v4
return v5
}

; VCode:
; block0:
; ldrh w0, [x0, w1, UXTW #1]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldrh w0, [x0, w1, uxtw #1]
; ret

function %load_uextend_scaled32(i64, i32) -> i32 {
block0(v0: i64, v1: i32):
v2 = uextend.i64 v1
v3 = ishl_imm v2, 2
v4 = iadd v0, v3
v5 = load.i32 v4
return v5
}

; VCode:
; block0:
; ldr w0, [x0, w1, UXTW #2]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldr w0, [x0, w1, uxtw #2]
; ret


function %load_uextend_scaled64(i64, i32) -> i64 {
block0(v0: i64, v1: i32):
v2 = uextend.i64 v1
v3 = ishl_imm v2, 3
v4 = iadd v0, v3
v5 = load.i64 v4
return v5
}

; VCode:
; block0:
; ldr x0, [x0, w1, UXTW #3]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldr x0, [x0, w1, uxtw #3]
; ret

function %load_not_extend_scaled64(i64, i32) -> i64 {
block0(v0: i64, v1: i32):
v2 = ishl_imm v1, 3
v3 = uextend.i64 v2
v4 = iadd v0, v3
v5 = load.i64 v4
return v5
}

; VCode:
; block0:
; lsl w4, w1, #3
; ldr x0, [x0, w4, UXTW]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; lsl w4, w1, #3
; ldr x0, [x0, w4, uxtw]
; ret

function %load_sextend_scaled8(i64, i32) -> i8 {
block0(v0: i64, v1: i32):
v2 = sextend.i64 v1
v3 = ishl_imm v2, 0
v4 = iadd v0, v3
v5 = load.i8 v4
return v5
}

; VCode:
; block0:
; ldrb w0, [x0, w1, SXTW #0]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldrb w0, [x0, w1, sxtw #0]
; ret

function %load_sextend_scaled16(i64, i32) -> i16 {
block0(v0: i64, v1: i32):
v2 = sextend.i64 v1
v3 = ishl_imm v2, 1
v4 = iadd v0, v3
v5 = load.i16 v4
return v5
}

; VCode:
; block0:
; ldrh w0, [x0, w1, SXTW #1]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldrh w0, [x0, w1, sxtw #1]
; ret

function %load_sextend_scaled32(i64, i32) -> i32 {
block0(v0: i64, v1: i32):
v2 = sextend.i64 v1
v3 = ishl_imm v2, 2
v4 = iadd v0, v3
v5 = load.i32 v4
return v5
}

; VCode:
; block0:
; ldr w0, [x0, w1, SXTW #2]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldr w0, [x0, w1, sxtw #2]
; ret

function %load_sextend_scaled64(i64, i32) -> i64 {
block0(v0: i64, v1: i32):
v2 = sextend.i64 v1
v3 = ishl_imm v2, 3
v4 = iadd v0, v3
v5 = load.i64 v4
return v5
}

; VCode:
; block0:
; ldr x0, [x0, w1, SXTW #3]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldr x0, [x0, w1, sxtw #3]
; ret