Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

riscv64: Implement a few misc SIMD instructions #6598

Merged
merged 6 commits into from
Jun 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 0 additions & 9 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -228,11 +228,6 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
return true;
}

// The memory64 testsuite has a single SIMD test that we don't pass yet.
if testname == "simd" && testsuite == "memory64" {
return true;
}

let known_failure = [
"canonicalize_nan",
"cvt_from_uint",
Expand All @@ -249,18 +244,14 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
"simd_i16x8_arith2",
"simd_i16x8_cmp",
"simd_i16x8_q15mulr_sat_s",
"simd_i32x4_arith2",
"simd_i32x4_cmp",
"simd_i32x4_trunc_sat_f32x4",
"simd_i32x4_trunc_sat_f64x2",
"simd_i64x2_arith2",
"simd_i64x2_cmp",
"simd_i8x16_arith2",
"simd_i8x16_cmp",
"simd_load",
"simd_load_zero",
"simd_splat",
"v128_select",
]
.contains(&testname);

Expand Down
12 changes: 0 additions & 12 deletions cranelift/codegen/src/isa/riscv64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -2673,18 +2673,6 @@
(gen_select_reg (IntCC.SignedGreaterThan) x y x y))


(decl lower_iabs (Type XReg) XReg)

; I64 and lower
; Generate the following code:
; sext.{b,h,w} a0, a0
; neg a1, a0
; max a0, a0, a1
(rule (lower_iabs (fits_in_64 ty) val)
(let ((extended XReg (sext val ty $I64))
(negated XReg (rv_neg extended)))
(max $I64 extended negated)))

(decl gen_trapif (XReg TrapCode) InstOutput)
(rule
(gen_trapif test trap_code)
Expand Down
11 changes: 7 additions & 4 deletions cranelift/codegen/src/isa/riscv64/inst_vector.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1006,14 +1006,17 @@


;; Build a vector mask from a u64
;; TODO: We should merge this with the `vconst` rules, and take advantage of
;; the other existing `vconst` rules. One example is using `vmv.v.i` which
;; can represent some of these masks.
;; TODO(#6571): We should merge this with the `vconst` rules, and take advantage of
;; the other existing `vconst` rules.
(decl gen_vec_mask (u64) VReg)

;; When the mask value fits in a 5-bit immediate, we can use `vmv.v.i` directly.
(rule 1 (gen_vec_mask (imm5_from_u64 imm))
(rv_vmv_vi imm (vstate_from_type $I64X2)))

;; Materialize the mask into an X register, and move it into the bottom of
;; the vector register.
(rule (gen_vec_mask mask)
(rule 0 (gen_vec_mask mask)
(rv_vmv_sx (imm $I64 mask) (vstate_from_type $I64X2)))


Expand Down
35 changes: 32 additions & 3 deletions cranelift/codegen/src/isa/riscv64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1556,9 +1556,23 @@
(load_ra))

;;; Rules for `iabs` ;;;;;;;;;;;;;
(rule
(lower (has_type (fits_in_64 ty) (iabs x)))
(lower_iabs ty x))

;; Scalar integer `iabs` (I64 and lower).
;;
;; The absolute value is computed as `max(x, -x)` on the value sign-extended
;; from `ty` to I64, so the negation and the signed maximum both operate on
;; the full 64-bit register.
;;
;; Generate the following code:
;;   sext.{b,h,w} a0, a0
;;   neg a1, a0
;;   max a0, a0, a1
(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (iabs x)))
(let ((extended XReg (sext x ty $I64))
(negated XReg (rv_neg extended)))
(max $I64 extended negated)))

;; Vector `iabs`: negate the input and take the element-wise maximum of the
;; input and its negation, i.e. `max(x, -x)`, using vector instructions.
;;
;; Unlike the scalar lowering, no sign extension is needed here, since the
;; vector unit will only process element-sized chunks.
(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (iabs x)))
(let ((negated VReg (rv_vneg_v x (unmasked) ty)))
(rv_vmax_vv x negated (unmasked) ty)))

;;;; Rules for calls ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

Expand Down Expand Up @@ -1842,3 +1856,18 @@
(xor VReg (rv_vxor_vv x y (unmasked) ty))
(rhs VReg (rv_vssrl_vi xor one (unmasked) ty)))
(rv_vadd_vv lhs rhs (unmasked) ty)))

;;;; Rules for `scalar_to_vector` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Non-float vectors: build a vector from the zero register, build a mask from
;; the constant 1 with `gen_vec_mask`, and merge the scalar `x` into the zero
;; vector under that mask using the vector-scalar merge (`rv_vmerge_vxm`).
;;
;; NOTE(review): presumably a mask value of 1 selects only the first lane, so
;; `x` ends up in lane 0 and all remaining lanes are zero -- confirm against
;; the `vmerge.vxm` operand semantics.
(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (scalar_to_vector x)))
(if (ty_vector_not_float ty))
(let ((zero VReg (rv_vmv_vx (zero_reg) ty))
(mask VReg (gen_vec_mask 1)))
(rv_vmerge_vxm zero x mask ty)))

;; Float vectors: build a vector from the zero register, move the scalar `x`
;; into a vector register with `rv_vfmv_sf`, build a mask from the constant 1
;; with `gen_vec_mask`, and combine the two vectors under that mask using the
;; vector-vector merge (`rv_vmerge_vvm`).
;;
;; NOTE(review): presumably a mask value of 1 selects only the first lane, so
;; the result takes lane 0 from `elem` and zero everywhere else -- confirm
;; against the `vmerge.vvm` operand semantics.
(rule 0 (lower (has_type (ty_vec_fits_in_register ty) (scalar_to_vector x)))
(if (ty_vector_float ty))
(let ((zero VReg (rv_vmv_vx (zero_reg) ty))
(elem VReg (rv_vfmv_sf x ty))
(mask VReg (gen_vec_mask 1)))
(rv_vmerge_vvm zero elem mask ty)))
2 changes: 1 addition & 1 deletion cranelift/codegen/src/isa/riscv64/lower/isle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ impl generated_code::Context for RV64IsleContext<'_, '_, MInst, Riscv64Backend>
} else {
vec![self.temp_writable_reg(I64), self.temp_writable_reg(I64)]
}
} else if ty.is_float() {
} else if ty.is_float() || ty.is_vector() {
vec![self.temp_writable_reg(ty)]
} else {
unimplemented!("ty:{:?}", ty)
Expand Down
166 changes: 166 additions & 0 deletions cranelift/filetests/filetests/isa/riscv64/simd-iabs.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
test compile precise-output
set unwind_info=false
target riscv64 has_v

function %iabs_i8x16(i8x16) -> i8x16 {
block0(v0: i8x16):
v1 = iabs v0
return v1
}

; VCode:
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; block0:
; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma)
; vneg.v v4,v1 #avl=16, #vtype=(e8, m1, ta, ma)
; vmax.vv v6,v1,v4 #avl=16, #vtype=(e8, m1, ta, ma)
; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, ta, ma)
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; addi sp, sp, -0x10
; sd ra, 8(sp)
; sd s0, 0(sp)
; ori s0, sp, 0
; block1: ; offset 0x10
; .byte 0x57, 0x70, 0x08, 0xcc
; addi t6, s0, 0x10
; .byte 0x87, 0x80, 0x0f, 0x02
; .byte 0x57, 0x42, 0x10, 0x0e
; .byte 0x57, 0x03, 0x12, 0x1e
; .byte 0x27, 0x03, 0x05, 0x02
; ld ra, 8(sp)
; ld s0, 0(sp)
; addi sp, sp, 0x10
; ret

function %iabs_i16x8(i16x8) -> i16x8 {
block0(v0: i16x8):
v1 = iabs v0
return v1
}

; VCode:
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; block0:
; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma)
; vneg.v v4,v1 #avl=8, #vtype=(e16, m1, ta, ma)
; vmax.vv v6,v1,v4 #avl=8, #vtype=(e16, m1, ta, ma)
; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, ta, ma)
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; addi sp, sp, -0x10
; sd ra, 8(sp)
; sd s0, 0(sp)
; ori s0, sp, 0
; block1: ; offset 0x10
; .byte 0x57, 0x70, 0x08, 0xcc
; addi t6, s0, 0x10
; .byte 0x87, 0x80, 0x0f, 0x02
; .byte 0x57, 0x70, 0x84, 0xcc
; .byte 0x57, 0x42, 0x10, 0x0e
; .byte 0x57, 0x03, 0x12, 0x1e
; .byte 0x57, 0x70, 0x08, 0xcc
; .byte 0x27, 0x03, 0x05, 0x02
; ld ra, 8(sp)
; ld s0, 0(sp)
; addi sp, sp, 0x10
; ret

function %iabs_i32x4(i32x4) -> i32x4 {
block0(v0: i32x4):
v1 = iabs v0
return v1
}

; VCode:
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; block0:
; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma)
; vneg.v v4,v1 #avl=4, #vtype=(e32, m1, ta, ma)
; vmax.vv v6,v1,v4 #avl=4, #vtype=(e32, m1, ta, ma)
; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, ta, ma)
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; addi sp, sp, -0x10
; sd ra, 8(sp)
; sd s0, 0(sp)
; ori s0, sp, 0
; block1: ; offset 0x10
; .byte 0x57, 0x70, 0x08, 0xcc
; addi t6, s0, 0x10
; .byte 0x87, 0x80, 0x0f, 0x02
; .byte 0x57, 0x70, 0x02, 0xcd
; .byte 0x57, 0x42, 0x10, 0x0e
; .byte 0x57, 0x03, 0x12, 0x1e
; .byte 0x57, 0x70, 0x08, 0xcc
; .byte 0x27, 0x03, 0x05, 0x02
; ld ra, 8(sp)
; ld s0, 0(sp)
; addi sp, sp, 0x10
; ret

function %iabs_i64x2(i64x2) -> i64x2 {
block0(v0: i64x2):
v1 = iabs v0
return v1
}

; VCode:
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; block0:
; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma)
; vneg.v v4,v1 #avl=2, #vtype=(e64, m1, ta, ma)
; vmax.vv v6,v1,v4 #avl=2, #vtype=(e64, m1, ta, ma)
; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, ta, ma)
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; addi sp, sp, -0x10
; sd ra, 8(sp)
; sd s0, 0(sp)
; ori s0, sp, 0
; block1: ; offset 0x10
; .byte 0x57, 0x70, 0x08, 0xcc
; addi t6, s0, 0x10
; .byte 0x87, 0x80, 0x0f, 0x02
; .byte 0x57, 0x70, 0x81, 0xcd
; .byte 0x57, 0x42, 0x10, 0x0e
; .byte 0x57, 0x03, 0x12, 0x1e
; .byte 0x57, 0x70, 0x08, 0xcc
; .byte 0x27, 0x03, 0x05, 0x02
; ld ra, 8(sp)
; ld s0, 0(sp)
; addi sp, sp, 0x10
; ret

Loading