Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

aarch64: Migrate popcnt to ISLE #3662

Merged
merged 1 commit into from
Jan 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions cranelift/codegen/src/isa/aarch64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1421,6 +1421,13 @@
(_ Unit (emit (MInst.VecRRR op dst src1 src2 size))))
(writable_reg_to_reg dst)))

;; Helper for emitting `MInst.VecLanes` instructions.
(decl vec_lanes (VecLanesOp Reg VectorSize) Reg)
(rule (vec_lanes op src size)
(let ((dst WritableReg (temp_writable_reg $I8X16))
(_ Unit (emit (MInst.VecLanes op dst src size))))
(writable_reg_to_reg dst)))

;; Helper for emitting `MInst.VecDup` instructions.
(decl vec_dup (Reg VectorSize) Reg)
(rule (vec_dup src size)
Expand Down Expand Up @@ -1543,6 +1550,21 @@
(_ Unit (emit (MInst.VecRRLong op dst src high_half))))
(writable_reg_to_reg dst)))

;; Helper for emitting `MInst.MovToFpu` instructions.
(decl mov_to_fpu (Reg ScalarSize) Reg)
(rule (mov_to_fpu x size)
(let ((dst WritableReg (temp_writable_reg $I8X16))
(_ Unit (emit (MInst.MovToFpu dst x size))))
(writable_reg_to_reg dst)))

;; Helper for emitting `MInst.MovToVec` instructions.
(decl mov_to_vec (Reg Reg u8 VectorSize) Reg)
(rule (mov_to_vec src1 src2 lane size)
(let ((dst WritableReg (temp_writable_reg $I8X16))
(_1 Unit (emit (MInst.FpuMove128 dst src1)))
(_2 Unit (emit (MInst.MovToVec dst src2 lane size))))
(writable_reg_to_reg dst)))

;; Helper for emitting `MInst.MovFromVec` instructions.
(decl mov_from_vec (Reg u8 VectorSize) Reg)
(rule (mov_from_vec rn idx size)
Expand Down Expand Up @@ -1745,6 +1767,10 @@
(decl addp (Reg Reg VectorSize) Reg)
(rule (addp x y size) (vec_rrr (VecALUOp.Addp) x y size))

;; Helper for generating `addv` instructions.
(decl addv (Reg VectorSize) Reg)
(rule (addv x size) (vec_lanes (VecLanesOp.Addv) x size))

;; Helper for generating `shll32` instructions.
(decl shll32 (Reg bool) Reg)
(rule (shll32 x high_half) (vec_rr_long (VecRRLongOp.Shll32) x high_half))
Expand Down Expand Up @@ -1931,6 +1957,11 @@
(decl eon64 (Reg Reg) Reg)
(rule (eon64 x y) (alu_rrr (ALUOp.EorNot64) x y))

;; Helpers for generating `cnt` instructions.

(decl vec_cnt (Reg VectorSize) Reg)
(rule (vec_cnt x size) (vec_misc (VecMisc2.Cnt) x size))

;; Immediate value helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(decl imm (Type u64) Reg)
Expand Down
64 changes: 64 additions & 0 deletions cranelift/codegen/src/isa/aarch64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1129,3 +1129,67 @@
))
)
(value_regs (add64 maybe_lo hi_cls) (imm $I64 0))))

;;;; Rules for `popcnt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; The implementation of `popcnt` for scalar types is done by moving the value
;; into a vector register, using the `cnt` instruction, and then collating the
;; result back into a normal register.
;;
;; The general sequence emitted here is
;;
;; fmov tmp, in_lo
;; if ty == i128:
;; mov tmp.d[1], in_hi
;;
;; cnt tmp.16b, tmp.16b / cnt tmp.8b, tmp.8b
;; addv tmp, tmp.16b / addv tmp, tmp.8b / addp tmp.8b, tmp.8b, tmp.8b / (no instruction for 8-bit inputs)
;;
;; umov out_lo, tmp.b[0]
;; if ty == i128:
;; mov out_hi, 0

(rule (lower (has_type $I8 (popcnt x)))
(let (
(tmp Reg (mov_to_fpu (put_in_reg x) (ScalarSize.Size32)))
(nbits Reg (vec_cnt tmp (VectorSize.Size8x8)))
)
(value_reg (mov_from_vec nbits 0 (VectorSize.Size8x16)))))

;; Note that this uses `addp` instead of `addv` as it's usually cheaper.
(rule (lower (has_type $I16 (popcnt x)))
(let (
(tmp Reg (mov_to_fpu (put_in_reg x) (ScalarSize.Size32)))
(nbits Reg (vec_cnt tmp (VectorSize.Size8x8)))
(added Reg (addp nbits nbits (VectorSize.Size8x8)))
)
(value_reg (mov_from_vec added 0 (VectorSize.Size8x16)))))

(rule (lower (has_type $I32 (popcnt x)))
(let (
(tmp Reg (mov_to_fpu (put_in_reg x) (ScalarSize.Size32)))
(nbits Reg (vec_cnt tmp (VectorSize.Size8x8)))
(added Reg (addv nbits (VectorSize.Size8x8)))
)
(value_reg (mov_from_vec added 0 (VectorSize.Size8x16)))))

(rule (lower (has_type $I64 (popcnt x)))
(let (
(tmp Reg (mov_to_fpu (put_in_reg x) (ScalarSize.Size64)))
(nbits Reg (vec_cnt tmp (VectorSize.Size8x8)))
(added Reg (addv nbits (VectorSize.Size8x8)))
)
(value_reg (mov_from_vec added 0 (VectorSize.Size8x16)))))

(rule (lower (has_type $I128 (popcnt x)))
(let (
(val ValueRegs (put_in_regs x))
(tmp_half Reg (mov_to_fpu (value_regs_get val 0) (ScalarSize.Size64)))
(tmp Reg (mov_to_vec tmp_half (value_regs_get val 1) 1 (VectorSize.Size64x2)))
(nbits Reg (vec_cnt tmp (VectorSize.Size8x16)))
(added Reg (addv nbits (VectorSize.Size8x16)))
)
(value_regs (mov_from_vec added 0 (VectorSize.Size8x16)) (imm $I64 0))))

(rule (lower (has_type $I8X16 (popcnt x)))
(value_reg (vec_cnt (put_in_reg x) (VectorSize.Size8x16))))
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
src/clif.isle f176ef3bba99365
src/prelude.isle babc931e5dc5b4cf
src/isa/aarch64/inst.isle 3ae25d431916bb81
src/isa/aarch64/lower.isle 5715ecb7c7a41164
src/isa/aarch64/inst.isle 5fa80451697b084f
src/isa/aarch64/lower.isle 2d2e1e076a0c8a23
Loading