Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

riscv64/s390x: add *_overflow #9214

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions cranelift/codegen/src/isa/riscv64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -2871,6 +2871,37 @@
;; zero-extension for the `Value` input.
(decl put_value_in_reg_for_icmp (IntCC Value) XReg)

;; Unsigned 64-bit multiply-add with overflow detection.
;;
;; Computes `x * y + z` and returns a register pair: the wrapped 64-bit result
;; followed by a flag that is 1 when either the multiplication or the addition
;; overflowed and 0 otherwise.
(decl umadd_overflow64 (XReg XReg XReg) ValueRegs)

(rule (umadd_overflow64 x y z)
  (let (;; The unsigned product overflows 64 bits iff its high half is
        ;; non-zero. `snez` produces the 0/1 flag directly, so no constant or
        ;; select sequence is needed.
        (hi XReg (rv_mulhu x y))
        (m XReg (rv_mul x y))
        (of_mul XReg (rv_snez hi))
        ;; An unsigned add wrapped iff the sum is smaller than an addend;
        ;; `sltu` already yields 0/1.
        (sum XReg (rv_add m z))
        (of_add XReg (rv_sltu sum m))
        (of XReg (rv_or of_mul of_add)))
    (value_regs sum of)))

;; Signed 64-bit multiply-add with overflow detection.
;;
;; Computes `x * y + z` and returns a register pair: the wrapped 64-bit result
;; followed by a flag that is 1 when either the multiplication or the addition
;; overflowed as a signed operation and 0 otherwise.
(decl smadd_overflow64 (XReg XReg XReg) ValueRegs)

(rule (smadd_overflow64 x y z)
  (let ((mul_lo XReg (rv_mul x y))
        (mul_hi XReg (rv_mulh x y))
        ;; The signed product fits in 64 bits iff the high half equals the
        ;; sign-extension of the low half. Comparing only the *signs* of the
        ;; two halves would miss e.g. a high half of 5 with a positive low
        ;; half, so the whole high word is compared.
        (sign_mul_lo XReg (rv_srai mul_lo (imm12_const 63)))
        (mul_overflow XReg (rv_snez (rv_xor mul_hi sign_mul_lo)))

        ;; Signed add overflow check using only base-ISA instructions:
        ;; adding a negative `z` must make the sum smaller, adding a
        ;; non-negative `z` must not. Overflow iff those two facts disagree.
        (sum XReg (rv_add mul_lo z))
        (z_is_neg XReg (rv_slt z (zero_reg)))
        (sum_is_less XReg (rv_slt sum mul_lo))
        (add_overflow XReg (rv_xor z_is_neg sum_is_less))

        (overflow XReg (rv_or mul_overflow add_overflow)))
    (value_regs sum overflow)))

;; Base cases, use the `cc` to determine whether to zero or sign extend.
;; This priority-0 rule zero-extends `val` and does not inspect `cc`.
;; NOTE(review): any sign-extending cases would have to come from other rules
;; for this constructor that are not visible in this hunk -- confirm.
(rule 0 (put_value_in_reg_for_icmp cc val)
(zext val))
Expand Down
185 changes: 185 additions & 0 deletions cranelift/codegen/src/isa/riscv64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,191 @@
(_ InstOutput (gen_trapif (IntCC.UnsignedLessThan) tmp x tc)))
tmp))

;;; Rules for `uadd_overflow` ;;;;;;;;;;;;;

;; Types of at most 32 bits: both operands are zero-extended and added in a
;; full-width register, then the sum is shifted right by the type width to
;; produce the second output.
(rule 0 (lower (has_type (fits_in_32 ty) (uadd_overflow x y)))
  (let ((lhs XReg (zext x))
        (rhs XReg (zext y))
        (wide_sum XReg (rv_add lhs rhs))
        (carry_out XReg (rv_srli wide_sum (imm12_const (ty_bits ty)))))
    (output_pair (value_reg wide_sum) (value_reg carry_out))))

;; 64-bit unsigned add with overflow flag.
;;
;; The add wrapped iff the sum is (unsigned) smaller than an operand. `sltu`
;; already produces 0 or 1, so it is used directly instead of materializing a
;; constant 1 and selecting between it and zero.
(rule 1 (lower (has_type $I64 (uadd_overflow x y)))
  (let ((sum XReg (rv_add x y))
        (of XReg (rv_sltu sum x)))
    (output_pair
      (value_reg sum)
      (value_reg of))))

;; 128-bit unsigned add with overflow flag, built from two 64-bit halves.
(rule 2 (lower (has_type $I128 (uadd_overflow x y)))
  (let ((x_lo XReg (value_regs_get x 0))
        (x_hi XReg (value_regs_get x 1))
        (y_lo XReg (value_regs_get y 0))
        (y_hi XReg (value_regs_get y 1))
        ;; Low half and its carry-out (sum wrapped iff it is below an addend).
        (low XReg (rv_add x_lo y_lo))
        (carry XReg (rv_sltu low y_lo))
        ;; High half: the 128-bit add overflows if EITHER `x_hi + y_hi` wraps
        ;; or adding the carry afterwards wraps. Checking only the second step
        ;; misses cases such as x_hi = y_hi = 1 << 63.
        (high_tmp XReg (rv_add x_hi y_hi))
        (of_hi XReg (rv_sltu high_tmp y_hi))
        (high XReg (rv_add high_tmp carry))
        ;; `carry` is 0 or 1, so `high < carry` is true only when adding a
        ;; carry of 1 wrapped `high` around to 0.
        (of_carry XReg (rv_sltu high carry))
        ;; At most one of the two steps can wrap, so OR-ing yields 0 or 1.
        (of XReg (rv_or of_hi of_carry)))
    (output_pair
      (value_regs low high)
      (value_reg of))))

;;; Rules for `umul_overflow` ;;;;;;;;;;;;;

;; Types of at most 32 bits: multiply the zero-extended operands in a 64-bit
;; register, where the product cannot wrap. The operation overflowed iff any
;; bit at or above the type width is set; `snez` turns that into a 0/1 flag
;; without materializing a constant or emitting a select sequence.
(rule 0 (lower (has_type (fits_in_32 ty) (umul_overflow x y)))
  (let ((tmp_x XReg (zext x))
        (tmp_y XReg (zext y))
        (res XReg (rv_mul tmp_x tmp_y))
        (hi XReg (rv_srli res (imm12_const (ty_bits ty))))
        (of XReg (rv_snez hi)))
    (output_pair
      (value_reg res)
      (value_reg of))))

;; 64-bit unsigned multiply with overflow flag.
;;
;; The product overflows iff the high 64 bits (`mulhu`) are non-zero; `snez`
;; produces the 0/1 flag directly instead of selecting between a constant 1
;; and zero.
(rule 1 (lower (has_type $I64 (umul_overflow x y)))
  (let ((hi XReg (rv_mulhu x y))
        (res XReg (rv_mul x y))
        (of XReg (rv_snez hi)))
    (output_pair
      (value_reg res)
      (value_reg of))))

;; 128-bit unsigned multiply with overflow flag.
;;
;; With x = x_hi:x_lo and y = y_hi:y_lo the full product is
;;   x_lo*y_lo + (x_lo*y_hi + x_hi*y_lo) * 2^64 + x_hi*y_hi * 2^128
;; so the 128-bit result is
;;   dst_lo = x_lo * y_lo
;;   dst_hi = mulhu(x_lo, y_lo) + (x_lo * y_hi) + (x_hi * y_lo)
;; and the operation overflows when any of the following holds:
;;   * x_hi and y_hi are both non-zero (the 2^128 term is non-zero),
;;   * either cross product does not fit in 64 bits,
;;   * accumulating `dst_hi` carries out of 64 bits.
(rule 2 (lower (has_type $I128 (umul_overflow x y)))
  (let
    ((x_regs ValueRegs x)
     (x_lo XReg (value_regs_get x_regs 0))
     (x_hi XReg (value_regs_get x_regs 1))

     ;; Get the high/low registers for `y`.
     (y_regs ValueRegs y)
     (y_lo XReg (value_regs_get y_regs 0))
     (y_hi XReg (value_regs_get y_regs 1))

     ;; Low half is a plain multiply; no add with zero is required.
     (dst_lo XReg (rv_mul x_lo y_lo))

     ;; `dst_hi1` is a single register with no overflow flag of its own:
     ;; mulhu(x_lo, y_lo) always fits in 64 bits.
     (dst_hi1 XReg (rv_mulhu x_lo y_lo))
     ;; Each step reports an overflow of its cross product or of the
     ;; accumulation in element 1 of the returned pair.
     (dst_hi2 ValueRegs (umadd_overflow64 x_lo y_hi dst_hi1))
     (dst_hi ValueRegs (umadd_overflow64 x_hi y_lo (value_regs_get dst_hi2 0)))

     ;; x_hi * y_hi contributes only to bits >= 128, so it is an overflow
     ;; whenever both high halves are non-zero.
     (both_hi XReg (rv_and (rv_snez x_hi) (rv_snez y_hi)))

     (of_res1 XReg (rv_or both_hi (value_regs_get dst_hi2 1)))
     (of_res XReg (rv_or of_res1 (value_regs_get dst_hi 1))))
    (output_pair
      (value_regs dst_lo (value_regs_get dst_hi 0))
      (value_reg of_res))))

;;; Rules for `smul_overflow` ;;;;;;;;;;;;;

;; Types of at most 32 bits: multiply the sign-extended operands in a 64-bit
;; register, where the product cannot wrap. The result fits in `ty` iff every
;; bit from the type's sign bit upwards is identical, i.e. iff
;; `res >> (bits - 1)` (arithmetic) is 0 or -1. A value v is 0 or -1 exactly
;; when v == v >> 1, so the two arithmetic shifts are compared.
;;
;; Testing `res >> bits != 0` instead would flag every negative product and
;; miss positive products that set the sign bit (e.g. 16 * 8 for an 8-bit
;; type).
(rule 0 (lower (has_type (fits_in_32 ty) (smul_overflow x y)))
  (let ((tmp_x XReg (sext x))
        (tmp_y XReg (sext y))
        (res XReg (rv_mul tmp_x tmp_y))
        (from_sign XReg (rv_srai res (imm12_const (u8_sub (ty_bits ty) 1))))
        (above_sign XReg (rv_srai res (imm12_const (ty_bits ty))))
        (of XReg (rv_snez (rv_xor from_sign above_sign))))
    (output_pair
      (value_reg res)
      (value_reg of))))

;; 64-bit signed multiply with overflow flag.
;;
;; The signed product fits in 64 bits iff the high half (`mulh`) equals the
;; sign-extension of the low half. Testing `mulh != 0` alone is wrong for
;; negative products, whose high half is all ones when they fit.
(rule 1 (lower (has_type $I64 (smul_overflow x y)))
  (let ((hi XReg (rv_mulh x y))
        (res XReg (rv_mul x y))
        (res_sign XReg (rv_srai res (imm12_const 63)))
        (of XReg (rv_snez (rv_xor hi res_sign))))
    (output_pair
      (value_reg res)
      (value_reg of))))

;; 128-bit signed multiply with overflow flag.
;;
;; The 128-bit result is computed as
;;   dst_lo = x_lo * y_lo
;;   dst_hi = mulhu(x_lo, y_lo) + (x_lo * y_hi) + (x_hi * y_lo)
;;
;; NOTE(review): the overflow flag produced here is probably not exact. The
;; cross products mix an unsigned low half with a signed high half but are
;; checked with the signed 64-bit helper, and the x_hi * y_hi term is not
;; considered at all. A precise check needs the high 128 bits of the full
;; signed product to equal the sign-extension of the low 128 bits -- confirm
;; and rework before relying on this flag.
(rule 2 (lower (has_type $I128 (smul_overflow x y)))
  (let
    ((x_regs ValueRegs x)
     (x_lo XReg (value_regs_get x_regs 0))
     (x_hi XReg (value_regs_get x_regs 1))

     ;; Get the high/low registers for `y`.
     (y_regs ValueRegs y)
     (y_lo XReg (value_regs_get y_regs 0))
     (y_hi XReg (value_regs_get y_regs 1))

     ;; `dst_hi1` is a single register, so it is passed on directly and has
     ;; no overflow element to read.
     (dst_hi1 XReg (rv_mulhu x_lo y_lo))
     (dst_hi2 ValueRegs (smadd_overflow64 x_lo y_hi dst_hi1))
     (dst_hi ValueRegs (smadd_overflow64 x_hi y_lo (value_regs_get dst_hi2 0)))
     ;; Low half is a plain multiply; no add with zero is required.
     (dst_lo XReg (rv_mul x_lo y_lo))

     (of_res XReg (rv_or (value_regs_get dst_hi2 1) (value_regs_get dst_hi 1))))
    (output_pair
      (value_regs dst_lo (value_regs_get dst_hi 0))
      (value_reg of_res))))

;;; Rules for `sadd_overflow` ;;;;;;;;;;;;;

;; Types of at most 32 bits: add the sign-extended operands in a 64-bit
;; register, where the sum is exact. The result fits in `ty` iff every bit
;; from the type's sign bit upwards is identical, i.e. iff
;; `sum >> (bits - 1)` (arithmetic) is 0 or -1. A value v is 0 or -1 exactly
;; when v == v >> 1, so the two arithmetic shifts are compared.
;;
;; Comparing operand signs against `sum >> bits` cannot work here: the 64-bit
;; sum never wraps, so its upper bits always carry the mathematically correct
;; sign and no overflow would ever be reported.
(rule 0 (lower (has_type (fits_in_32 ty) (sadd_overflow x y)))
  (let ((tmp_x XReg (sext x))
        (tmp_y XReg (sext y))
        (sum XReg (rv_add tmp_x tmp_y))
        (from_sign XReg (rv_srai sum (imm12_const (u8_sub (ty_bits ty) 1))))
        (above_sign XReg (rv_srai sum (imm12_const (ty_bits ty))))
        (overflow XReg (rv_snez (rv_xor from_sign above_sign))))
    (output_pair
      (value_reg sum)
      (value_reg overflow))))

;; 64-bit signed add with overflow flag.
;;
;; Uses the base-ISA overflow check: adding a negative `y` must make the sum
;; smaller than `x`, adding a non-negative `y` must not. The add overflowed
;; iff those two facts disagree. This needs no Zbb instruction (`xnor`) and
;; yields a 0/1 flag rather than 0/-1.
(rule 1 (lower (has_type $I64 (sadd_overflow x y)))
  (let ((sum XReg (rv_add x y))
        (y_is_neg XReg (rv_slt y (zero_reg)))
        (sum_is_less XReg (rv_slt sum x))
        (overflow XReg (rv_xor y_is_neg sum_is_less)))
    (output_pair
      (value_reg sum)
      (value_reg overflow))))

;; 128-bit signed add with overflow flag, built from two 64-bit halves.
(rule 2 (lower (has_type $I128 (sadd_overflow x y)))
  (let ((x_lo XReg (value_regs_get x 0))
        (x_hi XReg (value_regs_get x 1))
        (y_lo XReg (value_regs_get y 0))
        (y_hi XReg (value_regs_get y 1))
        ;; The low halves are unsigned magnitudes, so the carry out of the low
        ;; add must use an unsigned comparison even for a signed 128-bit add.
        (low XReg (rv_add x_lo y_lo))
        (carry XReg (rv_sltu low y_lo))
        (high_tmp XReg (rv_add x_hi y_hi))
        (high XReg (rv_add high_tmp carry))
        ;; Signed overflow iff both operands have the same sign and the result
        ;; sign differs: the sign bit of (x_hi ^ high) & (y_hi ^ high).
        ;; The logical shift moves that bit down to give a 0/1 flag.
        (x_flip XReg (rv_xor x_hi high))
        (y_flip XReg (rv_xor y_hi high))
        (overflow XReg (rv_srli (rv_and x_flip y_flip) (imm12_const 63))))
    (output_pair
      (value_regs low high)
      (value_reg overflow))))

;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Base case, simply subtracting things in registers.

Expand Down
41 changes: 41 additions & 0 deletions cranelift/codegen/src/isa/s390x/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -2082,13 +2082,25 @@
(_ Unit (emit (MInst.AluRSImm16 op dst src imm))))
dst))

;; Flag-producing counterpart of `alu_rsimm16`: instead of emitting the
;; `MInst.AluRSImm16` itself, it wraps the instruction and its freshly
;; allocated destination register in a `ProducesFlags` value.
(decl alu_rsimm16_with_flags_paired (Type ALUOp Reg i16) ProducesFlags)
(rule (alu_rsimm16_with_flags_paired ty op src imm)
  (let ((result WritableReg (temp_writable_reg ty)))
    (ProducesFlags.ProducesFlagsReturnsResultWithConsumer
      (MInst.AluRSImm16 op result src imm)
      result)))

;; Helper for emitting `MInst.AluRSImm32` instructions.
;; Allocates a temporary of type `ty`, emits the instruction writing to it,
;; and returns that temporary.
(decl alu_rsimm32 (Type ALUOp Reg i32) Reg)
(rule (alu_rsimm32 ty op src imm)
  (let ((result WritableReg (temp_writable_reg ty))
        (_ Unit (emit (MInst.AluRSImm32 op result src imm))))
    result))

;; Flag-producing counterpart of `alu_rsimm32`: instead of emitting the
;; `MInst.AluRSImm32` itself, it wraps the instruction and its freshly
;; allocated destination register in a `ProducesFlags` value.
(decl alu_rsimm32_with_flags_paired (Type ALUOp Reg i32) ProducesFlags)
(rule (alu_rsimm32_with_flags_paired ty op src imm)
  (let ((result WritableReg (temp_writable_reg ty)))
    (ProducesFlags.ProducesFlagsReturnsResultWithConsumer
      (MInst.AluRSImm32 op result src imm)
      result)))

;; Helper for emitting `MInst.AluRUImm32` instructions.
(decl alu_ruimm32 (Type ALUOp Reg u32) Reg)
(rule (alu_ruimm32 ty op src imm)
Expand Down Expand Up @@ -3371,6 +3383,7 @@
(bool producer (invert_cond cond)))

;; Use a boolean condition to select between two registers.
;; The flag producer of the `ProducesBool` is paired with a conditional
;; register move on its condition.
(decl select_bool_reg (Type ProducesBool Reg Reg) Reg)
(rule (select_bool_reg ty (ProducesBool.ProducesBool flags cc) if_true if_false)
  (with_flags_reg flags
                  (cmov_reg_reg ty cc if_true if_false)))
Expand Down Expand Up @@ -4060,24 +4073,52 @@
;; Register-register multiply using the op chosen by `aluop_mul` for `ty`.
(decl mul_reg (Type Reg Reg) Reg)
(rule (mul_reg ty lhs rhs)
  (alu_rrr ty (aluop_mul ty) lhs rhs))

;; Same multiply, returned as a `ProducesFlags` value for pairing with a
;; flags consumer.
;; NOTE(review): confirm the instruction finally emitted for this op sets the
;; condition code on every supported target configuration.
(decl mul_reg_with_flags_paired (Type Reg Reg) ProducesFlags)
(rule (mul_reg_with_flags_paired ty lhs rhs)
  (alu_rrr_with_flags_paired ty (aluop_mul ty) lhs rhs))

;; Register-register multiply using the op chosen by `aluop_mul_sext32`,
;; emitted through the two-operand `alu_rr` helper.
(decl mul_reg_sext32 (Type Reg Reg) Reg)
(rule (mul_reg_sext32 ty x y) (alu_rr ty (aluop_mul_sext32 ty) x y))

;; Flag-producing variant. It goes through the two-operand
;; `alu_rr_with_flags_paired` helper so that it uses the same instruction
;; format as `mul_reg_sext32` above rather than the three-operand form.
;; NOTE(review): confirm the instruction emitted for this op actually sets
;; the condition code.
(decl mul_reg_sext32_with_flags_paired (Type Reg Reg) ProducesFlags)
(rule (mul_reg_sext32_with_flags_paired ty x y)
  (alu_rr_with_flags_paired ty (aluop_mul_sext32 ty) x y))

;; Multiply a register by a signed 16-bit immediate.
(decl mul_simm16 (Type Reg i16) Reg)
(rule (mul_simm16 ty lhs imm)
  (alu_rsimm16 ty (aluop_mul ty) lhs imm))

;; Same multiply, returned as a `ProducesFlags` value.
;; NOTE(review): confirm the immediate-form multiply emitted here sets the
;; condition code; several z/Architecture multiply instructions leave it
;; unchanged.
(decl mul_simm16_with_flags_paired (Type Reg i16) ProducesFlags)
(rule (mul_simm16_with_flags_paired ty lhs imm)
  (alu_rsimm16_with_flags_paired ty (aluop_mul ty) lhs imm))

;; Multiply a register by a signed 32-bit immediate.
(decl mul_simm32 (Type Reg i32) Reg)
(rule (mul_simm32 ty lhs imm)
  (alu_rsimm32 ty (aluop_mul ty) lhs imm))

;; Same multiply, returned as a `ProducesFlags` value.
;; NOTE(review): confirm the immediate-form multiply emitted here sets the
;; condition code; several z/Architecture multiply instructions leave it
;; unchanged.
(decl mul_simm32_with_flags_paired (Type Reg i32) ProducesFlags)
(rule (mul_simm32_with_flags_paired ty lhs imm)
  (alu_rsimm32_with_flags_paired ty (aluop_mul ty) lhs imm))

;; Multiply a register by a memory operand.
(decl mul_mem (Type Reg MemArg) Reg)
(rule (mul_mem ty lhs addr)
  (alu_rx ty (aluop_mul ty) lhs addr))

;; Same multiply, returned as a `ProducesFlags` value.
;; NOTE(review): confirm the memory-form multiply emitted here sets the
;; condition code.
(decl mul_mem_with_flags_paired (Type Reg MemArg) ProducesFlags)
(rule (mul_mem_with_flags_paired ty lhs addr)
  (alu_rx_with_flags_paired ty (aluop_mul ty) lhs addr))

;; Multiply a register by a memory operand, using the op chosen by
;; `aluop_mul_sext16`.
(decl mul_mem_sext16 (Type Reg MemArg) Reg)
(rule (mul_mem_sext16 ty lhs addr)
  (alu_rx ty (aluop_mul_sext16 ty) lhs addr))

;; Same multiply, returned as a `ProducesFlags` value.
;; NOTE(review): confirm the memory-form multiply emitted here sets the
;; condition code.
(decl mul_mem_sext16_with_flags_paired (Type Reg MemArg) ProducesFlags)
(rule (mul_mem_sext16_with_flags_paired ty lhs addr)
  (alu_rx_with_flags_paired ty (aluop_mul_sext16 ty) lhs addr))

;; Multiply a register by a memory operand, using the op chosen by
;; `aluop_mul_sext32`.
(decl mul_mem_sext32 (Type Reg MemArg) Reg)
(rule (mul_mem_sext32 ty lhs addr)
  (alu_rx ty (aluop_mul_sext32 ty) lhs addr))

;; Same multiply, returned as a `ProducesFlags` value.
;; NOTE(review): confirm the memory-form multiply emitted here sets the
;; condition code.
(decl mul_mem_sext32_with_flags_paired (Type Reg MemArg) ProducesFlags)
(rule (mul_mem_sext32_with_flags_paired ty lhs addr)
  (alu_rx_with_flags_paired ty (aluop_mul_sext32 ty) lhs addr))

(decl vecop_mul (Type) VecBinaryOp)
(rule (vecop_mul $I8X16) (VecBinaryOp.Mul8x16))
(rule (vecop_mul $I16X8) (VecBinaryOp.Mul16x8))
Expand Down
Loading
Loading