-
Notifications
You must be signed in to change notification settings - Fork 1.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
riscv64/x390: add *_overflow #9214
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2871,6 +2871,37 @@ | |
;; zero-extension for the `Value` input. | ||
(decl put_value_in_reg_for_icmp (IntCC Value) XReg) | ||
|
||
;; Unsigned 64-bit multiply-add with overflow detection. | ||
;; | ||
;; Computes `x * y + z` truncated to 64 bits and returns it in register 0 of | ||
;; the result. Register 1 is 1 if the unsigned product has a nonzero high | ||
;; half or the addition wrapped, and 0 otherwise. | ||
(decl umadd_overflow64 (XReg XReg XReg) ValueRegs) | ||
|
||
(rule (umadd_overflow64 x y z) | ||
  (let ((hi XReg (rv_mulhu x y)) | ||
        (m XReg (rv_mul x y)) | ||
        ;; The product overflows iff its high 64 bits are nonzero. | ||
        (of_mul XReg (rv_snez hi)) | ||
        (sum XReg (rv_add m z)) | ||
        ;; The addition wrapped iff the sum is below one of its operands. | ||
        (of_add XReg (rv_sltu sum m)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This could also be replaced with a |
||
        (of XReg (rv_or of_mul of_add))) | ||
    (value_regs sum of))) | ||
|
||
;; Signed 64-bit multiply-add with overflow detection. | ||
;; | ||
;; Computes `x * y + z` truncated to 64 bits and returns it in register 0 of | ||
;; the result. Register 1 is 1 if either the signed multiply or the signed | ||
;; add overflowed, and 0 otherwise. | ||
(decl smadd_overflow64 (XReg XReg XReg) ValueRegs) | ||
|
||
(rule (smadd_overflow64 x y z) | ||
  (let ((mul_lo XReg (rv_mul x y)) | ||
        (mul_hi XReg (rv_mulh x y)) | ||
        ;; The 128-bit product fits in 64 signed bits iff its high word is | ||
        ;; exactly the sign extension of its low word. Comparing only the | ||
        ;; two sign bits would miss e.g. a high word of 1. | ||
        (sign_mul_lo XReg (rv_srai mul_lo (imm12_const 63))) | ||
        (mul_overflow XReg (rv_snez (rv_xor mul_hi sign_mul_lo))) | ||
|
||
        (sum XReg (rv_add mul_lo z)) | ||
        ;; Signed add overflow: adding a negative `z` must decrease the | ||
        ;; value and adding a non-negative `z` must not; overflow iff the | ||
        ;; two conditions disagree. Both `slt` results are 0/1. | ||
        (sum_decreased XReg (rv_slt sum mul_lo)) | ||
        (z_negative XReg (rv_slt z (zero_reg))) | ||
        (add_overflow XReg (rv_xor sum_decreased z_negative)) | ||
|
||
        (overflow XReg (rv_or mul_overflow add_overflow))) | ||
    (value_regs sum overflow))) | ||
|
||
;; Base cases, use the `cc` to determine whether to zero or sign extend. | ||
;; This priority-0 rule zero-extends `val` without inspecting `cc`. | ||
;; NOTE(review): rules that sign-extend for signed `cc` values are presumably | ||
;; outside this diff hunk -- confirm. | ||
(rule 0 (put_value_in_reg_for_icmp cc val) | ||
(zext val)) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -311,6 +311,191 @@ | |
(_ InstOutput (gen_trapif (IntCC.UnsignedLessThan) tmp x tc))) | ||
tmp)) | ||
|
||
;;; Rules for `uadd_overflow` ;;;;;;;;;;;;; | ||
;; Types of at most 32 bits: zero-extend both operands and add them in full | ||
;; registers. Shifting the sum right by the type's bit width leaves the | ||
;; carry-out, which is returned as the overflow flag. | ||
(rule 0 (lower (has_type (fits_in_32 ty) (uadd_overflow x y))) | ||
  (let ((lhs XReg (zext x)) | ||
        (rhs XReg (zext y)) | ||
        (wide_sum XReg (rv_add lhs rhs)) | ||
        (carry_out XReg (rv_srli wide_sum (imm12_const (ty_bits ty))))) | ||
    (output_pair (value_reg wide_sum) (value_reg carry_out)))) | ||
|
||
;; 64-bit: an unsigned add wrapped iff the sum is below one of its operands; | ||
;; `sltu` yields that 0/1 flag directly, so no constant or select is needed. | ||
(rule 1 (lower (has_type $I64 (uadd_overflow x y))) | ||
  (let ((sum XReg (rv_add x y)) | ||
        (of XReg (rv_sltu sum x))) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It might be better to use |
||
    (output_pair | ||
      (value_reg sum) | ||
      (value_reg of)))) | ||
|
||
;; 128-bit: add the low words, propagate the carry into the high words, and | ||
;; report overflow if either high-word addition wrapped. | ||
(rule 2 (lower (has_type $I128 (uadd_overflow x y))) | ||
  (let ((low XReg (rv_add (value_regs_get x 0) (value_regs_get y 0))) | ||
        ;; compute carry. | ||
        (carry XReg (rv_sltu low (value_regs_get y 0))) | ||
        (high_tmp XReg (rv_add (value_regs_get x 1) (value_regs_get y 1))) | ||
        ;; carry-out of `x_hi + y_hi`; without this check, e.g. | ||
        ;; 2^127 + 2^127 would be reported as not overflowing. | ||
        (of_high XReg (rv_sltu high_tmp (value_regs_get y 1))) | ||
        ;; add carry. | ||
        (high XReg (rv_add high_tmp carry)) | ||
        ;; carry-out of adding the low-word carry (`carry` is 0 or 1). | ||
        (of_carry XReg (rv_sltu high carry)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ditto here for |
||
        (of XReg (rv_or of_high of_carry))) | ||
    (output_pair | ||
      (value_regs low high) | ||
      (value_reg of)))) | ||
|
||
;;; Rules for `umul_overflow` ;;;;;;;;;;;;; | ||
;; Types of at most 32 bits: multiply the zero-extended operands in full | ||
;; registers; the result overflowed `ty` iff any bit at or above `ty_bits` | ||
;; is set. | ||
(rule 0 (lower (has_type (fits_in_32 ty) (umul_overflow x y))) | ||
  (let ((tmp_x XReg (zext x)) | ||
        (tmp_y XReg (zext y)) | ||
        (res XReg (rv_mul tmp_x tmp_y)) | ||
        (hi XReg (rv_srli res (imm12_const (ty_bits ty)))) | ||
        ;; `snez` turns "high part is nonzero" into a 0/1 flag. | ||
        (of XReg (rv_snez hi))) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ditto here |
||
    (output_pair | ||
      (value_reg res) | ||
      (value_reg of)))) | ||
|
||
;; 64-bit: the unsigned product overflowed iff the high half returned by | ||
;; `mulhu` is nonzero. | ||
(rule 1 (lower (has_type $I64 (umul_overflow x y))) | ||
  (let ((hi XReg (rv_mulhu x y)) | ||
        (res XReg (rv_mul x y)) | ||
        (of XReg (rv_snez hi))) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It might be better to use |
||
    (output_pair | ||
      (value_reg res) | ||
      (value_reg of)))) | ||
|
||
;; 128-bit unsigned multiply with overflow, built from 64-bit pieces. | ||
(rule 2 (lower (has_type $I128 (umul_overflow x y))) | ||
  (let | ||
    ((x_regs ValueRegs x) | ||
     (x_lo XReg (value_regs_get x_regs 0)) | ||
     (x_hi XReg (value_regs_get x_regs 1)) | ||
|
||
     ;; Get the high/low registers for `y`. | ||
     (y_regs ValueRegs y) | ||
     (y_lo XReg (value_regs_get y_regs 0)) | ||
     (y_hi XReg (value_regs_get y_regs 1)) | ||
|
||
     ;; 128bit mul formula: | ||
     ;; dst_lo = x_lo * y_lo | ||
     ;; dst_hi = mulhu(x_lo, y_lo) + (x_lo * y_hi) + (x_hi * y_lo) | ||
     ;; | ||
     ;; The full product also contains `x_hi * y_hi * 2^128`, so the result | ||
     ;; overflows if both high words are nonzero, if either cross product | ||
     ;; does not fit in 64 bits, or if accumulating `dst_hi` carries out. | ||
     ;; `umadd_overflow64` reports the last two cases for each cross term. | ||
     (dst_hi1 XReg (rv_mulhu x_lo y_lo)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This |
||
     ;; `dst_hi1` is a plain `XReg`, so it is passed directly rather than | ||
     ;; through `value_regs_get`. | ||
     (dst_hi2 ValueRegs (umadd_overflow64 x_lo y_hi dst_hi1)) | ||
     (dst_hi ValueRegs (umadd_overflow64 x_hi y_lo (value_regs_get dst_hi2 0))) | ||
     (dst_lo XReg (rv_mul x_lo y_lo)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here we could use |
||
|
||
     ;; `x_hi * y_hi` is nonzero iff both high words are nonzero. | ||
     (of_both_hi XReg (rv_and (rv_snez x_hi) (rv_snez y_hi))) | ||
     (of_res1 XReg (rv_or (value_regs_get dst_hi2 1) (value_regs_get dst_hi 1))) | ||
     (of_res XReg (rv_or of_res1 of_both_hi))) | ||
    (output_pair | ||
      (value_regs dst_lo (value_regs_get dst_hi 0)) | ||
      (value_reg of_res)))) | ||
|
||
;;; Rules for `smul_overflow` ;;;;;;;;;;;;; | ||
|
||
;; Types of at most 32 bits: the product of the sign-extended operands is | ||
;; exact in a 64-bit register, so it overflows `ty` iff it lies outside the | ||
;; signed range of `ty`. `(res << 1) >> ty_bits` (arithmetic shift) equals | ||
;; `floor(res / 2^(ty_bits - 1))`, which is 0 or -1 exactly when `res` is in | ||
;; range; xor-ing it with its own sign mask maps both of those to 0. | ||
;; Testing `res >> ty_bits != 0` instead would flag every negative in-range | ||
;; product and miss positive products in [2^(ty_bits-1), 2^ty_bits). | ||
(rule 0 (lower (has_type (fits_in_32 ty) (smul_overflow x y))) | ||
  (let ((tmp_x XReg (sext x)) | ||
        (tmp_y XReg (sext y)) | ||
        (res XReg (rv_mul tmp_x tmp_y)) | ||
        (hi XReg (rv_srai (rv_slli res (imm12_const 1)) (imm12_const (ty_bits ty)))) | ||
        (of XReg (rv_snez (rv_xor hi (rv_srai hi (imm12_const 63)))))) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This could also be a |
||
    (output_pair | ||
      (value_reg res) | ||
      (value_reg of)))) | ||
|
||
;; 64-bit: the signed 128-bit product fits in 64 bits iff its high word | ||
;; (`mulh`) is the sign extension of its low word. Testing `hi != 0` alone | ||
;; would flag every negative in-range product, whose high word is -1. | ||
(rule 1 (lower (has_type $I64 (smul_overflow x y))) | ||
  (let ((hi XReg (rv_mulh x y)) | ||
        (res XReg (rv_mul x y)) | ||
        (of XReg (rv_snez (rv_xor hi (rv_srai res (imm12_const 63)))))) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same here |
||
    (output_pair | ||
      (value_reg res) | ||
      (value_reg of)))) | ||
|
||
;; 128-bit `smul_overflow`: splits both operands into 64-bit halves, builds | ||
;; the low word with `madd` and the high word from `mulhu` plus two | ||
;; `smadd_overflow64` cross terms, and ORs overflow bits into the flag. | ||
;; NOTE(review): `dst_hi1` is bound as an `XReg` below but is then indexed | ||
;; with `value_regs_get` (registers 0 and 1) -- confirm this type-checks. | ||
;; NOTE(review): the cross terms use the signed `smadd_overflow64` while the | ||
;; low product's high half comes from the unsigned `mulhu`; confirm the flag | ||
;; is correct for negative operands. | ||
(rule 2 (lower (has_type $I128 (smul_overflow x y))) | ||
(let | ||
((x_regs ValueRegs x) | ||
(x_lo XReg (value_regs_get x_regs 0)) | ||
(x_hi XReg (value_regs_get x_regs 1)) | ||
|
||
;; Get the high/low registers for `y`. | ||
(y_regs ValueRegs y) | ||
(y_lo XReg (value_regs_get y_regs 0)) | ||
(y_hi XReg (value_regs_get y_regs 1)) | ||
|
||
;; 128bit mul formula: | ||
;; dst_lo = x_lo * y_lo | ||
;; dst_hi = mulhu(x_lo, y_lo) + (x_lo * y_hi) + (x_hi * y_lo) | ||
;; | ||
;; We can convert the above formula into the following | ||
;; mulhu dst_hi, x_lo, y_lo | ||
;; madd dst_hi, x_lo, y_hi, dst_hi | ||
;; madd dst_hi, x_hi, y_lo, dst_hi | ||
;; madd dst_lo, x_lo, y_lo, zero | ||
(dst_hi1 XReg (rv_mulhu x_lo y_lo)) | ||
;; `one` is not referenced anywhere else in this rule. | ||
(one XReg (imm $I32 1)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This |
||
(dst_hi2 ValueRegs (smadd_overflow64 x_lo y_hi (value_regs_get dst_hi1 0))) | ||
(dst_hi ValueRegs (smadd_overflow64 x_hi y_lo (value_regs_get dst_hi2 0))) | ||
(dst_lo XReg (madd x_lo y_lo (zero_reg))) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Instead of doing madd here, we can just multiply |
||
|
||
;; Combine the overflow bits of the partial results into one flag. | ||
(of_res1 XReg (rv_or (value_regs_get dst_hi1 1) (value_regs_get dst_hi2 1))) | ||
(of_res XReg (rv_or of_res1 (value_regs_get dst_hi 1)))) | ||
(output_pair | ||
(value_regs dst_lo (value_regs_get dst_hi 0)) | ||
(value_reg of_res)))) | ||
|
||
;;; Rules for `sadd_overflow` ;;;;;;;;;;;;; | ||
|
||
;; Types of at most 32 bits: the sum of the sign-extended operands is exact | ||
;; in a full register, so every bit from `ty_bits` upwards equals the true | ||
;; sign. The sum fits in `ty` iff bit `ty_bits - 1` (the sign bit of the | ||
;; truncated result) equals that true sign, i.e. iff bits `ty_bits - 1` and | ||
;; `ty_bits` of the sum agree. | ||
(rule 0 (lower (has_type (fits_in_32 ty) (sadd_overflow x y))) | ||
  (let ((tmp_x XReg (sext x)) | ||
        (tmp_y XReg (sext y)) | ||
        (sum XReg (rv_add tmp_x tmp_y)) | ||
        ;; Bit `ty_bits` of `sum ^ (sum << 1)` is | ||
        ;; `sum[ty_bits] ^ sum[ty_bits - 1]`. | ||
        (mismatch XReg (rv_xor sum (rv_slli sum (imm12_const 1)))) | ||
        ;; Move that bit to position 0 and mask it to get a 0/1 flag. | ||
        (overflow XReg (rv_andi (rv_srli mismatch (imm12_const (ty_bits ty))) (imm12_const 1)))) | ||
    (output_pair | ||
      (value_reg sum) | ||
      (value_reg overflow)))) | ||
|
||
;; 64-bit: adding a negative `y` must make the sum smaller than `x`, and | ||
;; adding a non-negative `y` must not; signed overflow occurred iff those | ||
;; two conditions disagree. Both `slt` results are 0/1, so the xor is a 0/1 | ||
;; flag and no Zbb instruction is required. | ||
(rule 1 (lower (has_type $I64 (sadd_overflow x y))) | ||
  (let ((sum XReg (rv_add x y)) | ||
        (sum_decreased XReg (rv_slt sum x)) | ||
        (y_negative XReg (rv_slt y (zero_reg))) | ||
        (overflow XReg (rv_xor sum_decreased y_negative))) | ||
    (output_pair | ||
      (value_reg sum) | ||
      (value_reg overflow)))) | ||
|
||
;; 128-bit: add the low words, propagate the carry into the high words, and | ||
;; derive signed overflow from the sign bits of the high words. | ||
(rule 2 (lower (has_type $I128 (sadd_overflow x y))) | ||
  (let ((low XReg (rv_add (value_regs_get x 0) (value_regs_get y 0))) | ||
        ;; The low words are unsigned magnitudes, so the carry needs an | ||
        ;; unsigned compare (`sltu`, not `slt`). | ||
        (carry XReg (rv_sltu low (value_regs_get y 0))) | ||
        (high_tmp XReg (rv_add (value_regs_get x 1) (value_regs_get y 1))) | ||
        (high XReg (rv_add high_tmp carry)) | ||
        ;; Signed overflow iff both operands' signs differ from the result's | ||
        ;; sign: the top bit of `(x_hi ^ high) & (y_hi ^ high)`. | ||
        (x_diff XReg (rv_xor (value_regs_get x 1) high)) | ||
        (y_diff XReg (rv_xor (value_regs_get y 1) high)) | ||
        ;; Logical shift by 63 leaves that top bit as a 0/1 flag. | ||
        (overflow XReg (rv_srli (rv_and x_diff y_diff) (imm12_const 63)))) | ||
    (output_pair | ||
      (value_regs low high) | ||
      (value_reg overflow)))) | ||
|
||
;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | ||
;; Base case, simply subtracting things in registers. | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We could replace this with a `snez` instruction.