From ae915aea3243e69e354158087a0aa85c4c7ad25a Mon Sep 17 00:00:00 2001 From: David Keller Date: Fri, 24 Sep 2021 17:35:28 +0200 Subject: [PATCH] Int128 compiler-rt methods (Int128 literal support part 1) (#11206) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Johannes Müller --- spec/compiler/codegen/arithmetics_spec.cr | 6 +- .../std/crystal/compiler_rt/divmod128_spec.cr | 93 ++++++++ spec/std/crystal/compiler_rt/mulodi4_spec.cr | 2 +- spec/std/crystal/compiler_rt/mulosi4_spec.cr | 76 +++++++ spec/std/crystal/compiler_rt/muloti4_spec.cr | 151 +++++++++++++ spec/std/int_spec.cr | 68 ++++-- spec/std/uint_spec.cr | 8 + spec/win32_std_spec.cr | 2 + src/crystal/compiler_rt.cr | 3 +- src/crystal/compiler_rt/divmod128.cr | 205 ++++++++++++++++++ src/crystal/compiler_rt/mul.cr | 42 ++++ src/crystal/compiler_rt/mulodi4.cr | 37 ---- 12 files changed, 635 insertions(+), 58 deletions(-) create mode 100644 spec/std/crystal/compiler_rt/divmod128_spec.cr create mode 100644 spec/std/crystal/compiler_rt/mulosi4_spec.cr create mode 100644 spec/std/crystal/compiler_rt/muloti4_spec.cr create mode 100644 src/crystal/compiler_rt/divmod128.cr create mode 100644 src/crystal/compiler_rt/mul.cr delete mode 100644 src/crystal/compiler_rt/mulodi4.cr diff --git a/spec/compiler/codegen/arithmetics_spec.cr b/spec/compiler/codegen/arithmetics_spec.cr index adc1ccd32ca8..9bcf008e6b4d 100644 --- a/spec/compiler/codegen/arithmetics_spec.cr +++ b/spec/compiler/codegen/arithmetics_spec.cr @@ -1,15 +1,13 @@ require "../../spec_helper" -{% if flag?(:darwin) %} +# Int128 and UInt128 specs do not pass on win32 because of missing compiler-rt symbols +{% unless flag?(:win32) %} SupportedInts = [UInt8, UInt16, UInt32, UInt64, UInt128, Int8, Int16, Int32, Int64, Int128] SupportedIntsConversions = { to_i8: Int8, to_i16: Int16, to_i32: Int32, to_i64: Int64, to_i128: Int128, to_u8: UInt8, to_u16: UInt16, to_u32: UInt32, to_u64: UInt64, to_u128: UInt128, 
} {% else %} - # Skip Int128 and UInt128 on linux platforms due to compiler-rt dependency. - # PreviewOverflowFlags includes compiler_rt flag to support Int64 overflow - # detection in 32 bits platforms. SupportedInts = [UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64] SupportedIntsConversions = { to_i8: Int8, to_i16: Int16, to_i32: Int32, to_i64: Int64, diff --git a/spec/std/crystal/compiler_rt/divmod128_spec.cr b/spec/std/crystal/compiler_rt/divmod128_spec.cr new file mode 100644 index 000000000000..5dfd30dc180c --- /dev/null +++ b/spec/std/crystal/compiler_rt/divmod128_spec.cr @@ -0,0 +1,93 @@ +require "spec" + +# TODO: Replace helper methods with literals once possible + +private def make_ti(a : Int128, b : Int128) + (a << 64) + b +end + +private def make_tu(a : UInt128, b : UInt128) + (a << 64) + b +end + +# Ported from: +# - https://github.com/llvm/llvm-project/blob/ce59ccd04023cab3a837da14079ca2dcbfebb70c/compiler-rt/test/builtins/Unit/umodti3_test.c +# - https://github.com/llvm/llvm-project/blob/ce59ccd04023cab3a837da14079ca2dcbfebb70c/compiler-rt/test/builtins/Unit/udivti3_test.c +# - https://github.com/llvm/llvm-project/blob/ce59ccd04023cab3a837da14079ca2dcbfebb70c/compiler-rt/test/builtins/Unit/modti3_test.c +# - https://github.com/llvm/llvm-project/blob/ce59ccd04023cab3a837da14079ca2dcbfebb70c/compiler-rt/test/builtins/Unit/divti3_test.c + +private def test__divti3(a : Int128, b : Int128, expected : Int128, file = __FILE__, line = __LINE__) + it "passes compiler-rt builtins unit tests" do + actual = __divti3(a, b) + actual.should eq(expected), file: file, line: line + end +end + +private def test__modti3(a : Int128, b : Int128, expected : Int128, file = __FILE__, line = __LINE__) + it "passes compiler-rt builtins unit tests" do + actual = __modti3(a, b) + actual.should eq(expected), file: file, line: line + end +end + +private def test__udivti3(a : UInt128, b : UInt128, expected : UInt128, file = __FILE__, line = __LINE__) + it "passes 
compiler-rt builtins unit tests" do + actual = __udivti3(a, b) + actual.should eq(expected), file: file, line: line + end +end + +private def test__umodti3(a : UInt128, b : UInt128, expected : UInt128, file = __FILE__, line = __LINE__) + it "passes compiler-rt builtins unit tests" do + actual = __umodti3(a, b) + actual.should eq(expected), file: file, line: line + end +end + +describe "__divti3" do + test__divti3(0, 1, 0) + test__divti3(0, -1, 0) + test__divti3(2, 1, 2) + test__divti3(2, -1, -2) + test__divti3(-2, 1, -2) + test__divti3(-2, -1, 2) + test__divti3(make_ti(-9223372036854775808, 0x0), 1, make_ti(-9223372036854775808, 0x0)) + test__divti3(make_ti(-9223372036854775808, 0x0), -1, make_ti(-9223372036854775808, 0x0)) + test__divti3(make_ti(-9223372036854775808, 0x0), -2, make_ti(0x4000000000000000, 0x0)) + test__divti3(make_ti(-9223372036854775808, 0x0), 2, make_ti(-0x4000000000000000, 0x0)) +end + +describe "__modti3" do + test__modti3(0, 1, 0) + test__modti3(0, -1, 0) + + test__modti3(5, 3, 2) + test__modti3(5, -3, 2) + test__modti3(-5, 3, -2) + test__modti3(-5, -3, -2) + + test__modti3(make_ti(-9223372036854775808, 0x0), 1, 0) + test__modti3(make_ti(-9223372036854775808, 0x0), -1, 0) + test__modti3(make_ti(-9223372036854775808, 0x0), 2, 0) + test__modti3(make_ti(-9223372036854775808, 0x0), -2, 0) + test__modti3(make_ti(-9223372036854775808, 0x0), 3, -2) + test__modti3(make_ti(-9223372036854775808, 0x0), -3, -2) +end + +describe "__udivti3" do + test__udivti3(0, 1, 0) + test__udivti3(2, 1, 2) + + test__udivti3(make_tu(0x0, 0x8000000000000000), 1, make_tu(0x0, 0x8000000000000000)) + test__udivti3(make_tu(0x0, 0x8000000000000000), 2, make_tu(0x0, 0x4000000000000000)) + test__udivti3(make_tu(0xffffffffffffffff, 0xffffffffffffffff), 2, make_tu(0x7fffffffffffffff, 0xffffffffffffffff)) +end + +describe "__umodti3" do + test__umodti3(0, 1, 0) + test__umodti3(2, 1, 0) + + test__umodti3(make_tu(0x0, 0x8000000000000000), 1, 0) + test__umodti3(make_tu(0x0, 
0x8000000000000000), 2, 0) + test__umodti3(make_tu(0xffffffffffffffff, 0xffffffffffffffff), 2, 1) +end diff --git a/spec/std/crystal/compiler_rt/mulodi4_spec.cr b/spec/std/crystal/compiler_rt/mulodi4_spec.cr index 3e2586cd5d6a..1c413bdb8e77 100644 --- a/spec/std/crystal/compiler_rt/mulodi4_spec.cr +++ b/spec/std/crystal/compiler_rt/mulodi4_spec.cr @@ -1,6 +1,6 @@ require "spec" -# Ported from compiler-rt:test/builtins/Unit/mulodi4_test.c +# Ported from https://github.com/llvm/llvm-project/blob/ce59ccd04023cab3a837da14079ca2dcbfebb70c/compiler-rt/test/builtins/Unit/mulodi4_test.c private def test__mulodi4(a : Int64, b : Int64, expected : Int64, expected_overflow : Int32, file = __FILE__, line = __LINE__) it "passes compiler-rt builtins unit tests" do diff --git a/spec/std/crystal/compiler_rt/mulosi4_spec.cr b/spec/std/crystal/compiler_rt/mulosi4_spec.cr new file mode 100644 index 000000000000..e303ab4759b8 --- /dev/null +++ b/spec/std/crystal/compiler_rt/mulosi4_spec.cr @@ -0,0 +1,76 @@ +require "spec" + +# Ported from https://github.com/llvm/llvm-project/blob/ce59ccd04023cab3a837da14079ca2dcbfebb70c/compiler-rt/test/builtins/Unit/mulosi4_test.c + +private def test__mulosi4(a : Int32, b : Int32, expected : Int32, expected_overflow : Int32, file = __FILE__, line = __LINE__) + it "passes compiler-rt builtins unit tests" do + actual_overflow : Int32 = 0 + actual = __mulosi4(a, b, pointerof(actual_overflow)) + actual_overflow.should eq(expected_overflow), file: file, line: line + if expected_overflow == 0 # 0 is truthy in Crystal; compare explicitly so the product is checked + actual.should eq(expected), file: file, line: line + end + end +end + +describe "__mulosi4" do + test__mulosi4(0, 0, 0, 0) + test__mulosi4(0, 1, 0, 0) + test__mulosi4(1, 0, 0, 0) + test__mulosi4(0, 10, 0, 0) + test__mulosi4(10, 0, 0, 0) + test__mulosi4(0, 0x1234567, 0, 0) + test__mulosi4(0x1234567, 0, 0, 0) + + test__mulosi4(0, -1, 0, 0) + test__mulosi4(-1, 0, 0, 0) + test__mulosi4(0, -10, 0, 0) + test__mulosi4(-10, 0, 0, 0) + test__mulosi4(0, 0x1234567, 0, 0) +
test__mulosi4(0x1234567, 0, 0, 0) + + test__mulosi4(1, 1, 1, 0) + test__mulosi4(1, 10, 10, 0) + test__mulosi4(10, 1, 10, 0) + test__mulosi4(1, 0x1234567, 0x1234567, 0) + test__mulosi4(0x1234567, 1, 0x1234567, 0) + + test__mulosi4(1, -1, -1, 0) + test__mulosi4(1, -10, -10, 0) + test__mulosi4(-10, 1, -10, 0) + test__mulosi4(1, -0x1234567, -0x1234567, 0) + test__mulosi4(-0x1234567, 1, -0x1234567, 0) + + test__mulosi4(0x7FFFFFFF, -2, -0x7fffffff, 1) + test__mulosi4(-2, 0x7FFFFFFF, -0x7fffffff, 1) + test__mulosi4(0x7FFFFFFF, -1, -0x7fffffff, 0) + test__mulosi4(-1, 0x7FFFFFFF, -0x7fffffff, 0) + test__mulosi4(0x7FFFFFFF, 0, 0, 0) + test__mulosi4(0, 0x7FFFFFFF, 0, 0) + test__mulosi4(0x7FFFFFFF, 1, 0x7FFFFFFF, 0) + test__mulosi4(1, 0x7FFFFFFF, 0x7FFFFFFF, 0) + test__mulosi4(0x7FFFFFFF, 2, -0x7fffffff, 1) + test__mulosi4(2, 0x7FFFFFFF, -0x7fffffff, 1) + + test__mulosi4(-0x80000000, -2, -0x80000000, 1) + test__mulosi4(-2, -0x80000000, -0x80000000, 1) + test__mulosi4(-0x80000000, -1, -0x80000000, 1) + test__mulosi4(-1, -0x80000000, -0x80000000, 1) + test__mulosi4(-0x80000000, 0, 0, 0) + test__mulosi4(0, -0x80000000, 0, 0) + test__mulosi4(-0x80000000, 1, -0x80000000, 0) + test__mulosi4(1, -0x80000000, -0x80000000, 0) + test__mulosi4(-0x80000000, 2, -0x80000000, 1) + test__mulosi4(2, -0x80000000, -0x80000000, 1) + + test__mulosi4(-0x7fffffff, -2, -0x7fffffff, 1) + test__mulosi4(-2, -0x7fffffff, -0x7fffffff, 1) + test__mulosi4(-0x7fffffff, -1, 0x7FFFFFFF, 0) + test__mulosi4(-1, -0x7fffffff, 0x7FFFFFFF, 0) + test__mulosi4(-0x7fffffff, 0, 0, 0) + test__mulosi4(0, -0x7fffffff, 0, 0) + test__mulosi4(-0x7fffffff, 1, -0x7fffffff, 0) + test__mulosi4(1, -0x7fffffff, -0x7fffffff, 0) + test__mulosi4(-0x7fffffff, 2, -0x80000000, 1) + test__mulosi4(2, -0x7fffffff, -0x80000000, 1) +end diff --git a/spec/std/crystal/compiler_rt/muloti4_spec.cr b/spec/std/crystal/compiler_rt/muloti4_spec.cr new file mode 100644 index 000000000000..960193dbd6c6 --- /dev/null +++ 
b/spec/std/crystal/compiler_rt/muloti4_spec.cr @@ -0,0 +1,151 @@ +require "spec" + +# Ported from https://github.com/llvm/llvm-project/blob/ce59ccd04023cab3a837da14079ca2dcbfebb70c/compiler-rt/test/builtins/Unit/muloti4_test.c + +private def test__muloti4(a : Int128, b : Int128, expected : Int128, expected_overflow : Int32, file = __FILE__, line = __LINE__) + it "passes compiler-rt builtins unit tests" do + actual_overflow : Int32 = 0 + actual = __muloti4(a, b, pointerof(actual_overflow)) + actual_overflow.should eq(expected_overflow), file: file, line: line + if expected_overflow == 0 # 0 is truthy in Crystal; compare explicitly so the product is checked + actual.should eq(expected), file: file, line: line + end + end +end + +# TODO: Replace helper methods with literals once possible + +private def make_ti(a : Int128, b : Int128) + (a << 64) + b +end + +describe "__muloti4" do + test__muloti4(0, 0, 0, 0) + test__muloti4(0, 1, 0, 0) + test__muloti4(1, 0, 0, 0) + test__muloti4(0, 10, 0, 0) + test__muloti4(10, 0, 0, 0) + test__muloti4(0, 81985529216486895, 0, 0) + test__muloti4(81985529216486895, 0, 0, 0) + test__muloti4(0, -1, 0, 0) + test__muloti4(-1, 0, 0, 0) + test__muloti4(0, -10, 0, 0) + test__muloti4(-10, 0, 0, 0) + test__muloti4(0, -81985529216486895, 0, 0) + test__muloti4(-81985529216486895, 0, 0, 0) + test__muloti4(1, 1, 1, 0) + test__muloti4(1, 10, 10, 0) + test__muloti4(10, 1, 10, 0) + test__muloti4(1, 81985529216486895, 81985529216486895, 0) + test__muloti4(81985529216486895, 1, 81985529216486895, 0) + test__muloti4(1, -1, -1, 0) + test__muloti4(1, -10, -10, 0) + test__muloti4(-10, 1, -10, 0) + test__muloti4(1, -81985529216486895, -81985529216486895, 0) + test__muloti4(-81985529216486895, 1, -81985529216486895, 0) + test__muloti4(3037000499, 3037000499, 9223372030926249001, 0) + test__muloti4(-3037000499, 3037000499, -9223372030926249001, 0) + test__muloti4(3037000499, -3037000499, -9223372030926249001, 0) + test__muloti4(-3037000499, -3037000499, 9223372030926249001, 0) + test__muloti4(4398046511103, 2097152,
9223372036852678656, 0) + test__muloti4(-4398046511103, 2097152, -9223372036852678656, 0) + test__muloti4(4398046511103, -2097152, -9223372036852678656, 0) + test__muloti4(-4398046511103, -2097152, 9223372036852678656, 0) + test__muloti4(2097152, 4398046511103, 9223372036852678656, 0) + test__muloti4(-2097152, 4398046511103, -9223372036852678656, 0) + test__muloti4(2097152, -4398046511103, -9223372036852678656, 0) + test__muloti4(-2097152, -4398046511103, 9223372036852678656, 0) + test__muloti4(make_ti(0x00000000000000B5, 0x04F333F9DE5BE000), + make_ti(0x0000000000000000, 0x00B504F333F9DE5B), + make_ti(0x7FFFFFFFFFFFF328, 0xDF915DA296E8A000), 0) + test__muloti4(make_ti(0x7FFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), + -2, + make_ti(0x8000000000000000, 0x0000000000000001), 1) + test__muloti4(-2, + make_ti(0x7FFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), + make_ti(0x8000000000000000, 0x0000000000000001), 1) + test__muloti4(make_ti(0x7FFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), + -1, + make_ti(0x8000000000000000, 0x0000000000000001), 0) + test__muloti4(-1, + make_ti(0x7FFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), + make_ti(0x8000000000000000, 0x0000000000000001), 0) + test__muloti4(make_ti(0x7FFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), + 0, + 0, 0) + test__muloti4(0, + make_ti(0x7FFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), + 0, 0) + test__muloti4(make_ti(0x7FFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), + 1, + make_ti(0x7FFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), 0) + test__muloti4(1, + make_ti(0x7FFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), + make_ti(0x7FFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), 0) + test__muloti4(make_ti(0x7FFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), + 2, + make_ti(0x8000000000000000, 0x0000000000000001), 1) + test__muloti4(2, + make_ti(0x7FFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), + make_ti(0x8000000000000000, 0x0000000000000001), 1) + test__muloti4(make_ti(0x8000000000000000, 0x0000000000000000), + -2, + make_ti(0x8000000000000000, 0x0000000000000000), 1) + test__muloti4(-2, + make_ti(0x8000000000000000, 
0x0000000000000000), + make_ti(0x8000000000000000, 0x0000000000000000), 1) + test__muloti4(make_ti(0x8000000000000000, 0x0000000000000000), + -1, + make_ti(0x8000000000000000, 0x0000000000000000), 1) + test__muloti4(-1, + make_ti(0x8000000000000000, 0x0000000000000000), + make_ti(0x8000000000000000, 0x0000000000000000), 1) + test__muloti4(make_ti(0x8000000000000000, 0x0000000000000000), + 0, + 0, 0) + test__muloti4(0, + make_ti(0x8000000000000000, 0x0000000000000000), + 0, 0) + test__muloti4(make_ti(0x8000000000000000, 0x0000000000000000), + 1, + make_ti(0x8000000000000000, 0x0000000000000000), 0) + test__muloti4(1, + make_ti(0x8000000000000000, 0x0000000000000000), + make_ti(0x8000000000000000, 0x0000000000000000), 0) + test__muloti4(make_ti(0x8000000000000000, 0x0000000000000000), + 2, + make_ti(0x8000000000000000, 0x0000000000000000), 1) + test__muloti4(2, + make_ti(0x8000000000000000, 0x0000000000000000), + make_ti(0x8000000000000000, 0x0000000000000000), 1) + test__muloti4(make_ti(0x8000000000000000, 0x0000000000000001), + -2, + make_ti(0x8000000000000000, 0x0000000000000001), 1) + test__muloti4(-2, + make_ti(0x8000000000000000, 0x0000000000000001), + make_ti(0x8000000000000000, 0x0000000000000001), 1) + test__muloti4(make_ti(0x8000000000000000, 0x0000000000000001), + -1, + make_ti(0x7FFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), 0) + test__muloti4(-1, + make_ti(0x8000000000000000, 0x0000000000000001), + make_ti(0x7FFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), 0) + test__muloti4(make_ti(0x8000000000000000, 0x0000000000000001), + 0, + 0, 0) + test__muloti4(0, + make_ti(0x8000000000000000, 0x0000000000000001), + 0, 0) + test__muloti4(make_ti(0x8000000000000000, 0x0000000000000001), + 1, + make_ti(0x8000000000000000, 0x0000000000000001), 0) + test__muloti4(1, + make_ti(0x8000000000000000, 0x0000000000000001), + make_ti(0x8000000000000000, 0x0000000000000001), 0) + test__muloti4(make_ti(0x8000000000000000, 0x0000000000000001), + 2, + make_ti(0x8000000000000000, 
0x0000000000000000), 1) + test__muloti4(2, + make_ti(0x8000000000000000, 0x0000000000000001), + make_ti(0x8000000000000000, 0x0000000000000000), 1) +end diff --git a/spec/std/int_spec.cr b/spec/std/int_spec.cr index e2aed420ed7d..54d0caf5a228 100644 --- a/spec/std/int_spec.cr +++ b/spec/std/int_spec.cr @@ -187,7 +187,14 @@ describe "Int" do it_converts_to_s 255_u8, "255" it_converts_to_s 65535_u16, "65535" it_converts_to_s 4294967295_u32, "4294967295" + it_converts_to_s 18446744073709551615_u64, "18446744073709551615" + + {% unless flag?(:win32) %} + it_converts_to_s UInt128::MAX, "340282366920938463463374607431768211455" + it_converts_to_s Int128::MAX, "170141183460469231731687303715884105727" + it_converts_to_s Int128::MIN, "-170141183460469231731687303715884105728" + {% end %} end context "base and upcase parameters" do @@ -468,6 +475,9 @@ describe "Int" do Int64.new(1).should be_a(Int64) Int64.new(1).should eq(1) + Int128.new(1).should be_a(Int128) + Int128.new(1).should eq(1) + UInt8.new(1).should be_a(UInt8) UInt8.new(1).should eq(1) @@ -479,6 +489,9 @@ describe "Int" do UInt64.new(1).should be_a(UInt64) UInt64.new(1).should eq(1) + + UInt128.new(1).should be_a(UInt128) + UInt128.new(1).should eq(1) end end @@ -507,6 +520,10 @@ describe "Int" do (UInt8::MIN / -1).should eq(0) end + + pending_win32 "divides Int128::MIN by -1" do + (Int128::MIN / -1).should eq(-(Int128::MIN.to_f64)) + end end describe "floor division //" do @@ -518,6 +535,16 @@ describe "Int" do {% end %} end + # Missing symbols: __floattidf, __floatuntidf, __fixdfti, __fixsfti, __fixunsdfti, __fixunssfti, __floatuntisf, __floattisf + # These symbols are all required to convert U/Int128s to Floats + pending_win32 "preserves type of lhs (128-bit)" do + {% for type in [UInt128, Int128] %} + ({{type}}.new(7) // 2).should be_a({{type}}) + ({{type}}.new(7) // 2.0).should be_a({{type}}) + ({{type}}.new(7) // 2.0_f32).should be_a({{type}}) + {% end %} + end + it "divides negative numbers" do (7 // 
2).should eq(3) (-7 // 2).should eq(-4) @@ -558,6 +585,7 @@ describe "Int" do expect_raises(ArgumentError) { Int16::MIN // -1 } expect_raises(ArgumentError) { Int32::MIN // -1 } expect_raises(ArgumentError) { Int64::MIN // -1 } + expect_raises(ArgumentError) { Int128::MIN // -1 } (UInt8::MIN // -1).should eq(0) end @@ -583,8 +611,8 @@ describe "Int" do end it "returns 0 when doing IntN::MIN % -1 (#8306)" do - {% for n in [8, 16, 32, 64] %} - (Int{{n}}::MIN % -1_i{{n}}).should eq(0) + {% for n in [8, 16, 32, 64, 128] %} + (Int{{n}}::MIN % -1.to_i{{n}}).should eq(0) {% end %} end @@ -597,8 +625,8 @@ describe "Int" do end it "returns 0 when doing IntN::MIN.remainder(-1) (#8306)" do - {% for n in [8, 16, 32, 64] %} - (Int{{n}}::MIN.remainder(-1_i{{n}})).should eq(0) + {% for n in [8, 16, 32, 64, 128] %} + (Int{{n}}::MIN.remainder(-1.to_i{{n}})).should eq(0) {% end %} end @@ -734,27 +762,31 @@ describe "Int" do it { 5_i64.popcount.should eq(2) } it { 9223372036854775807_i64.popcount.should eq(63) } it { 18446744073709551615_u64.popcount.should eq(64) } + + it { 0_i128.popcount.should eq(0) } + it { Int128::MAX.popcount.should eq(127) } + it { UInt128::MAX.popcount.should eq(128) } end describe "#leading_zeros_count" do - {% for width in %w(8 16 32 64).map(&.id) %} - it { -1_i{{width}}.leading_zeros_count.should eq(0) } - it { 0_i{{width}}.leading_zeros_count.should eq({{width}}) } - it { 0_u{{width}}.leading_zeros_count.should eq({{width}}) } + {% for width in %w(8 16 32 64 128).map(&.id) %} + it { -1.to_i{{width}}.leading_zeros_count.should eq(0) } + it { 0.to_i{{width}}.leading_zeros_count.should eq({{width}}) } + it { 0.to_u{{width}}.leading_zeros_count.should eq({{width}}) } {% end %} end describe "#trailing_zeros_count" do - {% for width in %w(8 16 32 64).map(&.id) %} - it { -2_i{{width}}.trailing_zeros_count.should eq(1) } - it { 2_i{{width}}.trailing_zeros_count.should eq(1) } - it { 2_u{{width}}.trailing_zeros_count.should eq(1) } + {% for width in %w(8 16 32 64 
128).map(&.id) %} + it { -2.to_i{{width}}.trailing_zeros_count.should eq(1) } + it { 2.to_i{{width}}.trailing_zeros_count.should eq(1) } + it { 2.to_u{{width}}.trailing_zeros_count.should eq(1) } {% end %} end pending_win32 "compares signed vs. unsigned integers" do - signed_ints = [Int8::MAX, Int16::MAX, Int32::MAX, Int64::MAX, Int8::MIN, Int16::MIN, Int32::MIN, Int64::MIN, 0_i8, 0_i16, 0_i32, 0_i64] - unsigned_ints = [UInt8::MAX, UInt16::MAX, UInt32::MAX, UInt64::MAX, 0_u8, 0_u16, 0_u32, 0_u64] + signed_ints = [Int8::MAX, Int16::MAX, Int32::MAX, Int64::MAX, Int128::MAX, Int8::MIN, Int16::MIN, Int32::MIN, Int64::MIN, Int128::MIN, 0_i8, 0_i16, 0_i32, 0_i64, 0_i128] + unsigned_ints = [UInt8::MAX, UInt16::MAX, UInt32::MAX, UInt64::MAX, UInt128::MAX, 0_u8, 0_u16, 0_u32, 0_u64, 0_u128] big_signed_ints = signed_ints.map &.to_big_i big_unsigned_ints = unsigned_ints.map &.to_big_i @@ -781,7 +813,7 @@ describe "Int" do end it "clones" do - [1_u8, 2_u16, 3_u32, 4_u64, 5_i8, 6_i16, 7_i32, 8_i64].each do |value| + [1_u8, 2_u16, 3_u32, 4_u64, 5.to_u128, 6_i8, 7_i16, 8_i32, 9_i64, 10.to_i128].each do |value| value.clone.should eq(value) end end @@ -841,6 +873,12 @@ describe "Int" do UInt64::MAX.digits.should eq(UInt64::MAX.to_s.chars.map(&.to_i).reverse) end + # Missing symbol __floatuntidf on windows + pending_win32 "works for u/int128 maximums" do + Int128::MAX.digits.should eq(Int128::MAX.to_s.chars.map(&.to_i).reverse) + UInt128::MAX.digits.should eq(UInt128::MAX.to_s.chars.map(&.to_i).reverse) + end + it "works for non-Int32" do digits = 123_i64.digits digits.should eq([3, 2, 1]) diff --git a/spec/std/uint_spec.cr b/spec/std/uint_spec.cr index 7de658f63b70..bd69c6ed3974 100644 --- a/spec/std/uint_spec.cr +++ b/spec/std/uint_spec.cr @@ -48,6 +48,14 @@ describe "UInt" do x = &-18446744073709551615_u64 x.should eq(1_u64) x.should be_a(UInt64) + + x = &-1_u128 + x.should eq(UInt128::MAX) # TODO: Change to literal once supported + x.should be_a(UInt128) + + x = &-(UInt128::MAX) 
# TODO: Change to literal once supported + x.should eq(1_u128) + x.should be_a(UInt128) end end end diff --git a/spec/win32_std_spec.cr b/spec/win32_std_spec.cr index e311e87fe8f3..1f0898f6518d 100644 --- a/spec/win32_std_spec.cr +++ b/spec/win32_std_spec.cr @@ -34,7 +34,9 @@ require "./std/crypto/bcrypt/password_spec.cr" require "./std/crypto/bcrypt_spec.cr" require "./std/crypto/blowfish_spec.cr" require "./std/crypto/subtle_spec.cr" +# require "./std/crystal/compiler_rt/muloti4_spec.cr" (failed to run) require "./std/crystal/compiler_rt/mulodi4_spec.cr" +require "./std/crystal/compiler_rt/mulosi4_spec.cr" require "./std/crystal/digest/md5_spec.cr" require "./std/crystal/digest/sha1_spec.cr" require "./std/crystal/hasher_spec.cr" diff --git a/src/crystal/compiler_rt.cr b/src/crystal/compiler_rt.cr index d52e22ba6ebd..e3a557ca595d 100644 --- a/src/crystal/compiler_rt.cr +++ b/src/crystal/compiler_rt.cr @@ -1,3 +1,4 @@ {% skip_file if flag?(:skip_crystal_compiler_rt) %} -require "./compiler_rt/mulodi4.cr" +require "./compiler_rt/mul.cr" +require "./compiler_rt/divmod128.cr" diff --git a/src/crystal/compiler_rt/divmod128.cr b/src/crystal/compiler_rt/divmod128.cr new file mode 100644 index 000000000000..91f59f4664c6 --- /dev/null +++ b/src/crystal/compiler_rt/divmod128.cr @@ -0,0 +1,205 @@ +# This file includes an implementation of (U)Int128 modulo/division operations + +# :nodoc: +fun __divti3(a : Int128, b : Int128) : Int128 + # Ported from https://github.com/llvm/llvm-project/blob/ce59ccd04023cab3a837da14079ca2dcbfebb70c/compiler-rt/lib/builtins/int_div_impl.inc + + s_a = a >> 127 # s_a = a < 0 ? -1 : 0 + s_b = b >> 127 # s_b = b < 0 ? -1 : 0 + a = (a ^ s_a) &- s_a # negate if s_a == -1 + b = (b ^ s_b) &- s_b # negate if s_b == -1 + s_a ^= s_b # sign of quotient + quo, _ = _u128_div_rem(a.to_u128!, b.to_u128!) + ((quo ^ s_a) &- s_a).to_i128! 
# negate if s_a == -1 +end + +# :nodoc: +fun __modti3(a : Int128, b : Int128) : Int128 + # Ported from https://github.com/llvm/llvm-project/blob/ce59ccd04023cab3a837da14079ca2dcbfebb70c/compiler-rt/lib/builtins/int_div_impl.inc + + s = b >> 127 # s = b < 0 ? -1 : 0 + b = (b ^ s) &- s # negate if s == -1 + s = a >> 127 # s = a < 0 ? -1 : 0 + a = (a ^ s) &- s # negate if s == -1 + _, rem = _u128_div_rem(a.to_u128!, b.to_u128!) + (rem.to_i128! ^ s) &- s # negate if s == -1 +end + +# :nodoc: +fun __udivti3(a : UInt128, b : UInt128) : UInt128 + # Ported from https://github.com/llvm/llvm-project/blob/ce59ccd04023cab3a837da14079ca2dcbfebb70c/compiler-rt/lib/builtins/int_div_impl.inc + + quo, _ = _u128_div_rem(a, b) + quo +end + +# :nodoc: +fun __umodti3(a : UInt128, b : UInt128) : UInt128 + # Ported from https://github.com/llvm/llvm-project/blob/ce59ccd04023cab3a837da14079ca2dcbfebb70c/compiler-rt/lib/builtins/int_div_impl.inc + + _, rem = _u128_div_rem(a, b) + rem +end + +# :nodoc: +def _carrying_mul(lhs : UInt64, rhs : UInt64) : Tuple(UInt64, UInt64) + # Ported from https://github.com/rust-lang/compiler-builtins/blob/2be2bc086bd9b3c0fc8eb8d2dc7df025e6ffd318/src/int/specialized_div_rem/trifecta.rs + + tmp = lhs.to_u128! &* rhs.to_u128! + {tmp.to_u64!, (tmp >> 64).to_u64!} +end + +# :nodoc: +def _carrying_mul_add(lhs : UInt64, mul : UInt64, add : UInt64) : Tuple(UInt64, UInt64) + # Ported from https://github.com/rust-lang/compiler-builtins/blob/2be2bc086bd9b3c0fc8eb8d2dc7df025e6ffd318/src/int/specialized_div_rem/trifecta.rs + + tmp = lhs.to_u128! + tmp &*= mul.to_u128! + tmp &+= add.to_u128! 
+ {tmp.to_u64!, (tmp >> 64).to_u64!} +end + +# :nodoc: +def _u128_div_rem(duo : UInt128, div : UInt128) : Tuple(UInt128, UInt128) + # Ported from https://github.com/rust-lang/compiler-builtins/blob/2be2bc086bd9b3c0fc8eb8d2dc7df025e6ffd318/src/int/specialized_div_rem/trifecta.rs + + # Rust also has another algorithm for 128-bit integer division + # for microarchitectures that have slow hardware integer division. + + # This algorithm is called the trifecta algorithm because it uses three main algorithms: + # - short division for small divisors + # - the two possibility algorithm for large divisors + # - an undersubtracting long division algorithm for intermediate cases + + div_lz = div.leading_zeros_count + duo_lz = duo.leading_zeros_count + + if div_lz <= duo_lz + # Resulting quotient is 0 or 1 at this point + # The highest set bit of `duo` needs to be at least one place higher than `div` for the quotient to be more than one. + if duo >= div + return {1_u128, duo - div} + else + return {0_u128, duo} + end + end + + # Use 64-bit integer division if possible + if duo_lz >= 64 + # duo fits in a 64-bit integer + # Because of the previous branch (div_lz <= duo_lz), div will also fit in an 64-bit integer + quo_local1 = duo.to_u64! // div.to_u64! + rem_local1 = duo.to_u64! % div.to_u64! + return {quo_local1.to_u128!, rem_local1.to_u128!} + end + + # Short division branch + if div_lz >= 96 + duo_hi = (duo >> 64).to_u64! + div_0 = div.to_u32!.to_u64! + quo_hi = duo_hi // div_0 + rem_3 = duo_hi % div_0 + + duo_mid = (duo >> 32).to_u32!.to_u64! | (rem_3 << 32) + quo_1 = duo_mid // div_0 + rem_2 = duo_mid % div_0 + + duo_lo = duo.to_u32!.to_u64! | (rem_2 << 32) + quo_0 = duo_lo // div_0 + rem_1 = duo_lo % div_0 + + return {quo_0.to_u128! | (quo_1.to_u128! << 32) | (quo_hi.to_u128! 
<< 64), rem_1.to_u128!} + end + + # Relative leading significant bits (cannot overflow because of above branches) + lz_diff = div_lz - duo_lz + + if lz_diff < 32 + # Two possibility division algorithm + + # The most significant bits of duo and div are within 32 bits of each other. + # If we take the n most significant bits of duo and divide them by the corresponding bits in div, it produces the quotient value quo. + # It happens that quo or quo - 1 will always be the correct quotient for the whole number. + + shift = 64 - duo_lz + duo_sig_n = (duo >> shift).to_u64! + div_sig_n = (div >> shift).to_u64! + quo_local2 = duo_sig_n // div_sig_n + + # The larger quo can overflow, so a manual carrying mul is used with manual overflow checking. + div_lo = div.to_u64! + div_hi = (div >> 64).to_u64! + tmp_lo, carry = _carrying_mul(quo_local2, div_lo) + tmp_hi, overflow = _carrying_mul_add(quo_local2, div_hi, carry) + tmp = tmp_lo.to_u128! | (tmp_hi.to_u128! << 64) + if (overflow != 0) || (duo < tmp) + # In `duo &+ div &- tmp`, both the subtraction and addition can overflow, but the result is always a correct positive number. + return {(quo_local2 - 1).to_u128!, duo &+ div &- tmp} + else + return {quo_local2.to_u128!, duo - tmp} + end + end + + # Undersubtracting long division algorithm. + + quo : UInt128 = 0 + div_extra = 96 - div_lz # Number of lesser significant bits that aren't part of div_sig_32 + div_sig_32 = (div >> div_extra).to_u32! # Most significant 32 bits of div + div_sig_32_add1 = div_sig_32.to_u64! + 1 # This must be a UInt64 because this can overflow + + loop do + duo_extra = 64 - duo_lz # Number of lesser significant bits that aren't part of duo_sig_n + duo_sig_n = (duo >> duo_extra).to_u64! # Most significant 64 bits of duo + + # The two possibility algorithm requires that the difference between most significant bits is less than 32 + if div_extra <= duo_extra + # Undersubtracting long division step + quo_part = (duo_sig_n // div_sig_32_add1).to_u128! 
+ extra_shl = duo_extra - div_extra + + # Addition to the quotient + quo += (quo_part << extra_shl) + + # Subtraction from duo. At least 31 bits are cleared from duo here + duo -= ((div &* quo_part) << extra_shl) + else + # Two possibility algorithm + + shift = 64 - duo_lz + duo_sig_n = (duo >> shift).to_u64! + div_sig_n = (div >> shift).to_u64! + quo_part = duo_sig_n // div_sig_n + div_lo = div.to_u64! + div_hi = (div >> 64).to_u64! + + tmp_lo, carry = _carrying_mul(quo_part, div_lo) + # The undersubtracting long division algorithm has already run once, so overflow beyond 128 bits is impossible + tmp_hi, _ = _carrying_mul_add(quo_part, div_hi, carry) + tmp = tmp_lo.to_u128! | (tmp_hi.to_u128! << 64) + + if duo < tmp + return {quo + (quo_part - 1), duo &+ div &- tmp} + else + return {quo + quo_part, duo - tmp} + end + end + + duo_lz = duo.leading_zeros_count + + if div_lz <= duo_lz + # Quotient can have 0 or 1 added to it + if div <= duo + return {quo + 1, duo - div} + else + return {quo, duo} + end + end + + # This can only happen if div_sd < 64 + if 64 <= duo_lz + quo_local3 = duo.to_u64! // div.to_u64! + rem_local2 = duo.to_u64! % div.to_u64! 
+ return {quo + quo_local3, rem_local2.to_u128!} + end + end +end diff --git a/src/crystal/compiler_rt/mul.cr b/src/crystal/compiler_rt/mul.cr new file mode 100644 index 000000000000..1d4604225fde --- /dev/null +++ b/src/crystal/compiler_rt/mul.cr @@ -0,0 +1,42 @@ +# :nodoc: +private macro __mul_impl(name, type, n) + # Ported from https://github.com/llvm/llvm-project/blob/ce59ccd04023cab3a837da14079ca2dcbfebb70c/compiler-rt/lib/builtins/int_mulo_impl.inc + # :nodoc: + fun {{name}}(a : {{type}}, b : {{type}}, overflow : Int32*) : {{type}} + overflow.value = 0 + result = a &* b + if a == {{type}}::MIN + if b != 0 && b != 1 + overflow.value = 1 + end + return result + end + if b == {{type}}::MIN + if a != 0 && a != 1 + overflow.value = 1 + end + return result + end + sa = a >> {{n - 1}} + abs_a = (a ^ sa) &- sa + sb = b >> {{n - 1}} + abs_b = (b ^ sb) &- sb + if abs_a < 2 || abs_b < 2 + return result + end + if sa == sb + if abs_a > ({{type}}::MAX // abs_b) + overflow.value = 1 + end + else + if abs_a > ({{type}}::MIN // ({{type}}.new(0) &- abs_b)) + overflow.value = 1 + end + end + return result + end +end + +__mul_impl(__mulosi4, Int32, 32) +__mul_impl(__mulodi4, Int64, 64) +__mul_impl(__muloti4, Int128, 128) diff --git a/src/crystal/compiler_rt/mulodi4.cr b/src/crystal/compiler_rt/mulodi4.cr deleted file mode 100644 index e853b89b13aa..000000000000 --- a/src/crystal/compiler_rt/mulodi4.cr +++ /dev/null @@ -1,37 +0,0 @@ -# :nodoc: -fun __mulodi4(a : Int64, b : Int64, overflow : Int32*) : Int64 - n = 64 - min = Int64::MIN - max = Int64::MAX - overflow.value = 0 - result = a &* b - if a == min - if b != 0 && b != 1 - overflow.value = 1 - end - return result - end - if b == min - if a != 0 && a != 1 - overflow.value = 1 - end - return result - end - sa = a >> (n &- 1) - abs_a = (a ^ sa) &- sa - sb = b >> (n &- 1) - abs_b = (b ^ sb) &- sb - if abs_a < 2 || abs_b < 2 - return result - end - if sa == sb - if abs_a > max // abs_b - overflow.value = 1 - end - else - if 
abs_a > min // (0i64 &- abs_b) - overflow.value = 1 - end - end - return result -end