Improve taylor series calculation of exp and sin by binary splitting method

tompng · tompng · commit c44cb2c08fd8 · 2025-11-05T15:40:25.000+09:00
exp and sin becomes orders of magnitude faster.
To make log and atan also fast, log and atan now depends on exp and sin.
log(x): solve exp(y)-x=0 by Newton's method
atan(x): solve tan(y)-x=0 by Newton's method
diff --git a/lib/bigdecimal.rb b/lib/bigdecimal.rb
@@ -60,6 +60,46 @@ def self.nan_computation_result # :nodoc:
       end
       BigDecimal::NAN
     end
+
+    # Iteration for Newton's method with increasing precision
+    def self.newton_loop(prec, initial_precision: BigDecimal.double_fig / 2, safe_margin: 2) # :nodoc:
+      precs = []
+      while prec > initial_precision
+        precs << prec
+        prec = (precs.last + 1) / 2 + safe_margin
+      end
+      precs.reverse_each do |p|
+        yield p
+      end
+    end
+
+    # Calculates Math.log(x.to_f) considering large or small exponent
+    def self.float_log(x) # :nodoc:
+      Math.log(x._decimal_shift(-x.exponent).to_f) + x.exponent * Math.log(10)
+    end
+
+    # Calculating Taylor series sum using binary splitting method
+    # Calculates f(x) = (x/d0)*(1+(x/d1)*(1+(x/d2)*(1+(x/d3)*(1+...))))
+    # x.n_significant_digits or ds.size must be small to be performant.
+    def self.taylor_sum_binary_splitting(x, ds, prec) # :nodoc:
+      fs = ds.map {|d| [0, BigDecimal(d)] }
+      # fs = [[a0, a1], [b0, b1], [c0, c1], ...]
+      # f(x) = a0/a1+(x/a1)*(1+b0/b1+(x/b1)*(1+c0/c1+(x/c1)*(1+d0/d1+(x/d1)*(1+...))))
+      while fs.size > 1
+        # Merge two adjacent fractions
+        # from: (1 + a0/a1 + x/a1 * (1 + b0/b1 + x/b1 * rest))
+        # to:   (1 + (a0*b1+x*(b0+b1))/(a1*b1) + (x*x)/(a1*b1) * rest)
+        xn = xn ? xn.mult(xn, prec) : x
+        fs = fs.each_slice(2).map do |(a, b)|
+          b ||= [0, BigDecimal(1)._decimal_shift([xn.exponent, 0].max + 2)]
+          [
+            (a[0] * b[1]).add(xn * (b[0] + b[1]), prec),
+            a[1].mult(b[1], prec)
+          ]
+        end
+      end
+      BigDecimal(fs[0][0]).div(fs[0][1], prec)
+    end
   end
 
   #  call-seq:
@@ -226,9 +266,7 @@ def sqrt(prec)
     ex = exponent / 2
     x = _decimal_shift(-2 * ex)
     y = BigDecimal(Math.sqrt(x.to_f), 0)
-    precs = [prec + BigDecimal.double_fig]
-    precs << 2 + precs.last / 2 while precs.last > BigDecimal.double_fig
-    precs.reverse_each do |p|
+    Internal.newton_loop(prec + BigDecimal.double_fig) do |p|
       y = y.add(x.div(y, p), p).div(2, p)
     end
     y._decimal_shift(ex).mult(1, prec)
@@ -264,59 +302,32 @@ def log(x, prec)
     return BigDecimal(0) if x == 1
 
     prec2 = prec + BigDecimal.double_fig
-    BigDecimal.save_limit do
-      BigDecimal.limit(0)
-      if x > 10 || x < 0.1
-        log10 = log(BigDecimal(10), prec2)
-        exponent = x.exponent
-        x = x._decimal_shift(-exponent)
-        if x < 0.3
-          x *= 10
-          exponent -= 1
-        end
-        return (log10 * exponent).add(log(x, prec2), prec)
-      end
-
-      x_minus_one_exponent = (x - 1).exponent
 
-      # log(x) = log(sqrt(sqrt(sqrt(sqrt(x))))) * 2**sqrt_steps
-      sqrt_steps = [Integer.sqrt(prec2) + 3 * x_minus_one_exponent, 0].max
-
-      lg2 = 0.3010299956639812
-      sqrt_prec = prec2 + [-x_minus_one_exponent, 0].max + (sqrt_steps * lg2).ceil
-
-      sqrt_steps.times do
-        x = x.sqrt(sqrt_prec)
-      end
-
-      # Taylor series for log(x) around 1
-      # log(x) = -log((1 + X) / (1 - X)) where X = (x - 1) / (x + 1)
-      # log(x) = 2 * (X + X**3 / 3 + X**5 / 5 + X**7 / 7 + ...)
-      x = (x - 1).div(x + 1, sqrt_prec)
-      y = x
-      x2 = x.mult(x, prec2)
-      1.step do |i|
-        n = prec2 + x.exponent - y.exponent + x2.exponent
-        break if n <= 0 || x.zero?
-        x = x.mult(x2.round(n - x2.exponent), n)
-        y = y.add(x.div(2 * i + 1, n), prec2)
-      end
+    if x < 0.1 || x > 10
+      exponent = (3 * x).exponent - 1
+      x = x._decimal_shift(-exponent)
+      return log(10, prec2).mult(exponent, prec2).add(log(x, prec2), prec)
+    end
 
-      y.mult(2 ** (sqrt_steps + 1), prec)
+    # Solve exp(y) - x = 0 with Newton's method
+    # Repeat: y -= (exp(y) - x) / exp(y)
+    y = BigDecimal(BigDecimal::Internal.float_log(x), 0)
+    exp_additional_prec = [-(x - 1).exponent, 0].max
+    BigDecimal::Internal.newton_loop(prec2) do |p|
+      expy = exp(y, p + exp_additional_prec)
+      y = y.sub(expy.sub(x, p).div(expy, p), p)
     end
+    y.mult(1, prec)
   end
 
-  # Taylor series for exp(x) around 0
-  private_class_method def _exp_taylor(x, prec) # :nodoc:
-    xn = BigDecimal(1)
-    y = BigDecimal(1)
-    1.step do |i|
-      n = prec + xn.exponent
-      break if n <= 0 || xn.zero?
-      xn = xn.mult(x, n).div(i, n)
-      y = y.add(xn, prec)
-    end
-    y
+  private_class_method def _exp_binary_splitting(x, prec) # :nodoc:
+    return BigDecimal(1) if x.zero?
+    # Find k that satisfies x**k / k! < 10**(-prec)
+    log10 = Math.log(10)
+    logx = BigDecimal::Internal.float_log(x.abs)
+    step = (1..).bsearch { |k| Math.lgamma(k + 1)[0] - k * logx > prec * log10 }
+    # exp(x)-1 = x*(1+x/2*(1+x/3*(1+x/4*(1+x/5*(1+...)))))
+    1 + BigDecimal::Internal.taylor_sum_binary_splitting(x, [*1..step], prec)
   end
 
   # call-seq:
@@ -341,11 +352,21 @@ def exp(x, prec)
     prec2 = prec + BigDecimal.double_fig + cnt
     x = x._decimal_shift(-cnt)
 
-    # Calculation of exp(small_prec) is fast because calculation of x**n is fast
-    # Calculation of exp(small_abs) converges fast.
-    # exp(x) = exp(small_prec_part + small_abs_part) = exp(small_prec_part) * exp(small_abs_part)
-    x_small_prec = x.round(Integer.sqrt(prec2))
-    y = _exp_taylor(x_small_prec, prec2).mult(_exp_taylor(x.sub(x_small_prec, prec2), prec2), prec2)
+    # Decimal form of bit-burst algorithm
+    # Calculate exp(x.xxxxxxxxxxxxxxxx) as
+    # exp(x.xx) * exp(0.00xx) * exp(0.0000xxxx) * exp(0.00000000xxxxxxxx)
+    x = x.mult(1, prec2)
+    n = 2
+    y = BigDecimal(1)
+    BigDecimal.save_limit do
+      BigDecimal.limit(0)
+      while x != 0 do
+        partial_x = x.truncate(n)
+        x -= partial_x
+        y = y.mult(_exp_binary_splitting(partial_x, prec2), prec2)
+        n *= 2
+      end
+    end
 
     # calculate exp(x * 10**cnt) from exp(x)
     # exp(x * 10**k) = exp(x * 10**(k - 1)) ** 10
diff --git a/lib/bigdecimal/math.rb b/lib/bigdecimal/math.rb
@@ -88,6 +88,37 @@ def sqrt(x, prec)
     end
   end
 
+  private_class_method def _sin_binary_splitting(x, prec) # :nodoc:
+    return x if x.zero?
+    x2 = x.mult(x, prec)
+    # Find k that satisfies x2**k / (2k+1)! < 10**(-prec)
+    log10 = Math.log(10)
+    logx = BigDecimal::Internal.float_log(x.abs)
+    step = (1..).bsearch { |k| Math.lgamma(2 * k + 1)[0] - 2 * k * logx > prec * log10 }
+    # Construct denominator sequence for binary splitting
+    # sin(x) = x*(1-x2/(2*3)*(1-x2/(4*5)*(1-x2/(6*7)*(1-x2/(8*9)*(1-...)))))
+    ds = (1..step).map {|i| -(2 * i) * (2 * i + 1) }
+    x.mult(1 + BigDecimal::Internal.taylor_sum_binary_splitting(x2, ds, prec), prec)
+  end
+
+  private_class_method def _sin_around_zero(x, prec) # :nodoc:
+    # Divide x into several parts
+    # sin(x.xxxxxxxx...) = sin(x.xx + 0.00xx + 0.0000xxxx + ...)
+    # Calculate sin of each part and restore sin(0.xxxxxxxx...) using addition theorem.
+    sin = BigDecimal(0)
+    cos = BigDecimal(1)
+    n = 2
+    while x != 0 do
+      partial_x = x.truncate(n)
+      x -= partial_x
+      s = _sin_binary_splitting(partial_x, prec)
+      c = (1 - s * s).sqrt(prec)
+      sin, cos = (sin * c).add(cos * s, prec), (cos * c).sub(sin * s, prec)
+      n *= 2
+    end
+    sin.clamp(BigDecimal(-1), BigDecimal(1))
+  end
+
   # call-seq:
   #   cbrt(decimal, numeric) -> BigDecimal
   #
@@ -150,26 +181,9 @@ def sin(x, prec)
     prec = BigDecimal::Internal.coerce_validate_prec(prec, :sin)
     x = BigDecimal::Internal.coerce_to_bigdecimal(x, prec, :sin)
     return BigDecimal::Internal.nan_computation_result if x.infinite? || x.nan?
-    n    = prec + BigDecimal.double_fig
-    one  = BigDecimal("1")
-    two  = BigDecimal("2")
+    n = prec + BigDecimal.double_fig
     sign, x = _sin_periodic_reduction(x, n)
-    x1   = x
-    x2   = x.mult(x,n)
-    y    = x
-    d    = y
-    i    = one
-    z    = one
-    while d.nonzero? && ((m = n - (y.exponent - d.exponent).abs) > 0)
-      m = BigDecimal.double_fig if m < BigDecimal.double_fig
-      x1  = -x2.mult(x1,n)
-      i  += two
-      z  *= (i-one) * i
-      d   = x1.div(z,m)
-      y  += d
-    end
-    y = BigDecimal("1") if y > 1
-    y.mult(sign, prec)
+    _sin_around_zero(x, n).mult(sign, prec)
   end
 
   # call-seq:
@@ -187,8 +201,9 @@ def cos(x, prec)
     prec = BigDecimal::Internal.coerce_validate_prec(prec, :cos)
     x = BigDecimal::Internal.coerce_to_bigdecimal(x, prec, :cos)
     return BigDecimal::Internal.nan_computation_result if x.infinite? || x.nan?
-    sign, x = _sin_periodic_reduction(x, prec + BigDecimal.double_fig, add_half_pi: true)
-    sign * sin(x, prec)
+    n = prec + BigDecimal.double_fig
+    sign, x = _sin_periodic_reduction(x, n, add_half_pi: true)
+    _sin_around_zero(x, n).mult(sign, prec)
   end
 
   # call-seq:
@@ -277,28 +292,21 @@ def atan(x, prec)
     x = BigDecimal::Internal.coerce_to_bigdecimal(x, prec, :atan)
     return BigDecimal::Internal.nan_computation_result if x.nan?
     n = prec + BigDecimal.double_fig
-    pi = PI(n)
+    return PI(n).div(2 * x.infinite?, prec) if x.infinite?
+
     x = -x if neg = x < 0
-    return pi.div(neg ? -2 : 2, prec) if x.infinite?
-    return pi.div(neg ? -4 : 4, prec) if x.round(prec) == 1
-    x = BigDecimal("1").div(x, n) if inv = x > 1
-    x = (-1 + sqrt(1 + x.mult(x, n), n)).div(x, n) if dbl = x > 0.5
-    y = x
-    d = y
-    t = x
-    r = BigDecimal("3")
-    x2 = x.mult(x,n)
-    while d.nonzero? && ((m = n - (y.exponent - d.exponent).abs) > 0)
-      m = BigDecimal.double_fig if m < BigDecimal.double_fig
-      t = -t.mult(x2,n)
-      d = t.div(r,m)
-      y += d
-      r += 2
+    x = BigDecimal(1).div(x, n) if inv = x < -1 || x > 1
+
+    # Solve tan(y) - x = 0 with Newton's method
+    # Repeat: y -= (tan(y) - x) * cos(y)**2
+    y = BigDecimal(Math.atan(x.to_f), 0)
+    BigDecimal::Internal.newton_loop(n) do |p|
+      s = sin(y, p)
+      c = (1 - s * s).sqrt(p)
+      y = y.sub(c * (s.sub(c * x.mult(1, p), p)), p)
     end
-    y *= 2 if dbl
-    y = pi / 2 - y if inv
-    y = -y if neg
-    y.mult(1, prec)
+    y = PI(n) / 2 - y if inv
+    y.mult(neg ? -1 : 1, prec)
   end
 
   # call-seq:
diff --git a/test/bigdecimal/test_bigmath.rb b/test/bigdecimal/test_bigmath.rb
@@ -182,8 +182,13 @@ def test_sin
     assert_converge_in_precision {|n| sin(BigDecimal("1e-30"), n) }
     assert_converge_in_precision {|n| sin(BigDecimal(PI(50)), n) }
     assert_converge_in_precision {|n| sin(BigDecimal(PI(50) * 100), n) }
-    assert_operator(sin(PI(30) / 2, 30), :<=, 1)
-    assert_operator(sin(-PI(30) / 2, 30), :>=, -1)
+    [:up, :down].each do |mode|
+      BigDecimal.save_rounding_mode do
+        BigDecimal.mode(BigDecimal::ROUND_MODE, mode)
+        assert_operator(sin(PI(30) / 2, 30), :<=, 1)
+        assert_operator(sin(-PI(30) / 2, 30), :>=, -1)
+      end
+    end
   end
 
   def test_cos
@@ -205,8 +210,13 @@ def test_cos
     assert_converge_in_precision {|n| cos(BigDecimal("1e50"), n) }
     assert_converge_in_precision {|n| cos(BigDecimal(PI(50) / 2), n) }
     assert_converge_in_precision {|n| cos(BigDecimal(PI(50) * 201 / 2), n) }
-    assert_operator(cos(PI(30), 30), :>=, -1)
-    assert_operator(cos(PI(30) * 2, 30), :<=, 1)
+    [:up, :down].each do |mode|
+      BigDecimal.save_rounding_mode do
+        BigDecimal.mode(BigDecimal::ROUND_MODE, mode)
+        assert_operator(cos(PI(30), 30), :>=, -1)
+        assert_operator(cos(PI(30) * 2, 30), :<=, 1)
+      end
+    end
   end
 
   def test_tan
@@ -388,28 +398,20 @@ def test_exp
 
   def test_log
     assert_equal(0, log(BigDecimal("1.0"), 10))
-    assert_in_epsilon(Math.log(10)*1000, log(BigDecimal("1e1000"), 10))
+    assert_in_epsilon(1000 * Math.log(10), log(BigDecimal("1e1000"), 10))
+    assert_in_epsilon(19999999999999 * Math.log(10), log(BigDecimal("1E19999999999999"), 10))
+    assert_in_epsilon(-19999999999999 * Math.log(10), log(BigDecimal("1E-19999999999999"), 10))
     assert_in_exact_precision(
       BigDecimal("2.3025850929940456840179914546843642076011014886287729760333279009675726096773524802359972050895982983419677840422862"),
       log(BigDecimal("10"), 100),
       100
     )
     assert_converge_in_precision {|n| log(BigDecimal("2"), n) }
-    assert_converge_in_precision {|n| log(BigDecimal("1e-30") + 1, n) }
-    assert_converge_in_precision {|n| log(BigDecimal("1e-30"), n) }
+    assert_converge_in_precision {|n| log(1 + SQRT2 * BigDecimal("1e-30"), n) }
+    assert_converge_in_precision {|n| log(SQRT2 * BigDecimal("1e-30"), n) }
     assert_converge_in_precision {|n| log(BigDecimal("1e30"), n) }
     assert_converge_in_precision {|n| log(SQRT2, n) }
     assert_raise(Math::DomainError) {log(BigDecimal("-0.1"), 10)}
-    assert_separately(%w[-rbigdecimal], <<-SRC)
-    begin
-      x = BigMath.log(BigDecimal("1E19999999999999"), 10)
-    rescue FloatDomainError
-    else
-      unless x.infinite?
-        assert_in_epsilon(Math.log(10)*19999999999999, x)
-      end
-    end
-    SRC
   end
 
   def test_log2