From 037b5fea25bf59df4e513992a7cc34a79d18425b Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 28 Nov 2021 18:58:30 -0600 Subject: [PATCH 01/16] Correctly rounded stdev results for Decimal inputs --- Lib/statistics.py | 33 ++++++++++++++++++++++++++++++--- Lib/test/test_statistics.py | 37 ++++++++++++++++++++++++++++++++++++- 2 files changed, 66 insertions(+), 4 deletions(-) diff --git a/Lib/statistics.py b/Lib/statistics.py index cf8eaa0a61e624..992c57ef896b7f 100644 --- a/Lib/statistics.py +++ b/Lib/statistics.py @@ -305,15 +305,18 @@ def _fail_neg(values, errmsg='negative value'): raise StatisticsError(errmsg) yield x + def _isqrt_frac_rto(n: int, m: int) -> float: """Square root of n/m, rounded to the nearest integer using round-to-odd.""" # Reference: https://www.lri.fr/~melquion/doc/05-imacs17_1-expose.pdf a = math.isqrt(n // m) return a | (a*a*m != n) + # For 53 bit precision floats, the _sqrt_frac() shift is 109. _sqrt_shift: int = 2 * sys.float_info.mant_dig + 3 + def _sqrt_frac(n: int, m: int) -> float: """Square root of n/m as a float, correctly rounded.""" # See principle and proof sketch at: https://bugs.python.org/msg407078 @@ -327,6 +330,31 @@ def _sqrt_frac(n: int, m: int) -> float: return numerator / denominator # Convert to float +def _deci_sqrt(n: int, m: int) -> Decimal: + """Square root of n/m as a float, correctly rounded.""" + # Premise: For decimal, computing sqrt(m / n) can be off by 1 ulp. + # Method: Check the result, moving up or down a step if needed. + if not n: + return 0.0 + + f_square = Fraction(n, m) + + d_mid = (Decimal(n) / Decimal(m)).sqrt() + f_mid = Fraction(*d_mid.as_integer_ratio()) + + d_plus = d_mid.next_plus() + f_plus = Fraction(*d_plus.as_integer_ratio()) + if f_square > ((f_mid + f_plus) / 2) ** 2: + return d_plus + + d_minus = d_mid.next_minus() + f_minus = Fraction(*d_minus.as_integer_ratio()) + if f_square < ((f_mid + f_minus) / 2) ** 2: + return d_minus + + return d_mid + + # === Measures of central tendency (averages) === def mean(data): @@ -888,9 +916,8 @@ def pstdev(data, mu=None): raise StatisticsError('pstdev requires at least one data point') T, ss = _ss(data, mu) mss = ss / n - if hasattr(T, 'sqrt'): - var = _convert(mss, T) - return var.sqrt() + if issubclass(T, Decimal): + return _deci_sqrt(mss.numerator, mss.denominator) return _sqrt_frac(mss.numerator, mss.denominator) diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py index 771a03e707ee01..74ab445e649600 100644 --- a/Lib/test/test_statistics.py +++ b/Lib/test/test_statistics.py @@ -2219,7 +2219,42 @@ def is_root_correctly_rounded(x: Fraction, root: float) -> bool: statistics._sqrt_frac(1, 0) # The result is well defined if both inputs are negative - self.assertAlmostEqual(statistics._sqrt_frac(-2, -1), math.sqrt(2.0)) + self.assertEqual(statistics._sqrt_frac(-2, -1), statistics._sqrt_frac(2, 1)) + + def test_deci_sqrt(self): + root: Decimal + numerator: int + denominator: int + + for root, numerator, denominator in [ + (Decimal('0.4481904599041192673635338663'), 200874688349065940678243576378, 1000000000000000000000000000000), # No adj + (Decimal('0.7924949131383786609961759598'), 628048187350206338833590574929, 1000000000000000000000000000000), # Adj up + (Decimal('0.8500554152289934068192208727'), 722594208960136395984391238251, 1000000000000000000000000000000), # Adj down + ]: + with decimal.localcontext(decimal.DefaultContext): + self.assertEqual(statistics._deci_sqrt(numerator, denominator), root) + + # Confirm expected root with a quad precision decimal computation + with decimal.localcontext(decimal.DefaultContext) as ctx: + ctx.prec *= 4 + high_prec_root = (Decimal(numerator) / Decimal(denominator)).sqrt() + with decimal.localcontext(decimal.DefaultContext): + target_root = +high_prec_root + self.assertEqual(root, target_root) + + # Verify that corner cases and error handling match Decimal.sqrt() + self.assertEqual(statistics._deci_sqrt(0, 1), 0.0) + with self.assertRaises(decimal.InvalidOperation): + statistics._deci_sqrt(-1, 1) + with self.assertRaises(decimal.InvalidOperation): + statistics._deci_sqrt(1, -1) + + # Error handling for zero denominator matches that for Fraction(1, 0) + with self.assertRaises(ZeroDivisionError): + statistics._deci_sqrt(1, 0) + + # The result is well defined if both inputs are negative + self.assertEqual(statistics._deci_sqrt(-2, -1), statistics._deci_sqrt(2, 1)) class TestStdev(VarianceStdevMixin, NumericTestCase): From f5c091c336f047d45bd17d6b812a003f4f6f0da2 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 28 Nov 2021 19:05:26 -0600 Subject: [PATCH 02/16] Whitespace --- Lib/statistics.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/Lib/statistics.py b/Lib/statistics.py index 992c57ef896b7f..bdf4e2277eb62d 100644 --- a/Lib/statistics.py +++ b/Lib/statistics.py @@ -336,9 +336,7 @@ def _deci_sqrt(n: int, m: int) -> Decimal: # Method: Check the result, moving up or down a step if needed. if not n: return 0.0 - f_square = Fraction(n, m) - d_mid = (Decimal(n) / Decimal(m)).sqrt() f_mid = Fraction(*d_mid.as_integer_ratio()) From 70cdade9fc9eef83c71bbff9be22add41a99547d Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 28 Nov 2021 19:16:54 -0600 Subject: [PATCH 03/16] Rename the functions consistently --- Lib/statistics.py | 12 ++++++------ Lib/test/test_statistics.py | 28 ++++++++++++++-------------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/Lib/statistics.py b/Lib/statistics.py index bdf4e2277eb62d..f48dce79f59511 100644 --- a/Lib/statistics.py +++ b/Lib/statistics.py @@ -313,11 +313,11 @@ def _isqrt_frac_rto(n: int, m: int) -> float: return a | (a*a*m != n) -# For 53 bit precision floats, the _sqrt_frac() shift is 109. +# For 53 bit precision floats, the _float_sqrt_of_frac() shift is 109. _sqrt_shift: int = 2 * sys.float_info.mant_dig + 3 -def _sqrt_frac(n: int, m: int) -> float: +def _float_sqrt_of_frac(n: int, m: int) -> float: """Square root of n/m as a float, correctly rounded.""" # See principle and proof sketch at: https://bugs.python.org/msg407078 q = (n.bit_length() - m.bit_length() - _sqrt_shift) // 2 @@ -330,7 +330,7 @@ def _sqrt_frac(n: int, m: int) -> float: return numerator / denominator # Convert to float -def _deci_sqrt(n: int, m: int) -> Decimal: +def _decimal_sqrt_of_frac(n: int, m: int) -> Decimal: """Square root of n/m as a float, correctly rounded.""" # Premise: For decimal, computing sqrt(m / n) can be off by 1 ulp. # Method: Check the result, moving up or down a step if needed. @@ -895,7 +895,7 @@ def stdev(data, xbar=None): if hasattr(T, 'sqrt'): var = _convert(mss, T) return var.sqrt() - return _sqrt_frac(mss.numerator, mss.denominator) + return _float_sqrt_of_frac(mss.numerator, mss.denominator) def pstdev(data, mu=None): @@ -915,8 +915,8 @@ def pstdev(data, mu=None): T, ss = _ss(data, mu) mss = ss / n if issubclass(T, Decimal): - return _deci_sqrt(mss.numerator, mss.denominator) - return _sqrt_frac(mss.numerator, mss.denominator) + return _decimal_sqrt_of_frac(mss.numerator, mss.denominator) + return _float_sqrt_of_frac(mss.numerator, mss.denominator) # === Statistics for relations between two inputs === diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py index 74ab445e649600..a39659fa31fde9 100644 --- a/Lib/test/test_statistics.py +++ b/Lib/test/test_statistics.py @@ -2177,7 +2177,7 @@ def test_isqrt_frac_rto(self): self.assertTrue(m * (r - 1)**2 < n < m * (r + 1)**2) @requires_IEEE_754 - def test_sqrt_frac(self): + def test_float_sqrt_of_frac(self): def is_root_correctly_rounded(x: Fraction, root: float) -> bool: if not x: @@ -2204,24 +2204,24 @@ def is_root_correctly_rounded(x: Fraction, root: float) -> bool: denonimator: int = randrange(10 ** randrange(50)) + 1 with self.subTest(numerator=numerator, denonimator=denonimator): x: Fraction = Fraction(numerator, denonimator) - root: float = statistics._sqrt_frac(numerator, denonimator) + root: float = statistics._float_sqrt_of_frac(numerator, denonimator) self.assertTrue(is_root_correctly_rounded(x, root)) # Verify that corner cases and error handling match math.sqrt() - self.assertEqual(statistics._sqrt_frac(0, 1), 0.0) + self.assertEqual(statistics._float_sqrt_of_frac(0, 1), 0.0) with self.assertRaises(ValueError): - statistics._sqrt_frac(-1, 1) + statistics._float_sqrt_of_frac(-1, 1) with self.assertRaises(ValueError): - statistics._sqrt_frac(1, -1) + statistics._float_sqrt_of_frac(1, -1) # Error handling for zero denominator matches that for Fraction(1, 0) with self.assertRaises(ZeroDivisionError): - statistics._sqrt_frac(1, 0) + statistics._float_sqrt_of_frac(1, 0) # The result is well defined if both inputs are negative - self.assertEqual(statistics._sqrt_frac(-2, -1), statistics._sqrt_frac(2, 1)) + self.assertEqual(statistics._float_sqrt_of_frac(-2, -1), statistics._float_sqrt_of_frac(2, 1)) - def test_deci_sqrt(self): + def test_decimal_sqrt_of_frac(self): root: Decimal numerator: int denominator: int @@ -2232,7 +2232,7 @@ def test_deci_sqrt(self): (Decimal('0.8500554152289934068192208727'), 722594208960136395984391238251, 1000000000000000000000000000000), # Adj down ]: with decimal.localcontext(decimal.DefaultContext): - self.assertEqual(statistics._deci_sqrt(numerator, denominator), root) + self.assertEqual(statistics._decimal_sqrt_of_frac(numerator, denominator), root) # Confirm expected root with a quad precision decimal computation with decimal.localcontext(decimal.DefaultContext) as ctx: @@ -2243,18 +2243,18 @@ def test_deci_sqrt(self): self.assertEqual(root, target_root) # Verify that corner cases and error handling match Decimal.sqrt() - self.assertEqual(statistics._deci_sqrt(0, 1), 0.0) + self.assertEqual(statistics._decimal_sqrt_of_frac(0, 1), 0.0) with self.assertRaises(decimal.InvalidOperation): - statistics._deci_sqrt(-1, 1) + statistics._decimal_sqrt_of_frac(-1, 1) with self.assertRaises(decimal.InvalidOperation): - statistics._deci_sqrt(1, -1) + statistics._decimal_sqrt_of_frac(1, -1) # Error handling for zero denominator matches that for Fraction(1, 0) with self.assertRaises(ZeroDivisionError): - statistics._deci_sqrt(1, 0) + statistics._decimal_sqrt_of_frac(1, 0) # The result is well defined if both inputs are negative - self.assertEqual(statistics._deci_sqrt(-2, -1), statistics._deci_sqrt(2, 1)) + self.assertEqual(statistics._decimal_sqrt_of_frac(-2, -1), statistics._decimal_sqrt_of_frac(2, 1)) class TestStdev(VarianceStdevMixin, NumericTestCase): From 82dbec67c63216f17d7bc80d283a3d8fdf5e367c Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 28 Nov 2021 21:22:28 -0600 Subject: [PATCH 04/16] Improve comment --- Lib/statistics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/statistics.py b/Lib/statistics.py index f48dce79f59511..6f2137ee25039b 100644 --- a/Lib/statistics.py +++ b/Lib/statistics.py @@ -332,7 +332,7 @@ def _float_sqrt_of_frac(n: int, m: int) -> float: def _decimal_sqrt_of_frac(n: int, m: int) -> Decimal: """Square root of n/m as a float, correctly rounded.""" - # Premise: For decimal, computing sqrt(m / n) can be off by 1 ulp. + # Premise: For decimal, computing (n/m).sqrt() can be off by 1 ulp. # Method: Check the result, moving up or down a step if needed. if not n: return 0.0 From 1a6c58d391b27689805a884c351cbbd23b0cfca4 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 28 Nov 2021 21:29:31 -0600 Subject: [PATCH 05/16] Tweak variable names --- Lib/statistics.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/Lib/statistics.py b/Lib/statistics.py index 6f2137ee25039b..997127a3a28d5e 100644 --- a/Lib/statistics.py +++ b/Lib/statistics.py @@ -337,20 +337,21 @@ def _decimal_sqrt_of_frac(n: int, m: int) -> Decimal: if not n: return 0.0 f_square = Fraction(n, m) - d_mid = (Decimal(n) / Decimal(m)).sqrt() - f_mid = Fraction(*d_mid.as_integer_ratio()) - d_plus = d_mid.next_plus() + d_start = (Decimal(n) / Decimal(m)).sqrt() + f_start = Fraction(*d_start.as_integer_ratio()) + + d_plus = d_start.next_plus() f_plus = Fraction(*d_plus.as_integer_ratio()) - if f_square > ((f_mid + f_plus) / 2) ** 2: + if f_square > ((f_start + f_plus) / 2) ** 2: return d_plus - d_minus = d_mid.next_minus() + d_minus = d_start.next_minus() f_minus = Fraction(*d_minus.as_integer_ratio()) - if f_square < ((f_mid + f_minus) / 2) ** 2: + if f_square < ((f_start + f_minus) / 2) ** 2: return d_minus - return d_mid + return d_start # === Measures of central tendency (averages) === From b2385b0efbb581c6ba0544b0e2900b19c133accd Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Mon, 29 Nov 2021 00:43:41 -0600 Subject: [PATCH 06/16] Replace Fraction arithmetic with integer arithmetic --- Lib/statistics.py | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/Lib/statistics.py b/Lib/statistics.py index 997127a3a28d5e..4a861b9dbf1b16 100644 --- a/Lib/statistics.py +++ b/Lib/statistics.py @@ -334,24 +334,27 @@ def _decimal_sqrt_of_frac(n: int, m: int) -> Decimal: """Square root of n/m as a float, correctly rounded.""" # Premise: For decimal, computing (n/m).sqrt() can be off by 1 ulp. # Method: Check the result, moving up or down a step if needed. - if not n: - return 0.0 - f_square = Fraction(n, m) + if n <= 0: + if not n: + return 0.0 + n, m = -n, -m - d_start = (Decimal(n) / Decimal(m)).sqrt() - f_start = Fraction(*d_start.as_integer_ratio()) + root = (Decimal(n) / Decimal(m)).sqrt() + nr, dr = root.as_integer_ratio() - d_plus = d_start.next_plus() - f_plus = Fraction(*d_plus.as_integer_ratio()) - if f_square > ((f_start + f_plus) / 2) ** 2: - return d_plus + plus = root.next_plus() + np, dp = plus.as_integer_ratio() + # test: n / m > ((root + plus) / 2) ** 2 + if 4*dr**2*dp**2*n > m*(dr*np + dp*nr)**2: + return plus - d_minus = d_start.next_minus() - f_minus = Fraction(*d_minus.as_integer_ratio()) - if f_square < ((f_start + f_minus) / 2) ** 2: - return d_minus + minus = root.next_minus() + nm, dm = minus.as_integer_ratio() + # test: n / m < ((root + minus) / 2) ** 2 + if 4*dr**2*dm**2*n < m*(dr*nm + dm*nr)**2: + return minus - return d_start + return root # === Measures of central tendency (averages) === From 594ea2766e824c6d2b4e01f3e185cc3742c3d4dd Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Mon, 29 Nov 2021 00:58:10 -0600 Subject: [PATCH 07/16] Add spacing between terms --- Lib/statistics.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/statistics.py b/Lib/statistics.py index 4a861b9dbf1b16..0feeb642a76992 100644 --- a/Lib/statistics.py +++ b/Lib/statistics.py @@ -345,13 +345,13 @@ def _decimal_sqrt_of_frac(n: int, m: int) -> Decimal: plus = root.next_plus() np, dp = plus.as_integer_ratio() # test: n / m > ((root + plus) / 2) ** 2 - if 4*dr**2*dp**2*n > m*(dr*np + dp*nr)**2: + if 4 * dr**2 * dp**2 * n > m * (dr*np + dp*nr)**2: return plus minus = root.next_minus() nm, dm = minus.as_integer_ratio() # test: n / m < ((root + minus) / 2) ** 2 - if 4*dr**2*dm**2*n < m*(dr*nm + dm*nr)**2: + if 4 * dr**2 * dm**2 * n < m * (dr*nm + dm*nr)**2: return minus return root From 3911581f59e7ec168acdfdd04f2b138c002fa7c6 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Mon, 29 Nov 2021 01:03:14 -0600 Subject: [PATCH 08/16] Fix type annotation --- Lib/statistics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/statistics.py b/Lib/statistics.py index 0feeb642a76992..ae86cc933922eb 100644 --- a/Lib/statistics.py +++ b/Lib/statistics.py @@ -306,7 +306,7 @@ def _fail_neg(values, errmsg='negative value'): yield x -def _isqrt_frac_rto(n: int, m: int) -> float: +def _isqrt_frac_rto(n: int, m: int) -> int: """Square root of n/m, rounded to the nearest integer using round-to-odd.""" # Reference: https://www.lri.fr/~melquion/doc/05-imacs17_1-expose.pdf a = math.isqrt(n // m) From a09e3c479b473626c302137ed7f6bca887a0a83d Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Mon, 29 Nov 2021 01:07:45 -0600 Subject: [PATCH 09/16] Return a Decimal zero when the numerator is zero --- Lib/statistics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/statistics.py b/Lib/statistics.py index ae86cc933922eb..8187c2ac887787 100644 --- a/Lib/statistics.py +++ b/Lib/statistics.py @@ -336,7 +336,7 @@ def _decimal_sqrt_of_frac(n: int, m: int) -> Decimal: # Method: Check the result, moving up or down a step if needed. if n <= 0: if not n: - return 0.0 + return Decimal('0.0') n, m = -n, -m root = (Decimal(n) / Decimal(m)).sqrt() From 152ed3fc8d75d0fe336a8832c194ad4a24929833 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Mon, 29 Nov 2021 01:10:22 -0600 Subject: [PATCH 10/16] Remove unused import --- Lib/statistics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/statistics.py b/Lib/statistics.py index 8187c2ac887787..fac4357a2e20c1 100644 --- a/Lib/statistics.py +++ b/Lib/statistics.py @@ -137,7 +137,7 @@ from itertools import groupby, repeat from bisect import bisect_left, bisect_right from math import hypot, sqrt, fabs, exp, erf, tau, log, fsum -from operator import itemgetter, mul +from operator import mul from collections import Counter, namedtuple _SQRT2 = sqrt(2.0) From 80371c1cbbbe6e4ae5a0498e8dc4edfb8d2b3c88 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Mon, 29 Nov 2021 09:12:50 -0600 Subject: [PATCH 11/16] Factor lhs of inequality. Rename helper function for consistency. --- Lib/statistics.py | 10 +++++----- Lib/test/test_statistics.py | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Lib/statistics.py b/Lib/statistics.py index fac4357a2e20c1..3ac320f5e20dea 100644 --- a/Lib/statistics.py +++ b/Lib/statistics.py @@ -306,7 +306,7 @@ def _fail_neg(values, errmsg='negative value'): yield x -def _isqrt_frac_rto(n: int, m: int) -> int: +def _integer_sqrt_of_frac_rto(n: int, m: int) -> int: """Square root of n/m, rounded to the nearest integer using round-to-odd.""" # Reference: https://www.lri.fr/~melquion/doc/05-imacs17_1-expose.pdf a = math.isqrt(n // m) @@ -322,10 +322,10 @@ def _float_sqrt_of_frac(n: int, m: int) -> float: # See principle and proof sketch at: https://bugs.python.org/msg407078 q = (n.bit_length() - m.bit_length() - _sqrt_shift) // 2 if q >= 0: - numerator = _isqrt_frac_rto(n, m << 2 * q) << q + numerator = _integer_sqrt_of_frac_rto(n, m << 2 * q) << q denominator = 1 else: - numerator = _isqrt_frac_rto(n << -2 * q, m) + numerator = _integer_sqrt_of_frac_rto(n << -2 * q, m) denominator = 1 << -q return numerator / denominator # Convert to float @@ -345,13 +345,13 @@ def _decimal_sqrt_of_frac(n: int, m: int) -> Decimal: plus = root.next_plus() np, dp = plus.as_integer_ratio() # test: n / m > ((root + plus) / 2) ** 2 - if 4 * dr**2 * dp**2 * n > m * (dr*np + dp*nr)**2: + if 4 * n * (dr*dp)**2 > m * (dr*np + dp*nr)**2: return plus minus = root.next_minus() nm, dm = minus.as_integer_ratio() # test: n / m < ((root + minus) / 2) ** 2 - if 4 * dr**2 * dm**2 * n < m * (dr*nm + dm*nr)**2: + if 4 * n * (dr*dm)**2 < m * (dr*nm + dm*nr)**2: return minus return root diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py index a39659fa31fde9..b0d0f86f44e93c 100644 --- a/Lib/test/test_statistics.py +++ b/Lib/test/test_statistics.py @@ -2164,9 +2164,9 @@ def test_center_not_at_mean(self): class TestSqrtHelpers(unittest.TestCase): - def test_isqrt_frac_rto(self): + def test_integer_sqrt_of_frac_rto(self): for n, m in itertools.product(range(100), range(1, 1000)): - r = statistics._isqrt_frac_rto(n, m) + r = statistics._integer_sqrt_of_frac_rto(n, m) self.assertIsInstance(r, int) if r*r*m == n: # Root is exact From 1c86e7cc4958b3e50a3c0a71df1345f76a923c03 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Mon, 29 Nov 2021 10:12:04 -0600 Subject: [PATCH 12/16] Add comment for future work. --- Lib/statistics.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Lib/statistics.py b/Lib/statistics.py index 3ac320f5e20dea..91ac06a24334d2 100644 --- a/Lib/statistics.py +++ b/Lib/statistics.py @@ -248,6 +248,15 @@ def _exact_ratio(x): x is expected to be an int, Fraction, Decimal or float. """ + + # XXX We should revisit whether accumulating exact ratios is the + # right way to go. The default decimal context supports a huge range + # of exponents. When expanded with as_integer_ratio(), numbers like + # Decimal('3.14E+5000') and Decimal('3.14E-5000') have large + # numerators or denominators that will slow computation. This + # doesn't seem to have been problem in practice, but it is a + # potential pitfall. + try: return x.as_integer_ratio() except AttributeError: From 0684face8058ebf7b1f84564896727548a87c2b8 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Mon, 29 Nov 2021 17:40:06 -0600 Subject: [PATCH 13/16] Fix typo in docstring. Refine wording in comment. --- Lib/statistics.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Lib/statistics.py b/Lib/statistics.py index 91ac06a24334d2..868024a3961e18 100644 --- a/Lib/statistics.py +++ b/Lib/statistics.py @@ -340,8 +340,9 @@ def _float_sqrt_of_frac(n: int, m: int) -> float: def _decimal_sqrt_of_frac(n: int, m: int) -> Decimal: - """Square root of n/m as a float, correctly rounded.""" - # Premise: For decimal, computing (n/m).sqrt() can be off by 1 ulp. + """Square root of n/m as a Decimal, correctly rounded.""" + # Premise: For decimal, computing (n/m).sqrt() can be off + # by 1 ulp from the correctly rounded result. # Method: Check the result, moving up or down a step if needed. if n <= 0: if not n: From 8b5e377efc650df49e1b52421056cafe020dfd40 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Mon, 29 Nov 2021 18:14:58 -0600 Subject: [PATCH 14/16] Add more detail to the comment about numerator and denominator sizes --- Lib/statistics.py | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/Lib/statistics.py b/Lib/statistics.py index 868024a3961e18..1b8c7fb7c17f72 100644 --- a/Lib/statistics.py +++ b/Lib/statistics.py @@ -249,12 +249,25 @@ def _exact_ratio(x): x is expected to be an int, Fraction, Decimal or float. """ - # XXX We should revisit whether accumulating exact ratios is the - # right way to go. The default decimal context supports a huge range - # of exponents. When expanded with as_integer_ratio(), numbers like + # XXX We should revisit whether using fractions to accumulate exact + # ratios is the right way to go. + + # The integer ratios for binary floats can have numerators or + # denominators with over 300 decimal digits. The problem is more + # acute with decimal floats where the the default decimal context + # supports a huge range of exponents from Emin=-999999 to + # Emax=999999. When expanded with as_integer_ratio(), numbers like # Decimal('3.14E+5000') and Decimal('3.14E-5000') have large - # numerators or denominators that will slow computation. This - # doesn't seem to have been problem in practice, but it is a + # numerators or denominators that will slow computation. + + # When the integer ratios are accumulated as fractions, the size + # grows to cover the full range from the smallest magnitude to the + # largest. For example, Fraction(3.14E+300) + Fraction(3.14E-300), + # has a 616 digit numerator. Likewise, + # Fraction(Decimal('3.14E+5000')) + Fraction(Decimal('3.14E-5000')) + # has 10,003 digit numerator. + + # This doesn't seem to have been problem in practice, but it is a # potential pitfall. try: From d11d56763822b25b93cc9660551a2f3a0e1674da Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Mon, 29 Nov 2021 19:28:17 -0600 Subject: [PATCH 15/16] Improve variable name --- Lib/statistics.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Lib/statistics.py b/Lib/statistics.py index 1b8c7fb7c17f72..9f1efa21b15e3c 100644 --- a/Lib/statistics.py +++ b/Lib/statistics.py @@ -335,14 +335,15 @@ def _integer_sqrt_of_frac_rto(n: int, m: int) -> int: return a | (a*a*m != n) -# For 53 bit precision floats, the _float_sqrt_of_frac() shift is 109. -_sqrt_shift: int = 2 * sys.float_info.mant_dig + 3 +# For 53 bit precision floats, the bit width used in +# _float_sqrt_of_frac() is 109. +_sqrt_bit_width: int = 2 * sys.float_info.mant_dig + 3 def _float_sqrt_of_frac(n: int, m: int) -> float: """Square root of n/m as a float, correctly rounded.""" # See principle and proof sketch at: https://bugs.python.org/msg407078 - q = (n.bit_length() - m.bit_length() - _sqrt_shift) // 2 + q = (n.bit_length() - m.bit_length() - _sqrt_bit_width) // 2 if q >= 0: numerator = _integer_sqrt_of_frac_rto(n, m << 2 * q) << q denominator = 1 From 309cb0a2be354c7611f74d45eda0f3cc5531a55a Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Tue, 30 Nov 2021 10:44:18 -0600 Subject: [PATCH 16/16] Avoid double rounding in test code --- Lib/test/test_statistics.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py index b0d0f86f44e93c..bacb76a9b036bf 100644 --- a/Lib/test/test_statistics.py +++ b/Lib/test/test_statistics.py @@ -2237,7 +2237,9 @@ def test_decimal_sqrt_of_frac(self): # Confirm expected root with a quad precision decimal computation with decimal.localcontext(decimal.DefaultContext) as ctx: ctx.prec *= 4 - high_prec_root = (Decimal(numerator) / Decimal(denominator)).sqrt() + high_prec_ratio = Decimal(numerator) / Decimal(denominator) + ctx.rounding = decimal.ROUND_05UP + high_prec_root = high_prec_ratio.sqrt() with decimal.localcontext(decimal.DefaultContext): target_root = +high_prec_root self.assertEqual(root, target_root)