diff --git a/doc/source/release.rst b/doc/source/release.rst
index 6eeaa55280e43..7171b48f4097a 100644
--- a/doc/source/release.rst
+++ b/doc/source/release.rst
@@ -414,6 +414,7 @@ See :ref:`Internal Refactoring`
     compatible. (:issue:`5213`, :issue:`5214`)
   - Unity ``dropna`` for Series/DataFrame signature (:issue:`5250`),
     tests from :issue:`5234`, courtesy of @rockg
+  - Rewrite assert_almost_equal() in cython for performance (:issue:`4398`)
 
 .. _release.bug_fixes-0.13.0:
 
diff --git a/pandas/src/testing.pyx b/pandas/src/testing.pyx
new file mode 100644
index 0000000000000..b324c6652d58f
--- /dev/null
+++ b/pandas/src/testing.pyx
@@ -0,0 +1,162 @@
+import numpy as np
+
+from pandas import compat
+from pandas.core.common import isnull
+
+# Scalar types that assert_almost_equal compares numerically instead of
+# with a plain ``==``.
+cdef NUMERIC_TYPES = (
+    bool,
+    int,
+    float,
+    np.bool_,
+    np.int8,
+    np.int16,
+    np.int32,
+    np.int64,
+    np.uint8,
+    np.uint16,
+    np.uint32,
+    np.uint64,
+    np.float16,
+    np.float32,
+    np.float64,
+)
+
+cdef bint is_comparable_as_number(obj):
+    return isinstance(obj, NUMERIC_TYPES)
+
+cdef bint isiterable(obj):
+    return hasattr(obj, '__iter__')
+
+cdef bint has_length(obj):
+    return hasattr(obj, '__len__')
+
+cdef bint is_dictlike(obj):
+    return hasattr(obj, 'keys') and hasattr(obj, '__getitem__')
+
+cdef bint decimal_almost_equal(double desired, double actual, int decimal):
+    # True when the two values differ by less than 0.5 * 10 ** -decimal.
+    # Code from
+    # http://docs.scipy.org/doc/numpy/reference/generated
+    # /numpy.testing.assert_almost_equal.html
+    return abs(desired - actual) < (0.5 * 10.0 ** -decimal)
+
+cpdef assert_dict_equal(a, b, bint compare_keys=True):
+    """
+    Assert that two dict-like objects hold almost-equal values.
+
+    Every key of ``a`` is looked up in both objects and the two values are
+    compared with assert_almost_equal; when ``compare_keys`` is true the key
+    sets must also be identical.  Returns True, or raises AssertionError.
+    """
+    assert is_dictlike(a) and is_dictlike(b), (
+        "Cannot compare dict objects, one or both is not dict-like"
+    )
+
+    a_keys = frozenset(a.keys())
+    b_keys = frozenset(b.keys())
+
+    if compare_keys:
+        assert a_keys == b_keys
+
+    for k in a_keys:
+        assert_almost_equal(a[k], b[k])
+
+    return True
+
+cpdef assert_almost_equal(a, b, bint check_less_precise=False):
+    """
+    Assert that ``a`` and ``b`` are equal, allowing a tolerance for numbers.
+
+    Dicts, strings, sized iterables, nulls and numeric scalars each have
+    their own rule; anything else falls back to ``==``.  Numbers pass when
+    ``b / a`` is within 0.5e-5 of 1 (``b - a`` when ``a`` is within 1e-5 of
+    zero); ``check_less_precise`` widens that to 0.5e-3 when both values
+    are floats of at most 4 bytes.  Returns True, or raises AssertionError.
+    """
+    cdef:
+        int decimal
+        Py_ssize_t i, na, nb
+        double fa, fb
+
+    if isinstance(a, dict) or isinstance(b, dict):
+        return assert_dict_equal(a, b)
+
+    if (isinstance(a, compat.string_types) or
+        isinstance(b, compat.string_types)):
+        assert a == b, "%r != %r" % (a, b)
+        return True
+
+    if isiterable(a):
+        assert isiterable(b), (
+            "First object is iterable, second isn't: %r != %r" % (a, b)
+        )
+        assert has_length(a) and has_length(b), (
+            "Can't compare objects without length, one or both is invalid: "
+            "(%r, %r)" % (a, b)
+        )
+
+        na, nb = len(a), len(b)
+        assert na == nb, (
+            "Length of two iterators not the same: %r != %r" % (na, nb)
+        )
+        if (isinstance(a, np.ndarray) and
+            isinstance(b, np.ndarray) and
+            np.array_equal(a, b)):
+            return True
+        else:
+            for i in xrange(na):
+                assert_almost_equal(a[i], b[i], check_less_precise)
+        return True
+    elif isiterable(b):
+        assert False, (
+            "Second object is iterable, first isn't: %r != %r" % (a, b)
+        )
+
+    if isnull(a):
+        assert isnull(b), (
+            "First object is null, second isn't: %r != %r" % (a, b)
+        )
+        return True
+    elif isnull(b):
+        assert isnull(a), (
+            "First object is not null, second is null: %r != %r" % (a, b)
+        )
+        return True
+
+    if is_comparable_as_number(a):
+        assert is_comparable_as_number(b), (
+            "First object is numeric, second is not: %r != %r" % (a, b)
+        )
+
+        decimal = 5
+
+        # deal with differing dtypes
+        # (relax the tolerance only when both are floats of <= 4 bytes)
+        if check_less_precise:
+            dtype_a = np.dtype(type(a))
+            dtype_b = np.dtype(type(b))
+            if dtype_a.kind == 'f' and dtype_b.kind == 'f':
+                if dtype_a.itemsize <= 4 and dtype_b.itemsize <= 4:
+                    decimal = 3
+
+        if np.isinf(a):
+            assert np.isinf(b), "First object is inf, second isn't"
+        else:
+            fa, fb = a, b
+
+            # case for zero
+            if abs(fa) < 1e-5:
+                if not decimal_almost_equal(fa, fb, decimal):
+                    assert False, (
+                        '(very low values) expected %.5f but got %.5f' % (b, a)
+                    )
+            else:
+                if not decimal_almost_equal(1, fb / fa, decimal):
+                    assert False, 'expected %.5f but got %.5f' % (b, a)
+
+    else:
+        assert a == b, "%r != %r" % (a, b)
+
+    return True
diff --git a/pandas/tests/test_testing.py b/pandas/tests/test_testing.py
new file mode 100644
index 0000000000000..fa295838d47e9
--- /dev/null
+++ b/pandas/tests/test_testing.py
@@ -0,0 +1,129 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+import pandas as pd
+import unittest
+import warnings
+import nose
+import numpy as np
+import sys
+
+from pandas.util.testing import (
+    assert_almost_equal, assertRaisesRegexp, raise_with_traceback
+)
+
+# let's get meta.
+
+class TestAssertAlmostEqual(unittest.TestCase):
+    _multiprocess_can_split_ = True
+
+    def _assert_almost_equal_both(self, a, b, **kwargs):
+        assert_almost_equal(a, b, **kwargs)
+        assert_almost_equal(b, a, **kwargs)
+
+    def _assert_not_almost_equal_both(self, a, b, **kwargs):
+        self.assertRaises(AssertionError, assert_almost_equal, a, b, **kwargs)
+        self.assertRaises(AssertionError, assert_almost_equal, b, a, **kwargs)
+
+    def test_assert_almost_equal_numbers(self):
+        self._assert_almost_equal_both(1.1, 1.1)
+        self._assert_almost_equal_both(1.1, 1.100001)
+        self._assert_almost_equal_both(np.int16(1), 1.000001)
+        self._assert_almost_equal_both(np.float64(1.1), 1.1)
+        self._assert_almost_equal_both(np.uint32(5), 5)
+
+        self._assert_not_almost_equal_both(1.1, 1)
+        self._assert_not_almost_equal_both(1.1, True)
+        self._assert_not_almost_equal_both(1, 2)
+        self._assert_not_almost_equal_both(1.0001, np.int16(1))
+
+    def test_assert_almost_equal_less_precise(self):
+        # the relaxed tolerance must not depend on argument order
+        self._assert_almost_equal_both(
+            np.float32(1.1), np.float16(1.1), check_less_precise=True
+        )
+
+    def test_assert_almost_equal_numbers_with_zeros(self):
+        self._assert_almost_equal_both(0, 0)
+        self._assert_almost_equal_both(0.000001, 0)
+
+        self._assert_not_almost_equal_both(0.001, 0)
+        self._assert_not_almost_equal_both(1, 0)
+
+    def test_assert_almost_equal_numbers_with_mixed(self):
+        self._assert_not_almost_equal_both(1, 'abc')
+        self._assert_not_almost_equal_both(1, [1,])
+        self._assert_not_almost_equal_both(1, object())
+
+    def test_assert_almost_equal_dicts(self):
+        self._assert_almost_equal_both({'a': 1, 'b': 2}, {'a': 1, 'b': 2})
+
+        self._assert_not_almost_equal_both({'a': 1, 'b': 2}, {'a': 1, 'b': 3})
+        self._assert_not_almost_equal_both(
+            {'a': 1, 'b': 2}, {'a': 1, 'b': 2, 'c': 3}
+        )
+        self._assert_not_almost_equal_both({'a': 1}, 1)
+        self._assert_not_almost_equal_both({'a': 1}, 'abc')
+        self._assert_not_almost_equal_both({'a': 1}, [1,])
+
+    def test_assert_almost_equal_dict_like_object(self):
+        class DictLikeObj(object):
+            def keys(self):
+                return ('a',)
+
+            def __getitem__(self, item):
+                if item == 'a':
+                    return 1
+
+        self._assert_almost_equal_both({'a': 1}, DictLikeObj())
+
+        self._assert_not_almost_equal_both({'a': 2}, DictLikeObj())
+
+    def test_assert_almost_equal_strings(self):
+        self._assert_almost_equal_both('abc', 'abc')
+
+        self._assert_not_almost_equal_both('abc', 'abcd')
+        self._assert_not_almost_equal_both('abc', 'abd')
+        self._assert_not_almost_equal_both('abc', 1)
+        self._assert_not_almost_equal_both('abc', [1,])
+
+    def test_assert_almost_equal_iterables(self):
+        self._assert_almost_equal_both([1, 2, 3], [1, 2, 3])
+        self._assert_almost_equal_both(np.array([1, 2, 3]), [1, 2, 3])
+
+        # Can't compare generators
+        self._assert_not_almost_equal_both(iter([1, 2, 3]), [1, 2, 3])
+
+        self._assert_not_almost_equal_both([1, 2, 3], [1, 2, 4])
+        self._assert_not_almost_equal_both([1, 2, 3], [1, 2, 3, 4])
+        self._assert_not_almost_equal_both([1, 2, 3], 1)
+
+    def test_assert_almost_equal_null(self):
+        self._assert_almost_equal_both(None, None)
+        self._assert_almost_equal_both(None, np.NaN)
+
+        self._assert_not_almost_equal_both(None, 0)
+        self._assert_not_almost_equal_both(np.NaN, 0)
+
+    def test_assert_almost_equal_inf(self):
+        self._assert_almost_equal_both(np.inf, np.inf)
+        self._assert_almost_equal_both(np.inf, float("inf"))
+
+        self._assert_not_almost_equal_both(np.inf, 0)
+
+class TestUtilTesting(unittest.TestCase):
+    _multiprocess_can_split_ = True
+
+    def test_raise_with_traceback(self):
+        with assertRaisesRegexp(LookupError, "error_text"):
+            try:
+                raise ValueError("THIS IS AN ERROR")
+            except ValueError as e:
+                e = LookupError("error_text")
+                raise_with_traceback(e)
+        with assertRaisesRegexp(LookupError, "error_text"):
+            try:
+                raise ValueError("This is another error")
+            except ValueError:
+                e = LookupError("error_text")
+                _, _, traceback = sys.exc_info()
+                raise_with_traceback(e, traceback)
diff --git a/pandas/tests/test_tests.py b/pandas/tests/test_tests.py
deleted file mode 100644
index 1890c2607fc89..0000000000000
--- a/pandas/tests/test_tests.py
+++ /dev/null
@@ -1,35 +0,0 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-import pandas as pd
-import unittest
-import warnings
-import nose
-import sys
-
-from pandas.util.testing import (
-    assert_almost_equal, assertRaisesRegexp, raise_with_traceback
-)
-
-# let's get meta.
-
-class TestUtilTesting(unittest.TestCase):
-    _multiprocess_can_split_ = True
-
-    def test_assert_almost_equal(self):
-        # don't die because values are not ndarrays
-        assert_almost_equal(1.1,1.1,check_less_precise=True)
-
-    def test_raise_with_traceback(self):
-        with assertRaisesRegexp(LookupError, "error_text"):
-            try:
-                raise ValueError("THIS IS AN ERROR")
-            except ValueError as e:
-                e = LookupError("error_text")
-                raise_with_traceback(e)
-        with assertRaisesRegexp(LookupError, "error_text"):
-            try:
-                raise ValueError("This is another error")
-            except ValueError:
-                e = LookupError("error_text")
-                _, _, traceback = sys.exc_info()
-                raise_with_traceback(e, traceback)
diff --git a/pandas/util/testing.py b/pandas/util/testing.py
index 7a37be30f7bf6..be6f593da2043 100644
--- a/pandas/util/testing.py
+++ b/pandas/util/testing.py
@@ -37,6 +37,8 @@
 from pandas.tseries.index import DatetimeIndex
 from pandas.tseries.period import PeriodIndex
 
+from pandas import _testing
+
 from pandas.io.common import urlopen
 
 Index = index.Index
@@ -50,6 +52,11 @@
 K = 4
 _RAISE_NETWORK_ERROR_DEFAULT = False
 
+# NOTE: don't pass an NDFrame or index to this function - may not handle it
+# well.
+assert_almost_equal = _testing.assert_almost_equal
+
+assert_dict_equal = _testing.assert_dict_equal
 
 def randbool(size=(), p=0.5):
     return rand(*size) <= p
@@ -374,75 +381,9 @@ def assert_attr_equal(attr, left, right):
 def isiterable(obj):
     return hasattr(obj, '__iter__')
 
-
-# NOTE: don't pass an NDFrame or index to this function - may not handle it
-# well.
-def assert_almost_equal(a, b, check_less_precise=False):
-    if isinstance(a, dict) or isinstance(b, dict):
-        return assert_dict_equal(a, b)
-
-    if isinstance(a, compat.string_types):
-        assert a == b, "%r != %r" % (a, b)
-        return True
-
-    if isiterable(a):
-        np.testing.assert_(isiterable(b))
-        na, nb = len(a), len(b)
-        assert na == nb, "%s != %s" % (na, nb)
-        if isinstance(a, np.ndarray) and isinstance(b, np.ndarray) and\
-                np.array_equal(a, b):
-            return True
-        else:
-            for i in range(na):
-                assert_almost_equal(a[i], b[i], check_less_precise)
-        return True
-
-    err_msg = lambda a, b: 'expected %.5f but got %.5f' % (b, a)
-
-    if isnull(a):
-        np.testing.assert_(isnull(b))
-        return
-
-    if isinstance(a, (bool, float, int, np.float32)):
-        decimal = 5
-
-        # deal with differing dtypes
-        if check_less_precise:
-            dtype_a = np.dtype(type(a))
-            dtype_b = np.dtype(type(b))
-            if dtype_a.kind == 'f' and dtype_b == 'f':
-                if dtype_a.itemsize <= 4 and dtype_b.itemsize <= 4:
-                    decimal = 3
-
-        if np.isinf(a):
-            assert np.isinf(b), err_msg(a, b)
-
-        # case for zero
-        elif abs(a) < 1e-5:
-            np.testing.assert_almost_equal(
-                a, b, decimal=decimal, err_msg=err_msg(a, b), verbose=False)
-        else:
-            np.testing.assert_almost_equal(
-                1, a / b, decimal=decimal, err_msg=err_msg(a, b), verbose=False)
-    else:
-        assert a == b, "%s != %s" % (a, b)
-
-
 def is_sorted(seq):
     return assert_almost_equal(seq, np.sort(np.array(seq)))
 
-
-def assert_dict_equal(a, b, compare_keys=True):
-    a_keys = frozenset(a.keys())
-    b_keys = frozenset(b.keys())
-
-    if compare_keys:
-        assert(a_keys == b_keys)
-
-    for k in a_keys:
-        assert_almost_equal(a[k], b[k])
-
-
 def assert_series_equal(left, right, check_dtype=True,
                         check_index_type=False,
                         check_series_type=False,
diff --git a/setup.py b/setup.py
index c326d14f552e0..635da56d7339f 100755
--- a/setup.py
+++ b/setup.py
@@ -304,7 +304,8 @@ class CheckSDist(sdist):
                  'pandas/index.pyx',
                  'pandas/algos.pyx',
                  'pandas/parser.pyx',
-                 'pandas/src/sparse.pyx']
+                 'pandas/src/sparse.pyx',
+                 'pandas/src/testing.pyx']
 
     def initialize_options(self):
         sdist.initialize_options(self)
@@ -464,6 +465,13 @@ def pxd(name):
 
 extensions.extend([sparse_ext])
 
+testing_ext = Extension('pandas._testing',
+                        sources=[srcpath('testing', suffix=suffix)],
+                        include_dirs=[],
+                        libraries=libraries)
+
+extensions.extend([testing_ext])
+
 #----------------------------------------------------------------------
 # msgpack stuff here
 