Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve utilities for structure comparison in tests #896

Merged
merged 8 commits into from
Oct 24, 2024
57 changes: 32 additions & 25 deletions invisible_cities/core/testing_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,22 +87,8 @@ def FLOAT_ARRAY(*args, **kwargs):
def random_length_float_arrays(min_length =   0,
                               max_length = 100,
                               **kwargs        ):
    """
    Build a strategy producing float arrays of variable length.

    A length `n` is drawn from `integers(min_length, max_length)` and
    then mapped to `float_arrays(n, **kwargs)`; every extra keyword
    argument is forwarded untouched to `float_arrays`.

    NOTE(review): `integers` and `float_arrays` are defined outside this
    view; `integers` is presumably the hypothesis strategy - confirm.
    """
    lengths = integers(min_length, max_length)
    return lengths.flatmap(lambda n: float_arrays(n, **kwargs))


def _compare_dataframes(assertion, df1, df2, check_types=True, **kwargs):
    """
    Apply `assertion` column by column to two dataframes.

    Parameters
    ----------
    assertion   : callable invoked as `assertion(values1, values2, **kwargs)`
                  for each column; expected to raise on mismatch.
    df1, df2    : dataframes to compare; they must have the same set
                  of column names (the order is irrelevant).
    check_types : if True, also require identical dtypes per column.
    **kwargs    : forwarded to `assertion`.

    Raises
    ------
    AssertionError if the column sets differ, or if `check_types` is
    set and a pair of columns has different dtypes.
    """
    assert sorted(df1.columns) == sorted(df2.columns), "DataFrames with different structure cannot be compared"

    for col in df1.columns:
        col1 = df1[col]
        col2 = df2[col]
        if check_types:
            assert col1.dtype == col2.dtype
        assertion(col1.values, col2.values, **kwargs)


def assert_dataframes_equal(df1, df2, **kwargs):
Expand Down Expand Up @@ -142,26 +128,47 @@ def assert_PMap_equality(pmp0, pmp1):
assert_Peak_equality(s2_0, s2_1)


def _get_table_name(t):
    """Return `t.name` if the object has such an attribute, else "unknown"."""
    return getattr(t, "name", "unknown")


def _assert_values_match(got, expected, rtol, atol):
    """Compare two arrays: within tolerance if `got` is float, exactly otherwise."""
    if got.dtype.kind == 'f':
        assert_allclose(got, expected, rtol=rtol, atol=atol)
    else:
        assert_equal(got, expected)


def assert_tables_equality(got_table, expected_table, rtol=1e-7, atol=0):
    """
    Assert that two tables (or plain arrays) hold the same data.

    Both inputs are sliced with `[:]` and the results are compared.
    When the sliced data has named fields (`dtype.names` is not None)
    the comparison is done column by column; otherwise the arrays are
    compared as a whole. Floating-point data (dtype kind 'f') is
    compared with `assert_allclose`, everything else with
    `assert_equal`.

    Parameters
    ----------
    got_table      : object supporting `[:]` that yields an array with
                     a `dtype`. Its `name` attribute, if present, is
                     used in the error messages.
    expected_table : same as `got_table`, holding the reference data.
    rtol, atol     : relative and absolute tolerances forwarded to
                     `assert_allclose` for floating-point data.

    Raises
    ------
    AssertionError if the tables differ in length, number of columns,
    column names, column types or values. When a value comparison
    fails, a line identifying the offending table/column is printed
    before the exception propagates.
    """
    table_got      =      got_table[:]
    table_expected = expected_table[:]
    # we keep both names to be as generic as possible
    names = _get_table_name(got_table), _get_table_name(expected_table)

    # (number of rows, number of named columns); the latter is 0 for plain arrays
    shape_got      = len(table_got     ), len(table_got     .dtype)
    shape_expected = len(table_expected), len(table_expected.dtype)
    assert shape_got == shape_expected, f"Tables {names} have different shapes: {shape_got} vs. {shape_expected}"

    if table_got.dtype.names is not None:
        for col_name in table_got.dtype.names:
            assert col_name in table_expected.dtype.names, f"Column {col_name} missing in {names[1]}"

            got      = table_got     [col_name]
            expected = table_expected[col_name]
            # only the kind is compared, so e.g. i4 and i8 columns are compatible
            assert got.dtype.kind == expected.dtype.kind, f"Tables {names} have different types ({got.dtype} {expected.dtype}) for column {col_name}"

            try:
                _assert_values_match(got, expected, rtol, atol)
            except Exception:
                # point at the failing column; the original error still propagates
                print(f"Mismatch in column {col_name} of tables {names}")
                raise
    else:
        got      = table_got
        expected = table_expected
        assert got.dtype == expected.dtype, f"Tables {names} have different types ({got.dtype} {expected.dtype})"
        # the (rows, columns) check above only sees the first dimension of plain arrays
        assert got.shape == expected.shape, f"Tables {names} have different shapes: {got.shape} vs. {expected.shape}"

        try:
            _assert_values_match(got, expected, rtol, atol)
        except Exception:
            print(f"Mismatch in tables {names}")
            raise


def assert_cluster_equality(a_cluster, b_cluster):
assert np.allclose(a_cluster.posxy , b_cluster.posxy )
Expand Down
96 changes: 96 additions & 0 deletions invisible_cities/core/testing_utils_test.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import numpy as np

from pytest import mark
from pytest import raises
from flaky import flaky
from hypothesis import given
from hypothesis.strategies import floats
Expand Down Expand Up @@ -41,3 +42,98 @@ def test_assert_tables_equality_withNaN():
table = np.array([('Rex', 9, 81.0), ('Fido', 3, np.nan)],
dtype=[('name', 'U10'), ('age', 'i4'), ('weight', 'f4')])
assert_tables_equality(table, table)


@mark.parametrize("index value".split(), ((0, "three"), (1, 3), (2, 3.0)))
def test_assert_tables_equality_fails_different_values(index, value):
    # Alter a single field (text, integer or float, depending on
    # `index`) of the second row in a copy of the table; the
    # comparison with the original must then fail.
    columns  = [('text', 'U10'), ('integer', 'i4'), ('float', 'f4')]
    rows     = [('one', 1, 1.0), ('two', 2, 2.0)]
    original = np.array(rows, dtype=columns)

    modified = original.copy()
    modified[1][index] = value

    with raises(AssertionError):
        assert_tables_equality(original, modified)


def test_assert_tables_equality_fails_different_names():
    # Two tables with identical rows and column types, differing only
    # in the name of the first column, must not compare equal.
    rows = [('one', 1, 1.0), ('two', 2, 2.0)]

    reference = np.array(rows, dtype=[('text', 'U10'), ('integer', 'i4'), ('float', 'f4')])
    renamed   = np.array(rows, dtype=[('different_name', 'U10'), ('integer', 'i4'), ('float', 'f4')])

    with raises(AssertionError):
        assert_tables_equality(reference, renamed)


def test_assert_tables_equality_fails_different_types():
    # Cast the integer column to float and check that the function
    # picks up the difference in column type.
    #
    # This test must not share its name with the parametrized
    # `test_assert_tables_equality_fails_different_values`: a second
    # definition with that name would replace the first one in the
    # module namespace and pytest would never collect it.
    dtypes1 = [('text', 'U10'), ('integer', 'i4'), ('float', 'f4')]
    table1  = np.array([ ('one', 1, 1.0)
                       , ('two', 2, 2.0)],
                       dtype=dtypes1)

    dtypes2    = list(dtypes1)
    dtypes2[1] = ("integer", "f4")
    table2     = table1.astype(dtypes2) # astype already returns a new array

    with raises(AssertionError):
        assert_tables_equality(table1, table2)


@mark.parametrize("dtype", (int, float))
def test_assert_tables_equality_equal_arrays(dtype):
    # a plain (unstructured) array must compare equal to its own copy
    reference = np.arange(20, dtype=dtype)
    assert_tables_equality(reference, reference.copy())


@mark.parametrize("dtype", (int, float))
def test_assert_tables_equality_different_arrays(dtype):
    # plain arrays of the same shape and type but with every value
    # shifted by one must be reported as different
    base    = np.arange(20, dtype=dtype)
    shifted = base + 1

    with raises(AssertionError):
        assert_tables_equality(base, shifted)


@mark.parametrize( "shape1 shape2".split()
                 , ( ( (3, 4), (4, 4) )   # different lengths
                   , ( (3, 3), (3, 4) ))) # different widths
def test_assert_tables_equality_different_array_shapes(shape1, shape2):
    # same contents (all ones), mismatching shapes: comparison must fail
    first, second = np.ones(shape1), np.ones(shape2)

    with raises(AssertionError):
        assert_tables_equality(first, second)


def test_assert_tables_equality_different_table_shapes():
    # Structured tables that differ from the reference either in the
    # number of rows or in the number of columns must not compare equal.
    two_columns   = [('integer', 'i4'), ('float', 'f4')]
    three_columns = [('text', 'U10'), ('integer', 'i4'), ('float', 'f4')]

    reference = np.array([(1, 1.0), (2, 2.0)], dtype=two_columns)

    # one extra row
    longer = np.array([(1, 1.0), (2, 2.0), (3, 3.0)], dtype=two_columns)

    # one extra column
    wider = np.array([('one', 1, 1.0), ('two', 2, 2.0)], dtype=three_columns)

    for other in (longer, wider):
        with raises(AssertionError):
            assert_tables_equality(reference, other)
Loading