Remove module-level skip when numpy is unavailable in test_convert_builtin.py and add individual test skips
raulcd · Aug 1, 2024 · ef97ad3 · ef97ad3
1 parent 3584d5e
commit ef97ad3
Showing 1 changed file with 92 additions and 55 deletions.
diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py
@@ -27,26 +27,25 @@
 try:
  import numpy as np
 except ImportError:
- pytest.skip(reason="Failures on test collection due to numpy NOT enabled",
- allow_module_level=True)
+ np = None
 
 from pyarrow.pandas_compat import _pandas_api # noqa
 import pyarrow as pa
 import pyarrow.tests.strategies as past
 
 
 int_type_pairs = [
- (np.int8, pa.int8()),
- (np.int16, pa.int16()),
- (np.int32, pa.int32()),
- (np.int64, pa.int64()),
- (np.uint8, pa.uint8()),
- (np.uint16, pa.uint16()),
- (np.uint32, pa.uint32()),
- (np.uint64, pa.uint64())]
+ ("int8", pa.int8()),
+ ("int16", pa.int16()),
+ ("int32", pa.int32()),
+ ("int64", pa.int64()),
+ ("uint8", pa.uint8()),
+ ("uint16", pa.uint16()),
+ ("uint32", pa.uint32()),
+ ("uint64", pa.uint64())]
 
 
-np_int_types, pa_int_types = zip(*int_type_pairs)
+np_str_int_types, pa_int_types = zip(*int_type_pairs)
 
 
 class StrangeIterable:
@@ -178,7 +177,9 @@ def _as_set(xs):
  return set(xs)
 
 
-SEQUENCE_TYPES = [_as_list, _as_tuple, _as_numpy_array]
+SEQUENCE_TYPES = [_as_list, _as_tuple]
+if np is not None:
+ SEQUENCE_TYPES.append(_as_numpy_array)
 ITERABLE_TYPES = [_as_set, _as_dict_values] + SEQUENCE_TYPES
 COLLECTIONS_TYPES = [_as_deque] + ITERABLE_TYPES
 
@@ -221,6 +222,7 @@ def test_sequence_boolean(seq):
  assert arr.to_pylist() == expected
 
 
+@pytest.mark.numpy
 @parametrize_with_sequence_types
 def test_sequence_numpy_boolean(seq):
  expected = [np.bool_(True), None, np.bool_(False), None]
@@ -229,6 +231,7 @@ def test_sequence_numpy_boolean(seq):
  assert arr.to_pylist() == [True, None, False, None]
 
 
+@pytest.mark.numpy
 @parametrize_with_sequence_types
 def test_sequence_mixed_numpy_python_bools(seq):
  values = np.array([True, False])
@@ -285,15 +288,16 @@ def test_list_with_non_list(seq):
 @pytest.mark.parametrize("factory", [
  pa.list_, pa.large_list, pa.list_view, pa.large_list_view])
 def test_nested_arrays(seq, factory):
- arr = pa.array(seq([np.array([], dtype=np.int64),
- np.array([1, 2], dtype=np.int64), None]),
+ arr = pa.array(seq([pa.array([], type=pa.int64()),
+ pa.array([1, 2], type=pa.int64()), None]),
  type=factory(pa.int64()))
  assert len(arr) == 3
  assert arr.null_count == 1
  assert arr.type == factory(pa.int64())
  assert arr.to_pylist() == [[], [1, 2], None]
 
 
+@pytest.mark.numpy
 @parametrize_with_sequence_types
 def test_nested_fixed_size_list(seq):
  # sequence of lists
@@ -338,10 +342,12 @@ def test_sequence_all_none(seq):
  assert arr.to_pylist() == [None, None]
 
 
+@pytest.mark.numpy
 @parametrize_with_sequence_types
 @pytest.mark.parametrize("np_scalar_pa_type", int_type_pairs)
 def test_sequence_integer(seq, np_scalar_pa_type):
- np_scalar, pa_type = np_scalar_pa_type
+ np_str_scalar, pa_type = np_scalar_pa_type
+ np_scalar = getattr(np, np_str_scalar)
  expected = [1, None, 3, None,
  np.iinfo(np_scalar).min, np.iinfo(np_scalar).max]
  arr = pa.array(seq(expected), type=pa_type)
@@ -351,12 +357,12 @@ def test_sequence_integer(seq, np_scalar_pa_type):
  assert arr.to_pylist() == expected
 
 
+@pytest.mark.numpy
 @parametrize_with_collections_types
-@pytest.mark.parametrize("np_scalar_pa_type", int_type_pairs)
-def test_sequence_integer_np_nan(seq, np_scalar_pa_type):
+@pytest.mark.parametrize("pa_type", pa_int_types)
+def test_sequence_integer_np_nan(seq, pa_type):
  # ARROW-2806: numpy.nan is a double value and thus should produce
  # a double array.
- _, pa_type = np_scalar_pa_type
  with pytest.raises(ValueError):
  pa.array(seq([np.nan]), type=pa_type, from_pandas=False)
 
@@ -368,12 +374,12 @@ def test_sequence_integer_np_nan(seq, np_scalar_pa_type):
  assert arr.to_pylist() == expected
 
 
+@pytest.mark.numpy
 @parametrize_with_sequence_types
-@pytest.mark.parametrize("np_scalar_pa_type", int_type_pairs)
-def test_sequence_integer_nested_np_nan(seq, np_scalar_pa_type):
+@pytest.mark.parametrize("pa_type", pa_int_types)
+def test_sequence_integer_nested_np_nan(seq, pa_type):
  # ARROW-2806: numpy.nan is a double value and thus should produce
  # a double array.
- _, pa_type = np_scalar_pa_type
  with pytest.raises(ValueError):
  pa.array(seq([[np.nan]]), type=pa.list_(pa_type), from_pandas=False)
 
@@ -395,10 +401,12 @@ def test_sequence_integer_inferred(seq):
  assert arr.to_pylist() == expected
 
 
+@pytest.mark.numpy
 @parametrize_with_sequence_types
 @pytest.mark.parametrize("np_scalar_pa_type", int_type_pairs)
 def test_sequence_numpy_integer(seq, np_scalar_pa_type):
- np_scalar, pa_type = np_scalar_pa_type
+ np_str_scalar, pa_type = np_scalar_pa_type
+ np_scalar = getattr(np, np_str_scalar)
  expected = [np_scalar(1), None, np_scalar(3), None,
  np_scalar(np.iinfo(np_scalar).min),
  np_scalar(np.iinfo(np_scalar).max)]
@@ -409,10 +417,12 @@ def test_sequence_numpy_integer(seq, np_scalar_pa_type):
  assert arr.to_pylist() == expected
 
 
+@pytest.mark.numpy
 @parametrize_with_sequence_types
 @pytest.mark.parametrize("np_scalar_pa_type", int_type_pairs)
 def test_sequence_numpy_integer_inferred(seq, np_scalar_pa_type):
- np_scalar, pa_type = np_scalar_pa_type
+ np_str_scalar, pa_type = np_scalar_pa_type
+ np_scalar = getattr(np, np_str_scalar)
  expected = [np_scalar(1), None, np_scalar(3), None]
  expected += [np_scalar(np.iinfo(np_scalar).min),
  np_scalar(np.iinfo(np_scalar).max)]
@@ -438,6 +448,7 @@ def test_broken_integers(seq):
  pa.array(seq(data), type=pa.int64())
 
 
+@pytest.mark.numpy
 def test_numpy_scalars_mixed_type():
  # ARROW-4324
  data = [np.int32(10), np.float32(0.5)]
@@ -452,6 +463,7 @@ def test_numpy_scalars_mixed_type():
  assert arr.equals(expected)
 
 
+@pytest.mark.numpy
 @pytest.mark.xfail(reason="Type inference for uint64 not implemented",
  raises=OverflowError)
 def test_uint64_max_convert():
@@ -495,7 +507,7 @@ def test_integer_from_string_error(seq, typ):
 
 def test_convert_with_mask():
  data = [1, 2, 3, 4, 5]
- mask = np.array([False, True, False, False, True])
+ mask = [False, True, False, False, True]
 
  result = pa.array(data, mask=mask)
  expected = pa.array([1, None, 3, 4, None])
@@ -563,6 +575,7 @@ def test_double_integer_coerce_representable_range():
  pa.array(invalid_values2)
 
 
+@pytest.mark.numpy
 def test_float32_integer_coerce_representable_range():
  f32 = np.float32
  valid_values = [f32(1.5), 1 << 24, -(1 << 24)]
@@ -591,14 +604,16 @@ def test_mixed_sequence_errors():
  pa.array([1.5, 'foo'])
 
 
+@pytest.mark.numpy
 @parametrize_with_sequence_types
-@pytest.mark.parametrize("np_scalar,pa_type", [
- (np.float16, pa.float16()),
- (np.float32, pa.float32()),
- (np.float64, pa.float64())
+@pytest.mark.parametrize("np_str_scalar,pa_type", [
+ ("float16", pa.float16()),
+ ("float32", pa.float32()),
+ ("float64", pa.float64())
 ])
 @pytest.mark.parametrize("from_pandas", [True, False])
-def test_sequence_numpy_double(seq, np_scalar, pa_type, from_pandas):
+def test_sequence_numpy_double(seq, np_str_scalar, pa_type, from_pandas):
+ np_scalar = getattr(np, np_str_scalar)
  data = [np_scalar(1.5), np_scalar(1), None, np_scalar(2.5), None, np.nan]
  arr = pa.array(seq(data), from_pandas=from_pandas)
  assert len(arr) == 6
@@ -620,27 +635,29 @@ def test_sequence_numpy_double(seq, np_scalar, pa_type, from_pandas):
  assert np.isnan(arr.to_pylist()[5])
 
 
+@pytest.mark.numpy
 @pytest.mark.parametrize("from_pandas", [True, False])
-@pytest.mark.parametrize("inner_seq", [np.array, list])
-def test_ndarray_nested_numpy_double(from_pandas, inner_seq):
+def test_ndarray_nested_numpy_double(from_pandas):
  # ARROW-2806
- data = np.array([
- inner_seq([1., 2.]),
- inner_seq([1., 2., 3.]),
- inner_seq([np.nan]),
- None
- ], dtype=object)
- arr = pa.array(data, from_pandas=from_pandas)
- assert len(arr) == 4
- assert arr.null_count == 1
- assert arr.type == pa.list_(pa.float64())
- if from_pandas:
- assert arr.to_pylist() == [[1.0, 2.0], [1.0, 2.0, 3.0], [None], None]
- else:
- np.testing.assert_equal(arr.to_pylist(),
- [[1., 2.], [1., 2., 3.], [np.nan], None])
+ for inner_seq in (np.array, list):
+ data = np.array([
+ inner_seq([1., 2.]),
+ inner_seq([1., 2., 3.]),
+ inner_seq([np.nan]),
+ None
+ ], dtype=object)
+ arr = pa.array(data, from_pandas=from_pandas)
+ assert len(arr) == 4
+ assert arr.null_count == 1
+ assert arr.type == pa.list_(pa.float64())
+ if from_pandas:
+ assert arr.to_pylist() == [[1.0, 2.0], [1.0, 2.0, 3.0], [None], None]
+ else:
+ np.testing.assert_equal(arr.to_pylist(),
+ [[1., 2.], [1., 2., 3.], [np.nan], None])
 
 
+@pytest.mark.numpy
 def test_nested_ndarray_in_object_array():
  # ARROW-4350
  arr = np.empty(2, dtype=object)
@@ -668,6 +685,7 @@ def test_nested_ndarray_in_object_array():
  assert result.to_pylist() == [[[1], [2]], [[1], [2]]]
 
 
+@pytest.mark.numpy
 @pytest.mark.xfail(reason=("Type inference for multidimensional ndarray "
  "not yet implemented"),
  raises=AssertionError)
@@ -686,6 +704,7 @@ def test_multidimensional_ndarray_as_nested_list():
  assert result.equals(expected)
 
 
+@pytest.mark.numpy
 @pytest.mark.parametrize(('data', 'value_type'), [
  ([True, False], pa.bool_()),
  ([None, None], pa.null()),
@@ -715,6 +734,7 @@ def test_list_array_from_object_ndarray(data, value_type):
  assert arr.to_pylist() == [data]
 
 
+@pytest.mark.numpy
 @pytest.mark.parametrize(('data', 'value_type'), [
  ([[1, 2], [3]], pa.list_(pa.int64())),
  ([[1, 2], [3, 4]], pa.list_(pa.int64(), 2)),
@@ -734,13 +754,14 @@ def test_array_ignore_nan_from_pandas():
  # See ARROW-4324, this reverts logic that was introduced in
  # ARROW-2240
  with pytest.raises(ValueError):
- pa.array([np.nan, 'str'])
+ pa.array([float("nan"), 'str'])
 
- arr = pa.array([np.nan, 'str'], from_pandas=True)
+ arr = pa.array([float("nan"), 'str'], from_pandas=True)
  expected = pa.array([None, 'str'])
  assert arr.equals(expected)
 
 
+@pytest.mark.numpy
 def test_nested_ndarray_different_dtypes():
  data = [
  np.array([1, 2, 3], dtype='int64'),
@@ -1242,6 +1263,7 @@ def test_sequence_timestamp_out_of_bounds_nanosecond():
  assert arr.to_pylist()[0] == datetime.datetime(2262, 4, 12)
 
 
+@pytest.mark.numpy
 def test_sequence_numpy_timestamp():
  data = [
  np.datetime64(datetime.datetime(2007, 7, 13, 1, 23, 34, 123456)),
@@ -1411,14 +1433,25 @@ class CustomClass():
  pa.array([1, CustomClass()], type=ty)
 
 
-@pytest.mark.parametrize('np_scalar', [True, False])
-def test_sequence_duration(np_scalar):
+def test_sequence_duration():
  td1 = datetime.timedelta(2, 3601, 1)
  td2 = datetime.timedelta(1, 100, 1000)
- if np_scalar:
- data = [np.timedelta64(td1), None, np.timedelta64(td2)]
- else:
- data = [td1, None, td2]
+ data = [td1, None, td2]
+
+ arr = pa.array(data)
+ assert len(arr) == 3
+ assert arr.type == pa.duration('us')
+ assert arr.null_count == 1
+ assert arr[0].as_py() == td1
+ assert arr[1].as_py() is None
+ assert arr[2].as_py() == td2
+
+
+@pytest.mark.numpy
+def test_sequence_duration_np_scalar():
+ td1 = datetime.timedelta(2, 3601, 1)
+ td2 = datetime.timedelta(1, 100, 1000)
+ data = [np.timedelta64(td1), None, np.timedelta64(td2)]
 
  arr = pa.array(data)
  assert len(arr) == 3
@@ -1484,6 +1517,7 @@ def test_sequence_duration_nested_lists_with_explicit_type(factory):
  assert arr.to_pylist() == data
 
 
+@pytest.mark.numpy
 def test_sequence_duration_nested_lists_numpy():
  td1 = datetime.timedelta(1, 1, 1000)
  td2 = datetime.timedelta(1, 100)
@@ -1773,6 +1807,7 @@ def test_struct_from_dicts_bytes_keys():
  ]
 
 
+@pytest.mark.numpy
 def test_struct_from_tuples():
  ty = pa.struct([pa.field('a', pa.int32()),
  pa.field('b', pa.string()),
@@ -1919,6 +1954,7 @@ def test_struct_from_mixed_sequence():
  pa.array(data, type=ty)
 
 
+@pytest.mark.numpy
 def test_struct_from_dicts_inference():
  expected_type = pa.struct([pa.field('a', pa.int64()),
  pa.field('b', pa.string()),
@@ -1996,7 +2032,7 @@ def test_structarray_from_arrays_coerce():
 
 
 def test_decimal_array_with_none_and_nan():
- values = [decimal.Decimal('1.234'), None, np.nan, decimal.Decimal('nan')]
+ values = [decimal.Decimal('1.234'), None, float("nan"), decimal.Decimal('nan')]
 
  with pytest.raises(TypeError):
  # ARROW-6227: Without from_pandas=True, NaN is considered a float
@@ -2502,6 +2538,7 @@ def test_array_accepts_pyarrow_scalar(seq, data, scalar_data, value_type):
  assert expect.equals(result)
 
 
+@pytest.mark.numpy
 @parametrize_with_collections_types
 def test_array_accepts_pyarrow_scalar_errors(seq):
  sequence = seq([pa.scalar(1), pa.scalar("a"), pa.scalar(3.0)])