Patch summary (free text ahead of the first "diff --git" header; patch tools
skip leading text that is not part of a diff).

cpp/src/arrow/python/helpers.cc
  * Hunk labelled CIntFromPythonImpl: objects that fail PyLong_Check are now
    converted with PyNumber_Index instead of PyNumber_Long.
  * InvalidValue / InvalidType: the object text in the error message now comes
    from PyObject_StdStringRepr(obj); the former
    internal::PyObject_StdStringStr call and its RETURN_NOT_OK early return
    are removed.

python/pyarrow/tests/test_convert_builtin.py
  * Imports re and keeps the second tuple of zip(*int_type_pairs) as
    pa_int_types.
  * Adds test_integer_from_string_error (ARROW-9451):
    pa.array(seq(['1']), type=typ) must raise pa.ArrowInvalid for every type
    in pa_int_types.
  * test_struct_from_list_of_pairs_errors: the expected message is built from
    repr(...) and passed through re.escape instead of hand-escaping the
    parentheses.

NOTE(review): line breaks were restored from the hunk line counts; leading
whitespace on context lines is a best-effort reconstruction -- confirm it
against the upstream files before applying. The final hunk is cut off at the
end of this chunk.

diff --git a/cpp/src/arrow/python/helpers.cc b/cpp/src/arrow/python/helpers.cc
index 6f49a7cdb2294..75a77c640bbc4 100644
--- a/cpp/src/arrow/python/helpers.cc
+++ b/cpp/src/arrow/python/helpers.cc
@@ -201,7 +201,7 @@ Status CIntFromPythonImpl(PyObject* obj, Int* out, const std::string& overflow_m
   // PyLong_AsUnsignedLong() and PyLong_AsUnsignedLongLong() don't handle
   // conversion from non-ints (e.g. np.uint64), so do it ourselves
   if (!PyLong_Check(obj)) {
-    ref.reset(PyNumber_Long(obj));
+    ref.reset(PyNumber_Index(obj));
     if (!ref) {
       RETURN_IF_PYERROR();
     }
@@ -348,16 +348,14 @@ bool IsPandasTimestamp(PyObject* obj) {
 }
 
 Status InvalidValue(PyObject* obj, const std::string& why) {
-  std::string obj_as_str;
-  RETURN_NOT_OK(internal::PyObject_StdStringStr(obj, &obj_as_str));
-  return Status::Invalid("Could not convert ", obj_as_str, " with type ",
+  auto obj_as_str = PyObject_StdStringRepr(obj);
+  return Status::Invalid("Could not convert ", std::move(obj_as_str), " with type ",
                          Py_TYPE(obj)->tp_name, ": ", why);
 }
 
 Status InvalidType(PyObject* obj, const std::string& why) {
-  std::string obj_as_str;
-  RETURN_NOT_OK(internal::PyObject_StdStringStr(obj, &obj_as_str));
-  return Status::TypeError("Could not convert ", obj_as_str, " with type ",
+  auto obj_as_str = PyObject_StdStringRepr(obj);
+  return Status::TypeError("Could not convert ", std::move(obj_as_str), " with type ",
                            Py_TYPE(obj)->tp_name, ": ", why);
 }
 
diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py
index 665c14f5daaea..933d8d612148e 100644
--- a/python/pyarrow/tests/test_convert_builtin.py
+++ b/python/pyarrow/tests/test_convert_builtin.py
@@ -20,6 +20,7 @@
 import decimal
 import itertools
 import math
+import re
 
 import hypothesis as h
 import numpy as np
@@ -42,7 +43,7 @@
     (np.uint64, pa.uint64())]
 
 
-np_int_types, _ = zip(*int_type_pairs)
+np_int_types, pa_int_types = zip(*int_type_pairs)
 
 
 class StrangeIterable:
@@ -433,6 +434,14 @@ def test_unsigned_integer_overflow(bits):
         pa.array([-1], ty)
 
 
+@parametrize_with_iterable_types
+@pytest.mark.parametrize("typ", pa_int_types)
+def test_integer_from_string_error(seq, typ):
+    # ARROW-9451: pa.array(['1'], type=pa.uint32()) should not succeed
+    with pytest.raises(pa.ArrowInvalid):
+        pa.array(seq(['1']), type=typ)
+
+
 def test_convert_with_mask():
     data = [1, 2, 3, 4, 5]
     mask = np.array([False, True, False, False, True])
@@ -1684,7 +1693,7 @@ def test_struct_from_list_of_pairs_errors():
     # type inference
     template = (
         r"Could not convert {} with type {}: was expecting tuple of "
-        r"\(key, value\) pair"
+        r"(key, value) pair"
     )
     cases = [
         tuple(),  # empty key-value pair
@@ -1693,10 +1702,9 @@ def test_struct_from_list_of_pairs_errors():
         'string',  # not a tuple
     ]
     for key_value_pair in cases:
-        msg = template.format(
-            str(key_value_pair).replace('(', r'\(').replace(')', r'\)'),
-            type(key_value_pair).__name__
-        )
+        msg = re.escape(template.format(
+            repr(key_value_pair), type(key_value_pair).__name__
+        ))
         with pytest.raises(TypeError, match=msg):
             pa.array([