diff --git a/python/pyarrow/array.pyx b/python/pyarrow/array.pyx index 6c862751fc2..d44212f4aed 100644 --- a/python/pyarrow/array.pyx +++ b/python/pyarrow/array.pyx @@ -167,6 +167,12 @@ cdef class Array: return PyObject_to_object(np_arr) + def to_pylist(self): + """ + Convert to a list of native Python objects. + """ + return [x.as_py() for x in self] + cdef class NullArray(Array): pass diff --git a/python/pyarrow/scalar.pyx b/python/pyarrow/scalar.pyx index 0d391e5f26b..c2d20e460c3 100644 --- a/python/pyarrow/scalar.pyx +++ b/python/pyarrow/scalar.pyx @@ -194,7 +194,9 @@ cdef object box_arrow_scalar(DataType type, const shared_ptr[CArray]& sp_array, int index): cdef ArrayValue val - if sp_array.get().IsNull(index): + if type.type.type == Type_NA: + return NA + elif sp_array.get().IsNull(index): return NA else: val = _scalar_classes[type.type.type]() diff --git a/python/pyarrow/table.pyx b/python/pyarrow/table.pyx index 333686f810e..2f7d4309e45 100644 --- a/python/pyarrow/table.pyx +++ b/python/pyarrow/table.pyx @@ -108,6 +108,15 @@ cdef class ChunkedArray: for i in range(self.num_chunks): yield self.chunk(i) + def to_pylist(self): + """ + Convert to a list of native Python objects. + """ + result = [] + for i in range(self.num_chunks): + result += self.chunk(i).to_pylist() + return result + cdef class Column: """ @@ -143,6 +152,12 @@ cdef class Column: return pd.Series(PyObject_to_object(arr), name=self.name) + def to_pylist(self): + """ + Convert to a list of native Python objects. + """ + return self.data.to_pylist() + cdef _check_nullptr(self): if self.column == NULL: raise ReferenceError("Column object references a NULL pointer." 
diff --git a/python/pyarrow/tests/test_column.py b/python/pyarrow/tests/test_column.py index b62f58236e0..32202cb5a9a 100644 --- a/python/pyarrow/tests/test_column.py +++ b/python/pyarrow/tests/test_column.py @@ -35,6 +35,7 @@ def test_basics(self): assert column.length() == 5 assert len(column) == 5 assert column.shape == (5,) + assert column.to_pylist() == [-10, -5, 0, 5, 10] def test_pandas(self): data = [ diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py index 8937f8db694..34371b0bdd7 100644 --- a/python/pyarrow/tests/test_convert_builtin.py +++ b/python/pyarrow/tests/test_convert_builtin.py @@ -22,28 +22,34 @@ class TestConvertList(unittest.TestCase): def test_boolean(self): - arr = pyarrow.from_pylist([True, None, False, None]) + expected = [True, None, False, None] + arr = pyarrow.from_pylist(expected) assert len(arr) == 4 assert arr.null_count == 2 assert arr.type == pyarrow.bool_() + assert arr.to_pylist() == expected def test_empty_list(self): arr = pyarrow.from_pylist([]) assert len(arr) == 0 assert arr.null_count == 0 assert arr.type == pyarrow.null() + assert arr.to_pylist() == [] def test_all_none(self): arr = pyarrow.from_pylist([None, None]) assert len(arr) == 2 assert arr.null_count == 2 assert arr.type == pyarrow.null() + assert arr.to_pylist() == [None, None] def test_integer(self): - arr = pyarrow.from_pylist([1, None, 3, None]) + expected = [1, None, 3, None] + arr = pyarrow.from_pylist(expected) assert len(arr) == 4 assert arr.null_count == 2 assert arr.type == pyarrow.int64() + assert arr.to_pylist() == expected def test_garbage_collection(self): import gc @@ -62,6 +68,7 @@ def test_double(self): assert len(arr) == 6 assert arr.null_count == 3 assert arr.type == pyarrow.double() + assert arr.to_pylist() == data def test_string(self): data = ['foo', b'bar', None, 'arrow'] @@ -69,6 +76,7 @@ def test_string(self): assert len(arr) == 4 assert arr.null_count == 1 assert arr.type == 
pyarrow.string() + assert arr.to_pylist() == ['foo', 'bar', None, 'arrow'] def test_mixed_nesting_levels(self): pyarrow.from_pylist([1, 2, None]) @@ -90,3 +98,4 @@ def test_list_of_int(self): assert len(arr) == 4 assert arr.null_count == 1 assert arr.type == pyarrow.list_(pyarrow.int64()) + assert arr.to_pylist() == data