diff --git a/cpp/src/arrow/python/arrow_to_pandas.cc b/cpp/src/arrow/python/arrow_to_pandas.cc index 23bef7bcae6..d1fca70412c 100644 --- a/cpp/src/arrow/python/arrow_to_pandas.cc +++ b/cpp/src/arrow/python/arrow_to_pandas.cc @@ -498,6 +498,7 @@ inline Status ConvertListsLike(PandasOptions options, const std::shared_ptr(data.chunk(c)); @@ -507,8 +508,8 @@ inline Status ConvertListsLike(PandasOptions options, const std::shared_ptrvalue_offset(i)); - PyObject* end = PyLong_FromLong(arr->value_offset(i + 1)); + PyObject* start = PyLong_FromLongLong(arr->value_offset(i) + chunk_offset); + PyObject* end = PyLong_FromLongLong(arr->value_offset(i + 1) + chunk_offset); PyObject* slice = PySlice_New(start, end, NULL); *out_values = PyObject_GetItem(numpy_array, slice); Py_DECREF(start); @@ -517,6 +518,8 @@ inline Status ConvertListsLike(PandasOptions options, const std::shared_ptrlength(); } Py_XDECREF(numpy_array); diff --git a/python/pyarrow/tests/test_convert_pandas.py b/python/pyarrow/tests/test_convert_pandas.py index 61bd072f6ba..c2631842278 100644 --- a/python/pyarrow/tests/test_convert_pandas.py +++ b/python/pyarrow/tests/test_convert_pandas.py @@ -534,6 +534,31 @@ def test_column_of_lists(self): field = schema.field_by_name(column) self._check_array_roundtrip(df[column], type=field.type) + def test_column_of_lists_chunked(self): + # ARROW-1357 + df = pd.DataFrame({ + 'lists': np.array([ + [1, 2], + None, + [2, 3], + [4, 5], + [6, 7], + [8, 9] + ], dtype=object) + }) + + schema = pa.schema([ + pa.field('lists', pa.list_(pa.int64())) + ]) + + t1 = pa.Table.from_pandas(df[:2], schema=schema) + t2 = pa.Table.from_pandas(df[2:], schema=schema) + + table = pa.concat_tables([t1, t2]) + result = table.to_pandas() + + tm.assert_frame_equal(result, df) + def test_column_of_lists_strided(self): df, schema = dataframe_with_lists() df = pd.concat([df] * 6, ignore_index=True) diff --git a/python/pyarrow/tests/test_serialization.py b/python/pyarrow/tests/test_serialization.py index f6f98402ac0..013d86ebf9c 100644 --- a/python/pyarrow/tests/test_serialization.py +++ b/python/pyarrow/tests/test_serialization.py @@ -20,7 +20,6 @@ import pytest from collections import namedtuple -import os import string import sys