2020
# Raw pandas fixture used by the tests in this module: a flat integer column,
# a list column and a struct (dict) column. Row 1 is None in both nested
# columns so that null list/struct values are part of the data under test;
# the list column also contains an empty list and the struct column an
# empty dict.
pd_data = pd.DataFrame(
    {
        "ints": [10, 20, 30, 40, 50],
        "nested_ints": [[1, 2], None, [3, 4, 5], [], [20, 30]],
        "structs": [{"a": 100}, None, {}, {"b": 200}, {"b": 300}],
    }
)
2828
2929pd_data_normalized = pd .DataFrame (
3030 {
31- "ints" : pd .Series ([10 , 20 , 30 , 40 ], dtype = dtypes .INT_DTYPE ),
31+ "ints" : pd .Series ([10 , 20 , 30 , 40 , 50 ], dtype = dtypes .INT_DTYPE ),
3232 "nested_ints" : pd .Series (
33- [[1 , 2 ], [3 , 4 , 5 ], [], [20 , 30 ]], dtype = pd .ArrowDtype (pa .list_ (pa .int64 ()))
33+ [[1 , 2 ], None , [3 , 4 , 5 ], [], [20 , 30 ]],
34+ dtype = pd .ArrowDtype (pa .list_ (pa .int64 ())),
3435 ),
3536 "structs" : pd .Series (
36- [{"a" : 100 }, {}, {"b" : 200 }, {"b" : 300 }],
37+ [{"a" : 100 }, None , {}, {"b" : 200 }, {"b" : 300 }],
3738 dtype = pd .ArrowDtype (pa .struct ({"a" : pa .int64 (), "b" : pa .int64 ()})),
3839 ),
3940 }
@@ -122,11 +123,11 @@ def test_local_data_well_formed_round_trip_chunked():
122123
def test_local_data_well_formed_round_trip_sliced():
    """Round-trip a sliced Arrow table through ``ManagedArrowTable``.

    Builds an Arrow table from ``pd_data``, takes a slice of it, rebuilds a
    table from that slice's record batches, and checks that iterating the
    managed table reproduces the matching rows of ``pd_data_normalized``.
    """
    pa_table = pa.Table.from_pandas(pd_data, preserve_index=False)
    # slice(0, 4): offset 0, length 4. The slice is converted to batches and
    # back so the table handed to from_pyarrow is built from sliced data.
    as_rechunked_pyarrow = pa.Table.from_batches(pa_table.slice(0, 4).to_batches())
    local_entry = local_data.ManagedArrowTable.from_pyarrow(as_rechunked_pyarrow)
    result = pd.DataFrame(local_entry.itertuples(), columns=pd_data.columns)
    # Indexes are reset on both sides and dtypes are not compared; only the
    # values of rows 0..3 have to agree.
    pandas.testing.assert_frame_equal(
        pd_data_normalized[0:4].reset_index(drop=True),
        result.reset_index(drop=True),
        check_dtype=False,
    )
@@ -143,3 +144,25 @@ def test_local_data_not_equal_other():
143144 local_entry2 = local_data .ManagedArrowTable .from_pandas (pd_data [::2 ])
144145 assert local_entry != local_entry2
145146 assert hash (local_entry ) != hash (local_entry2 )
147+
148+
def test_local_data_itertuples_struct_none():
    """A None entry in a struct column is yielded by ``itertuples`` as None."""
    # Local name chosen so it does not shadow the module-level ``pd_data``.
    struct_frame = pd.DataFrame(
        {
            "structs": [{"a": 100}, None, {"b": 200}, {"b": 300}],
        }
    )
    local_entry = local_data.ManagedArrowTable.from_pandas(struct_frame)
    result = list(local_entry.itertuples())
    # Row 1 / column 0 is the position of the None struct in the input.
    assert result[1][0] is None
158+
159+
def test_local_data_itertuples_list_none():
    """A None entry in a list column is yielded by ``itertuples`` as None."""
    # Local name chosen so it does not shadow the module-level ``pd_data``.
    list_frame = pd.DataFrame(
        {
            "lists": [[1, 2], None, [3, 4]],
        }
    )
    local_entry = local_data.ManagedArrowTable.from_pandas(list_frame)
    result = list(local_entry.itertuples())
    # Row 1 / column 0 is the position of the None list in the input.
    assert result[1][0] is None
0 commit comments