@@ -105,7 +105,7 @@ def test_options_py(df_compat, pa):
         with pd.option_context('io.parquet.engine', 'pyarrow'):
             df.to_parquet(path)

-            result = read_parquet(path, compression=None)
+            result = read_parquet(path)
             tm.assert_frame_equal(result, df)


@@ -118,7 +118,7 @@ def test_options_fp(df_compat, fp):
         with pd.option_context('io.parquet.engine', 'fastparquet'):
             df.to_parquet(path, compression=None)

-            result = read_parquet(path, compression=None)
+            result = read_parquet(path)
             tm.assert_frame_equal(result, df)


@@ -130,7 +130,7 @@ def test_options_auto(df_compat, fp, pa):
         with pd.option_context('io.parquet.engine', 'auto'):
             df.to_parquet(path)

-            result = read_parquet(path, compression=None)
+            result = read_parquet(path)
             tm.assert_frame_equal(result, df)


@@ -162,7 +162,7 @@ def test_cross_engine_pa_fp(df_cross_compat, pa, fp):
     with tm.ensure_clean() as path:
         df.to_parquet(path, engine=pa, compression=None)

-        result = read_parquet(path, engine=fp, compression=None)
+        result = read_parquet(path, engine=fp)
         tm.assert_frame_equal(result, df)


@@ -174,7 +174,7 @@ def test_cross_engine_fp_pa(df_cross_compat, pa, fp):
     with tm.ensure_clean() as path:
         df.to_parquet(path, engine=fp, compression=None)

-        result = read_parquet(path, engine=pa, compression=None)
+        result = read_parquet(path, engine=pa)
         tm.assert_frame_equal(result, df)


@@ -188,19 +188,23 @@ def check_error_on_write(self, df, engine, exc):
             with tm.ensure_clean() as path:
                 to_parquet(df, path, engine, compression=None)

-    def check_round_trip(self, df, engine, expected=None, **kwargs):
-
+    def check_round_trip(self, df, engine, expected=None,
+                         write_kwargs=None, read_kwargs=None):
+        if write_kwargs is None:
+            write_kwargs = {}
+        if read_kwargs is None:
+            read_kwargs = {}
         with tm.ensure_clean() as path:
-            df.to_parquet(path, engine, **kwargs)
-            result = read_parquet(path, engine, **kwargs)
+            df.to_parquet(path, engine, **write_kwargs)
+            result = read_parquet(path, engine, **read_kwargs)

             if expected is None:
                 expected = df
             tm.assert_frame_equal(result, expected)

             # repeat
-            to_parquet(df, path, engine, **kwargs)
-            result = pd.read_parquet(path, engine, **kwargs)
+            to_parquet(df, path, engine, **write_kwargs)
+            result = pd.read_parquet(path, engine, **read_kwargs)

             if expected is None:
                 expected = df
@@ -222,7 +226,7 @@ def test_columns_dtypes(self, engine):

         # unicode
         df.columns = [u'foo', u'bar']
-        self.check_round_trip(df, engine, compression=None)
+        self.check_round_trip(df, engine, write_kwargs={'compression': None})

     def test_columns_dtypes_invalid(self, engine):

@@ -246,7 +250,7 @@ def test_columns_dtypes_invalid(self, engine):
     def test_write_with_index(self, engine):

         df = pd.DataFrame({'A': [1, 2, 3]})
-        self.check_round_trip(df, engine, compression=None)
+        self.check_round_trip(df, engine, write_kwargs={'compression': None})

         # non-default index
         for index in [[2, 3, 4],
@@ -280,7 +284,8 @@ def test_compression(self, engine, compression):
             pytest.importorskip('brotli')

         df = pd.DataFrame({'A': [1, 2, 3]})
-        self.check_round_trip(df, engine, compression=compression)
+        self.check_round_trip(df, engine,
+                              write_kwargs={'compression': compression})

     def test_read_columns(self, engine):
         # GH18154
@@ -289,7 +294,8 @@ def test_read_columns(self, engine):

         expected = pd.DataFrame({'string': list('abc')})
         self.check_round_trip(df, engine, expected=expected,
-                              compression=None, columns=["string"])
+                              write_kwargs={'compression': None},
+                              read_kwargs={'columns': ['string']})


 class TestParquetPyArrow(Base):
@@ -377,7 +383,7 @@ def test_basic(self, fp):
             'timedelta': pd.timedelta_range('1 day', periods=3),
         })

-        self.check_round_trip(df, fp, compression=None)
+        self.check_round_trip(df, fp, write_kwargs={'compression': None})

     @pytest.mark.skip(reason="not supported")
     def test_duplicate_columns(self, fp):
@@ -390,7 +396,8 @@ def test_duplicate_columns(self, fp):
     def test_bool_with_none(self, fp):
         df = pd.DataFrame({'a': [True, None, False]})
         expected = pd.DataFrame({'a': [1.0, np.nan, 0.0]}, dtype='float16')
-        self.check_round_trip(df, fp, expected=expected, compression=None)
+        self.check_round_trip(df, fp, expected=expected,
+                              write_kwargs={'compression': None})

     def test_unsupported(self, fp):

@@ -406,7 +413,7 @@ def test_categorical(self, fp):
         if LooseVersion(fastparquet.__version__) < LooseVersion("0.1.3"):
             pytest.skip("CategoricalDtype not supported for older fp")
         df = pd.DataFrame({'a': pd.Categorical(list('abc'))})
-        self.check_round_trip(df, fp, compression=None)
+        self.check_round_trip(df, fp, write_kwargs={'compression': None})

     def test_datetime_tz(self, fp):
         # doesn't preserve tz
@@ -416,4 +423,13 @@ def test_datetime_tz(self, fp):
         # warns on the coercion
         with catch_warnings(record=True):
             self.check_round_trip(df, fp, df.astype('datetime64[ns]'),
-                                  compression=None)
+                                  write_kwargs={'compression': None})
+
+    def test_filter_row_groups(self, fp):
+        d = {'a': list(range(0, 3))}
+        df = pd.DataFrame(d)
+        with tm.ensure_clean() as path:
+            df.to_parquet(path, fp, compression=None,
+                          row_group_offsets=1)
+            result = read_parquet(path, fp, filters=[('a', '==', 0)])
0 commit comments