115
115
dtype : Type name or dict of column -> type, default None
116
116
Data type for data or columns. E.g. {'a': np.float64, 'b': np.int32}
117
117
Use `str` or `object` to preserve and not interpret dtype.
118
- If converters are specified, they will be applied AFTER
119
- dtype conversion.
118
+ If converters are specified, they will be applied INSTEAD
119
+ of dtype conversion.
120
+
121
+ .. versionadded:: 0.20.0 support for the Python parser.
122
+
120
123
%s
121
124
converters : dict, default None
122
125
Dict of functions for converting values in certain columns. Keys can either
@@ -1295,15 +1298,6 @@ def _agg_index(self, index, try_parse_dates=True):
1295
1298
def _apply_converter (self , values , conv_f , na_values , col_na_values ,
1296
1299
col_na_fvalues ):
1297
1300
""" apply converter function to values, respecting NAs """
1298
- try :
1299
- values = lib .map_infer (values , conv_f )
1300
- except ValueError :
1301
- mask = lib .ismember (values , na_values ).view (np .uint8 )
1302
- values = lib .map_infer_mask (values , conv_f , mask )
1303
-
1304
- cvals , na_count = self ._infer_types (
1305
- values , set (col_na_values ) | col_na_fvalues ,
1306
- try_num_bool = False )
1307
1301
return cvals , na_count
1308
1302
1309
1303
def _convert_to_ndarrays (self , dct , na_values , na_fvalues , verbose = False ,
@@ -1323,45 +1317,58 @@ def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False,
1323
1317
else :
1324
1318
col_na_values , col_na_fvalues = set (), set ()
1325
1319
1326
- if conv_f is not None and cast_type is None :
1327
- # if type is not specified, apply the conversion first, without
1328
- # inference
1329
- cvals , na_count = self ._apply_converter (
1330
- values , conv_f , na_values ,
1331
- col_na_values , col_na_fvalues )
1320
+ if conv_f is not None :
1321
+ # conv_f applied to data before inference
1322
+ # dtype isn't used if a converter is specified
1323
+ try :
1324
+ values = lib .map_infer (values , conv_f )
1325
+ except ValueError :
1326
+ mask = lib .ismember (values , na_values ).view (np .uint8 )
1327
+ values = lib .map_infer_mask (values , conv_f , mask )
1328
+
1329
+ cvals , na_count = self ._infer_types (
1330
+ values , set (col_na_values ) | col_na_fvalues ,
1331
+ try_num_bool = False )
1332
1332
else :
1333
- try_num_bool = True
1334
- if cast_type and is_object_dtype (cast_type ):
1335
- # skip inference if specified dtype is object
1336
- try_num_bool = False
1333
+ # skip inference if specified dtype is object
1334
+ try_num_bool = not (cast_type and is_object_dtype (cast_type ))
1337
1335
1338
1336
# general type inference and conversion
1339
1337
cvals , na_count = self ._infer_types (
1340
1338
values , set (col_na_values ) | col_na_fvalues ,
1341
1339
try_num_bool )
1342
1340
1343
- if issubclass (cvals .dtype .type , np .integer ) and self .compact_ints :
1344
- cvals = lib .downcast_int64 (
1345
- cvals , _parser .na_values ,
1346
- self .use_unsigned )
1341
+ if issubclass (cvals .dtype .type , np .integer ) and self .compact_ints :
1342
+ cvals = lib .downcast_int64 (
1343
+ cvals , _parser .na_values ,
1344
+ self .use_unsigned )
1347
1345
1348
- if cast_type and not is_dtype_equal (cvals , cast_type ):
1349
1346
# type specified in dtype param
1350
-
1351
- cvals = self ._cast_types (cvals , cast_type , c )
1352
- # for consistency with c-parser, if a converter and dtype are
1353
- # specified, apply the converter last
1354
- if conv_f is not None :
1355
- values , na_count = self ._apply_converter (
1356
- values , conv_f , na_values ,
1357
- col_na_values , col_na_fvalues )
1347
+ if cast_type and not is_dtype_equal (cvals , cast_type ):
1348
+ cvals = self ._cast_types (cvals , cast_type , c )
1358
1349
1359
1350
result [c ] = cvals
1360
1351
if verbose and na_count :
1361
1352
print ('Filled %d NA values in column %s' % (na_count , str (c )))
1362
1353
return result
1363
1354
1364
1355
def _infer_types (self , values , na_values , try_num_bool = True ):
1356
+ """
1357
+ Infer types of values, possibly casting
1358
+
1359
+ Parameters
1360
+ ----------
1361
+ values : ndarray
1362
+ na_values : set
1363
+ try_num_bool : bool, default True
1364
+ try to cast values to numeric (first preference) or boolean
1365
+
1366
+ Returns
1367
+ -------
1368
+ converted : ndarray
1369
+ na_count : int
1370
+ """
1371
+
1365
1372
na_count = 0
1366
1373
if issubclass (values .dtype .type , (np .number , np .bool_ )):
1367
1374
mask = lib .ismember (values , na_values )
@@ -1393,7 +1400,22 @@ def _infer_types(self, values, na_values, try_num_bool=True):
1393
1400
return result , na_count
1394
1401
1395
1402
def _cast_types (self , values , cast_type , column ):
1396
- """ cast column to type specified in dtypes= param """
1403
+ """
1404
+ Cast values to specified type
1405
+
1406
+ Parameters
1407
+ ----------
1408
+ values : ndarray
1409
+ cast_type : string or np.dtype
1410
+ dtype to cast values to
1411
+ column : string
1412
+ column name - used only for error reporting
1413
+
1414
+ Returns
1415
+ -------
1416
+ converted : ndarray
1417
+ """
1418
+
1397
1419
if is_categorical_dtype (cast_type ):
1398
1420
# XXX this is for consistency with
1399
1421
# c-parser which parses all categories
0 commit comments