Skip to content

Commit

Permalink
Fix low_memory C engine parser
Browse files (browse the repository at this point in the history)
  • Loading branch information
kprestel committed Dec 9, 2018
1 parent 9746e4b commit 2c3d27a
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 9 deletions.
9 changes: 6 additions & 3 deletions pandas/_libs/parsers.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -986,7 +986,6 @@ cdef class TextReader:
footer=footer,
upcast_na=True)
self._end_clock('Type conversion')

self._start_clock()
if len(columns) > 0:
rows_read = len(list(columns.values())[0])
Expand Down Expand Up @@ -1241,7 +1240,7 @@ cdef class TextReader:
try:
# use _from_sequence_of_strings if the class defines it
result = array_type._from_sequence_of_strings(result,
dtype=dtype) # noqa
dtype=dtype) # noqa
except AbstractMethodError:
result = array_type._from_sequence(result, dtype=dtype)
else:
Expand Down Expand Up @@ -2201,7 +2200,11 @@ def _concatenate_chunks(list chunks):
result[name] = union_categoricals(arrs,
sort_categories=sort_categories)
else:
result[name] = np.concatenate(arrs)
if is_extension_array_dtype(dtype):
result[name] = dtype \
.construct_array_type()._concat_same_type(arrs)
else:
result[name] = np.concatenate(arrs)

if warning_columns:
warning_names = ','.join(warning_columns)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -559,7 +559,7 @@ def sanitize_array(data, index, dtype=None, copy=False,

# possibility of nan -> garbage
if is_float_dtype(data.dtype) and is_integer_dtype(dtype) \
and not is_extension_array_dtype(dtype):
and not is_extension_array_dtype(dtype):
if not isna(data).any():
subarr = _try_cast(data, True, dtype, copy,
raise_cast_failure)
Expand Down
4 changes: 2 additions & 2 deletions pandas/io/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1669,8 +1669,8 @@ def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False,
try_num_bool)

# type specified in dtype param
if cast_type and not is_dtype_equal(cvals, cast_type):
# or is_extension_array_dtype(cast_type)):
if cast_type and (not is_dtype_equal(cvals, cast_type)
or is_extension_array_dtype(cast_type)):
try:
if (is_bool_dtype(cast_type) and
not is_categorical_dtype(cast_type)
Expand Down
10 changes: 7 additions & 3 deletions pandas/tests/extension/base/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,19 @@ def data(dtype):


class ExtensionParsingTests(BaseExtensionTests):
def test_EA_types(self):

@pytest.mark.parametrize('engine', ['c', 'python'])
def test_EA_types(self, engine):
df = pd.DataFrame({'Int': pd.Series([1, 2, 3], dtype='Int64'),
'A': [1, 2, 1]})
data = df.to_csv(index=False)
result = pd.read_csv(StringIO(data), dtype={'Int': Int64Dtype})
result = pd.read_csv(StringIO(data), dtype={'Int': Int64Dtype},
engine=engine)
assert result is not None

df = pd.DataFrame({'Int': pd.Series([1, 2, 3], dtype='Int8'),
'A': [1, 2, 1]})
data = df.to_csv(index=False)
result = pd.read_csv(StringIO(data), dtype={'Int': 'Int8'})
result = pd.read_csv(StringIO(data), dtype={'Int': 'Int8'},
engine=engine)
assert result is not None

0 comments on commit 2c3d27a

Please sign in to comment.