Accept "Infinity", "+Infinity" and "-Infinity" as floats in read_csv (GH 10065).

Touches the v1.0.0 whatsnew entry, both matching hunks of _try_double_nogil
in parsers.pyx, floatify in parse_helper.h, and adds test_infinity_parsing.
All comparisons go through strcasecmp, so matching is case-insensitive.

diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
index 7a10447e3ad40..e5f46103e0f04 100644
--- a/doc/source/whatsnew/v1.0.0.rst
+++ b/doc/source/whatsnew/v1.0.0.rst
@@ -161,7 +161,7 @@ I/O
 
 - :meth:`read_csv` now accepts binary mode file buffers when using the Python csv engine (:issue:`23779`)
 - Bug in :meth:`DataFrame.to_json` where using a Tuple as a column or index value and using ``orient="columns"`` or ``orient="index"`` would produce invalid JSON (:issue:`20500`)
--
+- Improve infinity parsing. :meth:`read_csv` now interprets ``Infinity``, ``+Infinity``, ``-Infinity`` as floating point values (:issue:`10065`)
 
 Plotting
 ^^^^^^^^
diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
index 6cc9dd22ce7c9..62a3568932def 100644
--- a/pandas/_libs/parsers.pyx
+++ b/pandas/_libs/parsers.pyx
@@ -1693,6 +1693,10 @@ cdef:
     char* cposinf = b'+inf'
     char* cneginf = b'-inf'
 
+    char* cinfty = b'Infinity'
+    char* cposinfty = b'+Infinity'
+    char* cneginfty = b'-Infinity'
+
 
 cdef _try_double(parser_t *parser, int64_t col,
                  int64_t line_start, int64_t line_end,
@@ -1772,9 +1776,12 @@ cdef inline int _try_double_nogil(parser_t *parser,
                 if error != 0 or p_end == word or p_end[0]:
                     error = 0
                     if (strcasecmp(word, cinf) == 0 or
-                            strcasecmp(word, cposinf) == 0):
+                            strcasecmp(word, cposinf) == 0 or
+                            strcasecmp(word, cinfty) == 0 or
+                            strcasecmp(word, cposinfty) == 0):
                         data[0] = INF
-                    elif strcasecmp(word, cneginf) == 0:
+                    elif (strcasecmp(word, cneginf) == 0 or
+                            strcasecmp(word, cneginfty) == 0):
                         data[0] = NEGINF
                     else:
                         return 1
@@ -1793,9 +1800,12 @@ cdef inline int _try_double_nogil(parser_t *parser,
             if error != 0 or p_end == word or p_end[0]:
                 error = 0
                 if (strcasecmp(word, cinf) == 0 or
-                        strcasecmp(word, cposinf) == 0):
+                        strcasecmp(word, cposinf) == 0 or
+                        strcasecmp(word, cinfty) == 0 or
+                        strcasecmp(word, cposinfty) == 0):
                     data[0] = INF
-                elif strcasecmp(word, cneginf) == 0:
+                elif (strcasecmp(word, cneginf) == 0 or
+                        strcasecmp(word, cneginfty) == 0):
                     data[0] = NEGINF
                 else:
                     return 1
diff --git a/pandas/_libs/src/parse_helper.h b/pandas/_libs/src/parse_helper.h
index 1db1878a8a773..1db4c813bb493 100644
--- a/pandas/_libs/src/parse_helper.h
+++ b/pandas/_libs/src/parse_helper.h
@@ -50,7 +50,7 @@ int floatify(PyObject *str, double *result, int *maybe_int) {
     status = to_double(data, result, sci, dec, maybe_int);
 
     if (!status) {
-        /* handle inf/-inf */
+        /* handle inf/-inf and infinity/+infinity/-infinity */
         if (strlen(data) == 3) {
             if (0 == strcasecmp(data, "inf")) {
                 *result = HUGE_VAL;
@@ -68,6 +68,23 @@ int floatify(PyObject *str, double *result, int *maybe_int) {
             } else {
                 goto parsingerror;
             }
+        } else if (strlen(data) == 8) {
+            if (0 == strcasecmp(data, "infinity")) {
+                *result = HUGE_VAL;
+                *maybe_int = 0;
+            } else {
+                goto parsingerror;
+            }
+        } else if (strlen(data) == 9) {
+            if (0 == strcasecmp(data, "-infinity")) {
+                *result = -HUGE_VAL;
+                *maybe_int = 0;
+            } else if (0 == strcasecmp(data, "+infinity")) {
+                *result = HUGE_VAL;
+                *maybe_int = 0;
+            } else {
+                goto parsingerror;
+            }
         } else {
             goto parsingerror;
         }
diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py
index e04535df56663..0586593c87cc5 100644
--- a/pandas/tests/io/parser/test_common.py
+++ b/pandas/tests/io/parser/test_common.py
@@ -1865,6 +1865,23 @@ def test_inf_parsing(all_parsers, na_filter):
     tm.assert_frame_equal(result, expected)
 
 
+@pytest.mark.parametrize("na_filter", [True, False])
+def test_infinity_parsing(all_parsers, na_filter):
+    parser = all_parsers
+    data = """\
+,A
+a,Infinity
+b,-Infinity
+c,+Infinity
+"""
+    expected = DataFrame(
+        {"A": [float("infinity"), float("-infinity"), float("+infinity")]},
+        index=["a", "b", "c"],
+    )
+    result = parser.read_csv(StringIO(data), index_col=0, na_filter=na_filter)
+    tm.assert_frame_equal(result, expected)
+
+
 @pytest.mark.parametrize("nrows", [0, 1, 2, 3, 4, 5])
 def test_raise_on_no_columns(all_parsers, nrows):
     parser = all_parsers