Skip to content

Commit 23dbbb1

Browse files
committed
BUG: Coerce to numeric despite uint64 conflict
Closes gh-17007. Closes gh-17125.
1 parent 9f0ee53 commit 23dbbb1

File tree

4 files changed

+25
-16
lines changed

4 files changed

+25
-16
lines changed

doc/source/whatsnew/v0.21.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -827,6 +827,7 @@ Conversion
827827
- Bug in ``Timestamp.replace`` when replacing ``tzinfo`` around DST changes (:issue:`15683`)
828828
- Bug in ``Timedelta`` construction and arithmetic that would not propagate the ``Overflow`` exception (:issue:`17367`)
829829
- Bug in :meth:`~DataFrame.astype` converting to object dtype when passed extension type classes (``DatetimeTZDtype``, ``CategoricalDtype``) rather than instances. Now a ``TypeError`` is raised when a class is passed (:issue:`17780`).
830+
- Bug in :meth:`to_numeric` in which elements were not always being coerced to numeric when ``errors='coerce'`` (:issue:`17007`, :issue:`17125`)
830831

831832
Indexing
832833
^^^^^^^^

pandas/_libs/src/inference.pyx

+6-15
Original file line numberDiff line numberDiff line change
@@ -165,20 +165,8 @@ cdef class Seen(object):
165165
two conflict cases was also detected. However, we are
166166
trying to force conversion to a numeric dtype.
167167
"""
168-
if self.uint_ and (self.null_ or self.sint_):
169-
if not self.coerce_numeric:
170-
return True
171-
172-
if self.null_:
173-
msg = ("uint64 array detected, and such an "
174-
"array cannot contain NaN.")
175-
else: # self.sint_ = 1
176-
msg = ("uint64 and negative values detected. "
177-
"Cannot safely return a numeric array "
178-
"without truncating data.")
179-
180-
raise ValueError(msg)
181-
return False
168+
return (self.uint_ and (self.null_ or self.sint_)
169+
and not self.coerce_numeric)
182170

183171
cdef inline saw_null(self):
184172
"""
@@ -1103,7 +1091,10 @@ def maybe_convert_numeric(ndarray[object] values, set na_values,
11031091
seen.saw_int(val)
11041092

11051093
if val >= 0:
1106-
uints[i] = val
1094+
if val <= oUINT64_MAX:
1095+
uints[i] = val
1096+
else:
1097+
seen.float_ = True
11071098

11081099
if val <= oINT64_MAX:
11091100
ints[i] = val

pandas/core/tools/numeric.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ def to_numeric(arg, errors='raise', downcast=None):
132132
values = lib.maybe_convert_numeric(values, set(),
133133
coerce_numeric=coerce_numeric)
134134

135-
except Exception:
135+
except Exception as e:
136136
if errors == 'raise':
137137
raise
138138

pandas/tests/tools/test_numeric.py

+17
Original file line numberDiff line numberDiff line change
@@ -381,3 +381,20 @@ def test_downcast_limits(self):
381381
for dtype, downcast, min_max in dtype_downcast_min_max:
382382
series = pd.to_numeric(pd.Series(min_max), downcast=downcast)
383383
assert series.dtype == dtype
384+
385+
def test_coerce_uint64_conflict(self):
386+
# see gh-17007 and gh-17125
387+
#
388+
# Still returns float despite the uint64-nan conflict,
389+
# which would normally force the casting to object.
390+
df = pd.DataFrame({"a": [200, 300, "", "NaN", 30000000000000000000]})
391+
expected = pd.Series([200, 300, np.nan, np.nan,
392+
30000000000000000000], dtype=float, name="a")
393+
result = to_numeric(df["a"], errors="coerce")
394+
tm.assert_series_equal(expected, result)
395+
396+
s = pd.Series(["12345678901234567890", "1234567890", "ITEM"])
397+
expected = pd.Series([12345678901234567890,
398+
1234567890, np.nan], dtype=float)
399+
result = to_numeric(s, errors="coerce")
400+
tm.assert_series_equal(expected, result)

0 commit comments

Comments
 (0)