BUG: Coerce to numeric despite uint64 conflict

gfyoung · gfyoung · commit 23dbbb18b5ba · 2017-10-08T20:05:48.000-07:00
Closes gh-17007. Closes gh-17125.
diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
@@ -827,6 +827,7 @@ Conversion
 - Bug in ``Timestamp.replace`` when replacing ``tzinfo`` around DST changes (:issue:`15683`)
 - Bug in ``Timedelta`` construction and arithmetic that would not propagate the ``Overflow`` exception (:issue:`17367`)
 - Bug in :meth:`~DataFrame.astype` converting to object dtype when passed extension type classes (`DatetimeTZDtype``, ``CategoricalDtype``) rather than instances. Now a ``TypeError`` is raised when a class is passed (:issue:`17780`).
+- Bug in :func:`to_numeric` in which elements were not always being coerced to numeric when ``errors='coerce'`` (:issue:`17007`, :issue:`17125`)
 
 Indexing
 ^^^^^^^^
diff --git a/pandas/_libs/src/inference.pyx b/pandas/_libs/src/inference.pyx
@@ -165,20 +165,8 @@ cdef class Seen(object):
                      two conflict cases was also detected. However, we are
                      trying to force conversion to a numeric dtype.
         """
-        if self.uint_ and (self.null_ or self.sint_):
-            if not self.coerce_numeric:
-                return True
-
-            if self.null_:
-                msg = ("uint64 array detected, and such an "
-                       "array cannot contain NaN.")
-            else:  # self.sint_ = 1
-                msg = ("uint64 and negative values detected. "
-                       "Cannot safely return a numeric array "
-                       "without truncating data.")
-
-            raise ValueError(msg)
-        return False
+        return (self.uint_ and (self.null_ or self.sint_)
+                and not self.coerce_numeric)
 
     cdef inline saw_null(self):
         """
@@ -1103,7 +1091,10 @@ def maybe_convert_numeric(ndarray[object] values, set na_values,
             seen.saw_int(val)
 
             if val >= 0:
-                uints[i] = val
+                if val <= oUINT64_MAX:
+                    uints[i] = val
+                else:
+                    seen.float_ = True
 
             if val <= oINT64_MAX:
                 ints[i] = val
diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py
@@ -132,7 +132,7 @@ def to_numeric(arg, errors='raise', downcast=None):
             values = lib.maybe_convert_numeric(values, set(),
                                                coerce_numeric=coerce_numeric)
 
-    except Exception:
+    except Exception as e:
         if errors == 'raise':
             raise
 
diff --git a/pandas/tests/tools/test_numeric.py b/pandas/tests/tools/test_numeric.py
@@ -381,3 +381,20 @@ def test_downcast_limits(self):
         for dtype, downcast, min_max in dtype_downcast_min_max:
             series = pd.to_numeric(pd.Series(min_max), downcast=downcast)
             assert series.dtype == dtype
+
+    def test_coerce_uint64_conflict(self):
+        # see gh-17007 and gh-17125
+        #
+        # Still returns float despite the uint64-nan conflict,
+        # which would normally force the casting to object.
+        df = pd.DataFrame({"a": [200, 300, "", "NaN", 30000000000000000000]})
+        expected = pd.Series([200, 300, np.nan, np.nan,
+                              30000000000000000000], dtype=float, name="a")
+        result = to_numeric(df["a"], errors="coerce")
+        tm.assert_series_equal(expected, result)
+
+        s = pd.Series(["12345678901234567890", "1234567890", "ITEM"])
+        expected = pd.Series([12345678901234567890,
+                              1234567890, np.nan], dtype=float)
+        result = to_numeric(s, errors="coerce")
+        tm.assert_series_equal(expected, result)