From 9edf9b3243dd6d343be03789e06126b247c14cf1 Mon Sep 17 00:00:00 2001
From: Ryan Grout <ryan@ryangrout.org>
Date: Wed, 11 Dec 2019 00:26:29 -0600
Subject: [PATCH 1/9] Move skipna check to after type casting of values.

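Fixes the performance regression introduced in
aaaac86ee019675119cb0ae9c3fb7a2b7eef9959.

The ``skipna`` filtering (``values[~isnaobj(values)]``) previously ran
before the ``_try_infer_map`` fast path. It now runs only after that
fast path has failed to produce a result and the values have been cast
to object dtype, so inputs whose dtype can be inferred directly no
longer pay for the NA scan.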
---
 pandas/_libs/lib.pyx | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index a6b02e016823c..305d1d543f7ac 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -1259,9 +1259,6 @@ def infer_dtype(value: object, skipna: bool = True) -> str:
     # make contiguous
     values = values.ravel()
 
-    if skipna:
-        values = values[~isnaobj(values)]
-
     val = _try_infer_map(values)
     if val is not None:
         return val
@@ -1269,6 +1266,9 @@ def infer_dtype(value: object, skipna: bool = True) -> str:
     if values.dtype != np.object_:
         values = values.astype('O')
 
+    if skipna:
+        values = values[~isnaobj(values)]
+
     n = len(values)
     if n == 0:
         return 'empty'

From 70c8bed5aa49ae12d737b7761e99515bd0d496a4 Mon Sep 17 00:00:00 2001
From: Ryan Grout <ryan@ryangrout.org>
Date: Wed, 11 Dec 2019 01:10:03 -0600
Subject: [PATCH 2/9] Add entry to whatsnew.

---
 doc/source/whatsnew/v1.0.0.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
index 3e72072eae303..a6f10c5aa7b37 100644
--- a/doc/source/whatsnew/v1.0.0.rst
+++ b/doc/source/whatsnew/v1.0.0.rst
@@ -645,6 +645,7 @@ Performance improvements
 - Performance improvement when checking if values in a :class:`Categorical` are equal, equal or larger or larger than a given scalar.
   The improvement is not present if checking if the :class:`Categorical` is less than or less than or equal than the scalar (:issue:`29820`)
 - Performance improvement in :meth:`Index.equals` and  :meth:`MultiIndex.equals` (:issue:`29134`)
+- Performance improvement in :func:`infer_dtype` when ``skipna`` is ``True`` (:issue:`28814`)
 
 .. _whatsnew_1000.bug_fixes:
 

From ea579f7e5069fb849b69e73f4246509b92d9a0e0 Mon Sep 17 00:00:00 2001
From: Ryan Grout <groutr@users.noreply.github.com>
Date: Wed, 11 Dec 2019 17:18:43 +0000
Subject: [PATCH 3/9] Update doc/source/whatsnew/v1.0.0.rst

Co-Authored-By: Joris Van den Bossche <jorisvandenbossche@gmail.com>
---
 doc/source/whatsnew/v1.0.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
index a6f10c5aa7b37..d1e22293487e7 100644
--- a/doc/source/whatsnew/v1.0.0.rst
+++ b/doc/source/whatsnew/v1.0.0.rst
@@ -645,7 +645,7 @@ Performance improvements
 - Performance improvement when checking if values in a :class:`Categorical` are equal, equal or larger or larger than a given scalar.
   The improvement is not present if checking if the :class:`Categorical` is less than or less than or equal than the scalar (:issue:`29820`)
 - Performance improvement in :meth:`Index.equals` and  :meth:`MultiIndex.equals` (:issue:`29134`)
-- Performance improvement in :func:`infer_dtype` when ``skipna`` is ``True`` (:issue:`28814`)
+- Performance improvement in :func:`~pandas.api.types.infer_dtype` when ``skipna`` is ``True`` (:issue:`28814`)
 
 .. _whatsnew_1000.bug_fixes:
 

From df558643a322f8aba667a59fe0ff0654c90f8776 Mon Sep 17 00:00:00 2001
From: Ryan Grout <ryan@ryangrout.org>
Date: Wed, 11 Dec 2019 14:26:50 -0600
Subject: [PATCH 4/9] Add asv tests for skipna.

---
 asv_bench/benchmarks/dtypes.py | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/asv_bench/benchmarks/dtypes.py b/asv_bench/benchmarks/dtypes.py
index 24cc1c6f9fa70..a3a8b1bab7499 100644
--- a/asv_bench/benchmarks/dtypes.py
+++ b/asv_bench/benchmarks/dtypes.py
@@ -7,6 +7,7 @@
     extension_dtypes,
     numeric_dtypes,
     string_dtypes,
+    lib
 )
 
 _numpy_dtypes = [
@@ -39,5 +40,26 @@ def time_pandas_dtype_invalid(self, dtype):
         except TypeError:
             pass
 
+class InferDtypes:
+    params = _dtypes
+    param_names = ['dtype']
+    data_dict = {
+        "np-object": np.array([1] * 1000, dtype='O'),
+        "py-object": [1] * 1000,
+        "np-null": np.array([1] * 500 + [np.nan] * 500),
+        "py-null": [1] * 500 + [None] * 500,
+        "np-int": np.array([1] * 1000, dtype=int),
+        "np-floating": np.array([1.0] * 1000, dtype=float),
+        "empty": [],
+        "bytes": [b'a'] * 1000,
+    }
+    params = list(data_dict.keys())
+
+    def time_infer_skipna(self, dtype):
+        lib.infer_dtype(dtype, skipna=True)
+
+    def time_infer(self, dtype):
+        lib.infer_dtype(dtype, skipna=False)
+
 
 from .pandas_vb_common import setup  # noqa: F401 isort:skip

From 94a968c16b0a83e9d5565a2dd86941d67677b0e9 Mon Sep 17 00:00:00 2001
From: Ryan Grout <ryan@ryangrout.org>
Date: Wed, 11 Dec 2019 14:28:57 -0600
Subject: [PATCH 5/9] make pep8 happy.

---
 asv_bench/benchmarks/dtypes.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/asv_bench/benchmarks/dtypes.py b/asv_bench/benchmarks/dtypes.py
index a3a8b1bab7499..4bbb6b47f84b5 100644
--- a/asv_bench/benchmarks/dtypes.py
+++ b/asv_bench/benchmarks/dtypes.py
@@ -40,6 +40,7 @@ def time_pandas_dtype_invalid(self, dtype):
         except TypeError:
             pass
 
+
 class InferDtypes:
     params = _dtypes
     param_names = ['dtype']

From 3ba2229ccad0926a34f883972b86840868407183 Mon Sep 17 00:00:00 2001
From: Ryan Grout <ryan@ryangrout.org>
Date: Thu, 12 Dec 2019 09:19:47 -0600
Subject: [PATCH 6/9] make black happy.

---
 asv_bench/benchmarks/dtypes.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/asv_bench/benchmarks/dtypes.py b/asv_bench/benchmarks/dtypes.py
index 4bbb6b47f84b5..254bd43c740cd 100644
--- a/asv_bench/benchmarks/dtypes.py
+++ b/asv_bench/benchmarks/dtypes.py
@@ -7,7 +7,7 @@
     extension_dtypes,
     numeric_dtypes,
     string_dtypes,
-    lib
+    lib,
 )
 
 _numpy_dtypes = [
@@ -43,16 +43,16 @@ def time_pandas_dtype_invalid(self, dtype):
 
 class InferDtypes:
     params = _dtypes
-    param_names = ['dtype']
+    param_names = ["dtype"]
     data_dict = {
-        "np-object": np.array([1] * 1000, dtype='O'),
+        "np-object": np.array([1] * 1000, dtype="O"),
         "py-object": [1] * 1000,
         "np-null": np.array([1] * 500 + [np.nan] * 500),
         "py-null": [1] * 500 + [None] * 500,
         "np-int": np.array([1] * 1000, dtype=int),
         "np-floating": np.array([1.0] * 1000, dtype=float),
         "empty": [],
-        "bytes": [b'a'] * 1000,
+        "bytes": [b"a"] * 1000,
     }
     params = list(data_dict.keys())
 

From f97fb1fb2bdd7afe54f20d0e96f5b1589604282a Mon Sep 17 00:00:00 2001
From: Ryan Grout <ryan@ryangrout.org>
Date: Thu, 12 Dec 2019 21:22:21 -0600
Subject: [PATCH 7/9] make isort happy.

---
 asv_bench/benchmarks/dtypes.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/asv_bench/benchmarks/dtypes.py b/asv_bench/benchmarks/dtypes.py
index 254bd43c740cd..2c6d06dbf8f2a 100644
--- a/asv_bench/benchmarks/dtypes.py
+++ b/asv_bench/benchmarks/dtypes.py
@@ -5,9 +5,9 @@
 from .pandas_vb_common import (
     datetime_dtypes,
     extension_dtypes,
+    lib,
     numeric_dtypes,
     string_dtypes,
-    lib,
 )
 
 _numpy_dtypes = [

From a5efb8f7c51e66b9e5cfadaefaefa217d6d98b9b Mon Sep 17 00:00:00 2001
From: Ryan Grout <ryan@ryangrout.org>
Date: Mon, 16 Dec 2019 10:53:35 -0600
Subject: [PATCH 8/9] Make dtype inference test cases larger.

---
 asv_bench/benchmarks/dtypes.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/asv_bench/benchmarks/dtypes.py b/asv_bench/benchmarks/dtypes.py
index 2c6d06dbf8f2a..1b0c8dc487c9c 100644
--- a/asv_bench/benchmarks/dtypes.py
+++ b/asv_bench/benchmarks/dtypes.py
@@ -45,14 +45,14 @@ class InferDtypes:
     params = _dtypes
     param_names = ["dtype"]
     data_dict = {
-        "np-object": np.array([1] * 1000, dtype="O"),
-        "py-object": [1] * 1000,
-        "np-null": np.array([1] * 500 + [np.nan] * 500),
-        "py-null": [1] * 500 + [None] * 500,
-        "np-int": np.array([1] * 1000, dtype=int),
-        "np-floating": np.array([1.0] * 1000, dtype=float),
+        "np-object": np.array([1] * 100000, dtype="O"),
+        "py-object": [1] * 100000,
+        "np-null": np.array([1] * 50000 + [np.nan] * 50000),
+        "py-null": [1] * 50000 + [None] * 50000,
+        "np-int": np.array([1] * 100000, dtype=int),
+        "np-floating": np.array([1.0] * 100000, dtype=float),
         "empty": [],
-        "bytes": [b"a"] * 1000,
+        "bytes": [b"a"] * 100000,
     }
     params = list(data_dict.keys())
 

From 8d7ec5deb92ff045fc1935f75f98494b4216d41e Mon Sep 17 00:00:00 2001
From: Ryan Grout <ryan@ryangrout.org>
Date: Mon, 16 Dec 2019 18:15:54 -0600
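Subject: [PATCH 9/9] Fix benchmark params.

Remove the stale ``params = _dtypes`` assignment, which was overridden
by the later ``params = list(data_dict.keys())``, and pass the benchmark
data looked up from ``data_dict`` to ``infer_dtype`` instead of the
parameter key string itself.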

---
 asv_bench/benchmarks/dtypes.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/asv_bench/benchmarks/dtypes.py b/asv_bench/benchmarks/dtypes.py
index 1b0c8dc487c9c..bd17b710b108d 100644
--- a/asv_bench/benchmarks/dtypes.py
+++ b/asv_bench/benchmarks/dtypes.py
@@ -42,7 +42,6 @@ def time_pandas_dtype_invalid(self, dtype):
 
 
 class InferDtypes:
-    params = _dtypes
     param_names = ["dtype"]
     data_dict = {
         "np-object": np.array([1] * 100000, dtype="O"),
@@ -57,10 +56,10 @@ class InferDtypes:
     params = list(data_dict.keys())
 
     def time_infer_skipna(self, dtype):
-        lib.infer_dtype(dtype, skipna=True)
+        lib.infer_dtype(self.data_dict[dtype], skipna=True)
 
     def time_infer(self, dtype):
-        lib.infer_dtype(dtype, skipna=False)
+        lib.infer_dtype(self.data_dict[dtype], skipna=False)
 
 
 from .pandas_vb_common import setup  # noqa: F401 isort:skip