pandas-dev
diff --git a/‎.coveragerc
+2 b/‎.coveragerc
+2
diff --git a/‎asv_bench/benchmarks/groupby.py
+1-1 b/‎asv_bench/benchmarks/groupby.py
+1-1
diff --git a/‎asv_bench/benchmarks/reshape.py
+18 b/‎asv_bench/benchmarks/reshape.py
+18
diff --git a/‎doc/source/whatsnew/v0.24.0.txt
+60-1 b/‎doc/source/whatsnew/v0.24.0.txt
+60-1
diff --git a/‎pandas/_libs/internals.pyx
+1-1 b/‎pandas/_libs/internals.pyx
+1-1
diff --git a/‎pandas/_libs/tslibs/period.pyx
+2-2 b/‎pandas/_libs/tslibs/period.pyx
+2-2
diff --git a/‎pandas/_libs/src/util.pxd renamed to ‎pandas/_libs/tslibs/util.pxd
+3-3 b/‎pandas/_libs/src/util.pxd renamed to ‎pandas/_libs/tslibs/util.pxd
+3-3
diff --git a/‎pandas/_libs/util.pxd
+1 b/‎pandas/_libs/util.pxd
+1
diff --git a/‎pandas/_libs/window.pyx
+1-1 b/‎pandas/_libs/window.pyx
+1-1
diff --git a/‎pandas/core/arrays/__init__.py
+3 b/‎pandas/core/arrays/__init__.py
+3
diff --git a/‎pandas/core/arrays/base.py
+10-6 b/‎pandas/core/arrays/base.py
+10-6
diff --git a/‎pandas/core/arrays/categorical.py
+2-2 b/‎pandas/core/arrays/categorical.py
+2-2
@@ -2,6 +2,7 @@
 [run]
 branch = False
 omit = */tests/*
+plugins = Cython.Coverage
 
 [report]
 # Regexes for lines to exclude from consideration
@@ -22,6 +23,7 @@ exclude_lines =
     if __name__ == .__main__.:
 
 ignore_errors = False
+show_missing = True
 
 [html]
 directory = coverage_html_report
@@ -142,7 +142,7 @@ def time_frame_nth(self, dtype):
     def time_series_nth_any(self, dtype):
         self.df['values'].groupby(self.df['key']).nth(0, dropna='any')
 
-    def time_groupby_nth_all(self, dtype):
+    def time_series_nth_all(self, dtype):
         self.df['values'].groupby(self.df['key']).nth(0, dropna='all')
 
     def time_series_nth(self, dtype):
 
@@ -1,7 +1,9 @@
+import string
 from itertools import product
 
 import numpy as np
 from pandas import DataFrame, MultiIndex, date_range, melt, wide_to_long
+import pandas as pd
 
 from .pandas_vb_common import setup  # noqa
 
@@ -132,3 +134,19 @@ def setup(self):
 
     def time_pivot_table(self):
         self.df.pivot_table(index='key1', columns=['key2', 'key3'])
+
+
+class GetDummies(object):  # asv benchmark: get_dummies, dense vs. sparse
+    goal_time = 0.2
+
+    def setup(self):
+        # Plain int literal: ``1_000_000`` is a SyntaxError before Python 3.6.
+        cats = list(string.ascii_letters[:12])
+        dtype = pd.api.types.CategoricalDtype(cats)
+        self.s = pd.Series(np.random.choice(cats, size=1000000), dtype=dtype)
+
+    def time_get_dummies_1d(self):  # dense output
+        pd.get_dummies(self.s, sparse=False)
+
+    def time_get_dummies_1d_sparse(self):  # sparse output
+        pd.get_dummies(self.s, sparse=True)
@@ -13,6 +13,7 @@ v0.24.0 (Month XX, 2018)
 New features
 ~~~~~~~~~~~~
 
+
 - ``ExcelWriter`` now accepts ``mode`` as a keyword argument, enabling append to existing workbooks when using the ``openpyxl`` engine (:issue:`3441`)
 
 .. _whatsnew_0240.enhancements.extension_array_operators:
@@ -31,6 +32,62 @@ See the :ref:`ExtensionArray Operator Support
 <extending.extension.operator>` documentation section for details on both
 ways of adding operator support.
 
+.. _whatsnew_0240.enhancements.intna:
+
+Optional Integer NA Support
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Pandas has gained the ability to hold integer dtypes with missing values. This long requested feature is enabled through the use of :ref:`extension types <extending.extension-types>`.
+Here is an example of the usage.
+
+We can construct a ``Series`` with the specified dtype. The dtype string ``Int64`` is a pandas ``ExtensionDtype``. Specifying a list or array using the traditional missing value
+marker of ``np.nan`` will infer to integer dtype. The display of the ``Series`` will also use the ``NaN`` to indicate missing values in string outputs. (:issue:`20700`, :issue:`20747`)
+
+.. ipython:: python
+
+   s = pd.Series([1, 2, np.nan], dtype='Int64')
+   s
+
+
+Operations on these dtypes will propagate ``NaN`` like other pandas operations.
+
+.. ipython:: python
+
+   # arithmetic
+   s + 1
+
+   # comparison
+   s == 1
+
+   # indexing
+   s.iloc[1:3]
+
+   # operate with other dtypes
+   s + s.iloc[1:3].astype('Int8')
+
+   # coerce when needed
+   s + 0.01
+
+These dtypes can operate as part of a ``DataFrame``.
+
+.. ipython:: python
+
+   df = pd.DataFrame({'A': s, 'B': [1, 1, 3], 'C': list('aab')})
+   df
+   df.dtypes
+
+
+These dtypes can be merged, reshaped, and cast.
+
+.. ipython:: python
+
+   pd.concat([df[['A']], df[['B', 'C']]], axis=1).dtypes
+   df['A'].astype(float)
+
+.. warning::
+
+   The Integer NA support currently uses the capitalized dtype version, e.g. ``Int8`` as compared to the traditional ``int8``. This may be changed at a future date.
+
 .. _whatsnew_0240.enhancements.read_html:
 
 ``read_html`` Enhancements
@@ -258,6 +315,7 @@ Previous Behavior:
 ExtensionType Changes
 ^^^^^^^^^^^^^^^^^^^^^
 
+- ``ExtensionArray`` has gained the abstract method ``.dropna()`` (:issue:`21185`)
 - ``ExtensionDtype`` has gained the ability to instantiate from string dtypes, e.g. ``decimal`` would instantiate a registered ``DecimalDtype``; furthermore
   the ``ExtensionDtype`` has gained the method ``construct_array_type`` (:issue:`21185`)
 - The ``ExtensionArray`` constructor, ``_from_sequence`` now take the keyword arg ``copy=False`` (:issue:`21185`)
@@ -312,6 +370,7 @@ Other API Changes
 - Invalid construction of ``IntervalDtype`` will now always raise a ``TypeError`` rather than a ``ValueError`` if the subdtype is invalid (:issue:`21185`)
 - Trying to reindex a ``DataFrame`` with a non unique ``MultiIndex`` now raises a ``ValueError`` instead of an ``Exception`` (:issue:`21770`)
 - :meth:`PeriodIndex.tz_convert` and :meth:`PeriodIndex.tz_localize` have been removed (:issue:`21781`)
+- :class:`Index` subtraction will attempt to operate element-wise instead of raising ``TypeError`` (:issue:`19369`)
 - :class:`pandas.io.formats.style.Styler` supports a ``number-format`` property when using :meth:`~pandas.io.formats.style.Styler.to_excel`
 
 .. _whatsnew_0240.deprecations:
@@ -351,7 +410,7 @@ Performance Improvements
 - Improved performance of :meth:`HDFStore.groups` (and dependent functions like
   :meth:`~HDFStore.keys`.  (i.e. ``x in store`` checks are much faster)
   (:issue:`21372`)
--
+- Improved the performance of :func:`pandas.get_dummies` with ``sparse=True`` (:issue:`21997`)
 
 .. _whatsnew_0240.docs:
 
 
@@ -390,7 +390,7 @@ def get_blkno_indexers(int64_t[:] blknos, bint group=True):
     start = 0
     cur_blkno = blknos[start]
 
-    if group == False:
+    if group is False:
         for i in range(1, n):
             if blknos[i] != cur_blkno:
                 yield cur_blkno, slice(start, i)
 
@@ -1655,8 +1655,8 @@ cdef class _Period(object):
         return value
 
     def __setstate__(self, state):
-        self.freq=state[1]
-        self.ordinal=state[2]
+        self.freq = state[1]
+        self.ordinal = state[2]
 
     def __reduce__(self):
         object_state = None, self.freq, self.ordinal
 
@@ -1,4 +1,4 @@
-from numpy cimport ndarray, NPY_C_CONTIGUOUS, NPY_F_CONTIGUOUS
+from numpy cimport ndarray
 cimport numpy as cnp
 cnp.import_array()
 
@@ -64,7 +64,7 @@ cdef inline bint is_datetime64_object(object obj) nogil:
 
 # --------------------------------------------------------------------
 
-cdef extern from "numpy_helper.h":
+cdef extern from "../src/numpy_helper.h":
     void set_array_not_contiguous(ndarray ao)
 
     int assign_value_1d(ndarray, Py_ssize_t, object) except -1
@@ -87,7 +87,7 @@ ctypedef fused numeric:
     cnp.float32_t
     cnp.float64_t
 
-cdef extern from "headers/stdint.h":
+cdef extern from "../src/headers/stdint.h":
     enum: UINT8_MAX
     enum: UINT16_MAX
     enum: UINT32_MAX
 
@@ -0,0 +1 @@
+from tslibs.util cimport *
@@ -13,7 +13,7 @@ from numpy cimport ndarray, double_t, int64_t, float64_t
 cnp.import_array()
 
 
-cdef extern from "../src/headers/cmath" namespace "std":
+cdef extern from "src/headers/cmath" namespace "std":
     int signbit(double) nogil
     double sqrt(double x) nogil
 
 
@@ -1,7 +1,10 @@
 from .base import (ExtensionArray,    # noqa
+                   ExtensionOpsMixin,
                    ExtensionScalarOpsMixin)
 from .categorical import Categorical  # noqa
 from .datetimes import DatetimeArrayMixin  # noqa
 from .interval import IntervalArray  # noqa
 from .period import PeriodArrayMixin  # noqa
 from .timedeltas import TimedeltaArrayMixin  # noqa
+from .integer import (  # noqa
+    IntegerArray, to_integer_array)
@@ -12,8 +12,8 @@
 from pandas.errors import AbstractMethodError
 from pandas.compat.numpy import function as nv
 from pandas.compat import set_function_name, PY3
-from pandas.core.dtypes.common import is_list_like
 from pandas.core import ops
+from pandas.core.dtypes.common import is_list_like
 
 _not_implemented_message = "{} does not implement {}."
 
@@ -88,16 +88,19 @@ class ExtensionArray(object):
     # Constructors
     # ------------------------------------------------------------------------
     @classmethod
-    def _from_sequence(cls, scalars, copy=False):
+    def _from_sequence(cls, scalars, dtype=None, copy=False):
         """Construct a new ExtensionArray from a sequence of scalars.
 
         Parameters
         ----------
         scalars : Sequence
             Each element will be an instance of the scalar type for this
             array, ``cls.dtype.type``.
+        dtype : dtype, optional
+            Construct for this particular dtype. This should be a Dtype
+            compatible with the ExtensionArray.
         copy : boolean, default False
-            if True, copy the underlying data
+            If True, copy the underlying data.
         Returns
         -------
         ExtensionArray
@@ -378,7 +381,7 @@ def fillna(self, value=None, method=None, limit=None):
                 func = pad_1d if method == 'pad' else backfill_1d
                 new_values = func(self.astype(object), limit=limit,
                                   mask=mask)
-                new_values = self._from_sequence(new_values)
+                new_values = self._from_sequence(new_values, dtype=self.dtype)
             else:
                 # fill with value
                 new_values = self.copy()
@@ -407,7 +410,7 @@ def unique(self):
         from pandas import unique
 
         uniques = unique(self.astype(object))
-        return self._from_sequence(uniques)
+        return self._from_sequence(uniques, dtype=self.dtype)
 
     def _values_for_factorize(self):
         # type: () -> Tuple[ndarray, Any]
@@ -559,7 +562,7 @@ def take(self, indices, allow_fill=False, fill_value=None):
 
                result = take(data, indices, fill_value=fill_value,
                              allow_fill=allow_fill)
-               return self._from_sequence(result)
+               return self._from_sequence(result, dtype=self.dtype)
         """
         # Implementer note: The `fill_value` parameter should be a user-facing
         # value, an instance of self.dtype.type. When passed `fill_value=None`,
@@ -634,6 +637,7 @@ class ExtensionOpsMixin(object):
     """
     A base class for linking the operators to their dunder names
     """
+
     @classmethod
     def _add_arithmetic_ops(cls):
         cls.__add__ = cls._create_arithmetic_method(operator.add)
 
@@ -488,8 +488,8 @@ def _constructor(self):
         return Categorical
 
     @classmethod
-    def _from_sequence(cls, scalars):
-        return Categorical(scalars)
+    def _from_sequence(cls, scalars, dtype=None, copy=False):
+        return Categorical(scalars, dtype=dtype)
 
     def copy(self):
         """ Copy constructor. """