pandas-dev · mroeschke · Aug 18, 2022 · Jul 25, 2022 · Jul 26, 2022 · Jul 29, 2022
diff --git a/doc/source/reference/arrays.rst b/doc/source/reference/arrays.rst
@@ -19,19 +19,20 @@ objects contained with a :class:`Index`, :class:`Series`, or
 For some data types, pandas extends NumPy's type system. String aliases for these types
 can be found at :ref:`basics.dtypes`.
 
-=================== ========================= ================== =============================
-Kind of Data        pandas Data Type          Scalar             Array
-=================== ========================= ================== =============================
-TZ-aware datetime   :class:`DatetimeTZDtype`  :class:`Timestamp` :ref:`api.arrays.datetime`
-Timedeltas          (none)                    :class:`Timedelta` :ref:`api.arrays.timedelta`
-Period (time spans) :class:`PeriodDtype`      :class:`Period`    :ref:`api.arrays.period`
-Intervals           :class:`IntervalDtype`    :class:`Interval`  :ref:`api.arrays.interval`
-Nullable Integer    :class:`Int64Dtype`, ...  (none)             :ref:`api.arrays.integer_na`
-Categorical         :class:`CategoricalDtype` (none)             :ref:`api.arrays.categorical`
-Sparse              :class:`SparseDtype`      (none)             :ref:`api.arrays.sparse`
-Strings             :class:`StringDtype`      :class:`str`       :ref:`api.arrays.string`
-Boolean (with NA)   :class:`BooleanDtype`     :class:`bool`      :ref:`api.arrays.bool`
-=================== ========================= ================== =============================
+=================== ========================= ============================= =============================
+Kind of Data        pandas Data Type          Scalar                        Array
+=================== ========================= ============================= =============================
+TZ-aware datetime   :class:`DatetimeTZDtype`  :class:`Timestamp`            :ref:`api.arrays.datetime`
+Timedeltas          (none)                    :class:`Timedelta`            :ref:`api.arrays.timedelta`
+Period (time spans) :class:`PeriodDtype`      :class:`Period`               :ref:`api.arrays.period`
+Intervals           :class:`IntervalDtype`    :class:`Interval`             :ref:`api.arrays.interval`
+Nullable Integer    :class:`Int64Dtype`, ...  (none)                        :ref:`api.arrays.integer_na`
+Categorical         :class:`CategoricalDtype` (none)                        :ref:`api.arrays.categorical`
+Sparse              :class:`SparseDtype`      (none)                        :ref:`api.arrays.sparse`
+Strings             :class:`StringDtype`      :class:`str`                  :ref:`api.arrays.string`
+Boolean (with NA)   :class:`BooleanDtype`     :class:`bool`                 :ref:`api.arrays.bool`
+PyArrow             :class:`ArrowDtype`       Python Scalars or :class:`NA` :ref:`api.arrays.arrow`
+=================== ========================= ============================= =============================
 
 pandas and third-party libraries can extend NumPy's type system (see :ref:`extending.extension-types`).
 The top-level :meth:`array` method can be used to create a new array, which may be
@@ -42,6 +43,44 @@ stored in a :class:`Series`, :class:`Index`, or as a column in a :class:`DataFra
 
    array
 
+.. _api.arrays.arrow:
+
+PyArrow
+-------
+
+.. warning::
+
+    This feature is experimental, and the API can change in a future release without warning.
+
+The :class:`arrays.ArrowExtensionArray` is backed by a :external+pyarrow:py:class:`pyarrow.ChunkedArray` with a
+:external+pyarrow:py:class:`pyarrow.DataType` instead of a NumPy array and data type. The ``.dtype`` of a :class:`arrays.ArrowExtensionArray`
+is an :class:`ArrowDtype`.
+
+`Pyarrow <https://arrow.apache.org/docs/python/index.html>`__ provides similar array and `data type <https://arrow.apache.org/docs/python/api/datatypes.html>`__
+support as NumPy including first-class nullability support for all data types, immutability and more.
+
+.. note::
+
+    For string types (``pyarrow.string()``, ``string[pyarrow]``), PyArrow support is still facilitated
+    by :class:`arrays.ArrowStringArray` and ``StringDtype("pyarrow")``. See the :ref:`string section <api.arrays.string>`
+    below.
+
+While individual values in an :class:`arrays.ArrowExtensionArray` are stored as a PyArrow objects, scalars are **returned**
+as Python scalars corresponding to the data type, e.g. a PyArrow int64 will be returned as Python int, or :class:`NA` for missing
+values.
+
+.. autosummary::
+   :toctree: api/
+   :template: autosummary/class_without_autosummary.rst
+
+   arrays.ArrowExtensionArray
+
+.. autosummary::
+   :toctree: api/
+   :template: autosummary/class_without_autosummary.rst
+
+   ArrowDtype
+
 .. _api.arrays.datetime:
 
 Datetimes

diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst
@@ -24,6 +24,40 @@ https://github.com/pandas-dev/pandas-stubs for more information.
 
 We thank VirtusLab and Microsoft for their initial, significant contributions to ``pandas-stubs``
 
+.. _whatsnew_150.enhancements.arrow:
+
+Native PyArrow-backed ExtensionArray
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+With `Pyarrow <https://arrow.apache.org/docs/python/index.html>`__ installed, users can now create pandas objects
+that are backed by a ``pyarrow.ChunkedArray`` and ``pyarrow.DataType``.
+
+The ``dtype`` argument can accept a string of a `pyarrow data type <https://arrow.apache.org/docs/python/api/datatypes.html>`__
+with ``pyarrow`` in brackets e.g. ``"int64[pyarrow]"`` or, for pyarrow data types that take parameters, a :class:`ArrowDtype`
+initialized with a ``pyarrow.DataType``.
+
+.. ipython:: python
+
+    import pyarrow as pa
+    ser_float = pd.Series([1.0, 2.0, None], dtype="float32[pyarrow]")
+    ser_float
+
+    list_of_int_type = pd.ArrowDtype(pa.list_(pa.int64()))
+    ser_list = pd.Series([[1, 2], [3, None]], dtype=list_of_int_type)
+    ser_list
+
+    ser_list.take([1, 0])
+    ser_float * 5
+    ser_float.mean()
+    ser_float.dropna()
+
+Most operations are supported and have been implemented using `pyarrow compute <https://arrow.apache.org/docs/python/api/compute.html>`__ functions.
+We recommend installing the latest version of PyArrow to access the most recently implemented compute functions.
+
+.. warning::
+
+    This feature is experimental, and the API can change in a future release without warning.
+
 .. _whatsnew_150.enhancements.dataframe_interchange:
 
 DataFrame interchange protocol implementation

diff --git a/pandas/arrays/__init__.py b/pandas/arrays/__init__.py
@@ -4,6 +4,7 @@
 See :ref:`extending.extension-types` for more.
 """
 from pandas.core.arrays import (
+    ArrowExtensionArray,
     ArrowStringArray,
     BooleanArray,
     Categorical,
@@ -19,6 +20,7 @@
 )
 
 __all__ = [
+    "ArrowExtensionArray",
     "ArrowStringArray",
     "BooleanArray",
     "Categorical",

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
@@ -159,8 +159,47 @@ def to_pyarrow_type(
 
 class ArrowExtensionArray(OpsMixin, ExtensionArray):
     """
-    Base class for ExtensionArray backed by Arrow ChunkedArray.
-    """
+    Pandas ExtensionArray backed by a PyArrow ChunkedArray.
+
+    .. warning::
+
+       ArrowExtensionArray is considered experimental. The implementation and
+       parts of the API may change without warning.
+
+    Parameters
+    ----------
+    values : pyarrow.Array or pyarrow.ChunkedArray
+
+    Attributes
+    ----------
+    None
+
+    Methods
+    -------
+    None
+
+    Returns
+    -------
+    ArrowExtensionArray
+
+    Notes
+    -----
+    Most methods are implemented using `pyarrow compute functions. <https://arrow.apache.org/docs/python/api/compute.html>`__
+    Some methods may either raise an exception or raise a ``PerformanceWarning`` if an
+    associated compute function is not available based on the installed version of PyArrow.
+
+    Please install the latest version of PyArrow to enable the best functionality and avoid
+    potential bugs in prior versions of PyArrow.
+
+    Examples
+    --------
+    Create an ArrowExtensionArray with :func:`pandas.array`:
+
+    >>> pd.array([1, 1, None], dtype="int64[pyarrow]")
+    <ArrowExtensionArray>
+    [1, 1, <NA>]
+    Length: 3, dtype: int64[pyarrow]
+    """  # noqa: E501 (http link too long)
 
     _data: pa.ChunkedArray
     _dtype: ArrowDtype

diff --git a/pandas/core/arrays/arrow/dtype.py b/pandas/core/arrays/arrow/dtype.py
@@ -20,9 +20,47 @@
 @register_extension_dtype
 class ArrowDtype(StorageExtensionDtype):
     """
-    Base class for dtypes for ArrowExtensionArray.
-    Modeled after BaseMaskedDtype
-    """
+    An ExtensionDtype for PyArrow data types.
+
+    .. warning::
+
+       ArrowDtype is considered experimental. The implementation and
+       parts of the API may change without warning.
+
+    While most ``dtype`` arguments can accept the "string"
+    constructor, e.g. ``"int64[pyarrow]"``, ArrowDtype is useful
+    if the data type contains parameters like ``pyarrow.timestamp``.
+
+    Parameters
+    ----------
+    pyarrow_dtype : pa.DataType
+        An instance of a `pyarrow.DataType <https://arrow.apache.org/docs/python/api/datatypes.html#factory-functions>`__.
+
+    Attributes
+    ----------
+    pyarrow_dtype
+
+    Methods
+    -------
+    None
+
+    Returns
+    -------
+    ArrowDtype
+
+    Examples
+    --------
+    >>> import pyarrow as pa
+    >>> pd.ArrowDtype(pa.int64())
+    int64[pyarrow]
+
+    Types with parameters must be constructed with ArrowDtype.
+
+    >>> pd.ArrowDtype(pa.timestamp("s", tz="America/New_York"))
+    timestamp[s, tz=America/New_York][pyarrow]
+    >>> pd.ArrowDtype(pa.list_(pa.int64()))
+    list<item: int64>[pyarrow]
+    """  # noqa: E501
 
     _metadata = ("storage", "pyarrow_dtype")  # type: ignore[assignment]
 
@@ -37,6 +75,9 @@ def __init__(self, pyarrow_dtype: pa.DataType) -> None:
             )
         self.pyarrow_dtype = pyarrow_dtype
 
+    def __repr__(self) -> str:
+        return self.name
+
     @property
     def type(self):
         """

diff --git a/scripts/validate_rst_title_capitalization.py b/scripts/validate_rst_title_capitalization.py
@@ -150,6 +150,7 @@
     "LZMA",
     "Numba",
     "Timestamp",
+    "PyArrow",
 }
 
 CAP_EXCEPTIONS_DICT = {word.lower(): word for word in CAPITALIZATION_EXCEPTIONS}