From c2bb2aafa3fd1f3320ad76737c7d731b2f02bab9 Mon Sep 17 00:00:00 2001
From: Joseph Kleinhenz <kleinhej@gene.com>
Date: Thu, 29 Aug 2024 10:26:19 -0700
Subject: [PATCH 01/15] expose to_pandas_kwargs in pyarrow engine

---
 pandas/io/parquet.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
index 24415299e799b..2b592736fcb9f 100644
--- a/pandas/io/parquet.py
+++ b/pandas/io/parquet.py
@@ -245,11 +245,12 @@ def read(
         dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
         storage_options: StorageOptions | None = None,
         filesystem=None,
+        to_pandas_kwargs=None,
         **kwargs,
     ) -> DataFrame:
         kwargs["use_pandas_metadata"] = True
 
-        to_pandas_kwargs = {}
+        to_pandas_kwargs = to_pandas_kwargs if to_pandas_kwargs is not None else {}
         if dtype_backend == "numpy_nullable":
             from pandas.io._util import _arrow_dtype_mapping
 

From 2a36913a60ea4eb399bcc121e0d612d0f4a4e4f5 Mon Sep 17 00:00:00 2001
From: Joseph Kleinhenz <kleinhej@gene.com>
Date: Thu, 29 Aug 2024 15:39:39 -0700
Subject: [PATCH 02/15] add test for roundtripping maps

---
 pandas/tests/io/test_parquet.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index a29e479b7c9f1..988f8a245358d 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -1173,6 +1173,19 @@ def test_non_nanosecond_timestamps(self, temp_file):
         )
         tm.assert_frame_equal(result, expected)
 
+    def test_maps_as_pydicts(self, pa):
+        import pyarrow
+
+        schema = pyarrow.schema(
+            [("foo", pyarrow.map_(pyarrow.string(), pyarrow.int64()))]
+        )
+        df = pd.DataFrame([{"foo": {"A": 1}}, {"foo": {"B": 2}}])
+        check_round_trip(
+            df,
+            pa,
+            write_kwargs={"schema": schema},
+        )
+
 
 class TestParquetFastParquet(Base):
     @pytest.mark.xfail(reason="datetime_with_nat gets incorrect values")

From 0d6cd16a7d96b438fb3780d6e056db535dd3878e Mon Sep 17 00:00:00 2001
From: Joseph Kleinhenz <kleinhej@gene.com>
Date: Thu, 29 Aug 2024 15:42:51 -0700
Subject: [PATCH 03/15] make test pass by using maps_as_pydicts

---
 pandas/tests/io/test_parquet.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index 988f8a245358d..fc73d51799809 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -1184,6 +1184,7 @@ def test_maps_as_pydicts(self, pa):
             df,
             pa,
             write_kwargs={"schema": schema},
+            read_kwargs={"to_pandas_kwargs": {"maps_as_pydicts": "strict"}},
         )
 
 

From 908614d0eabe5f22cb2442b8bc13d4d7eccc796b Mon Sep 17 00:00:00 2001
From: Joseph Kleinhenz <kleinhej@gene.com>
Date: Thu, 29 Aug 2024 16:54:31 -0700
Subject: [PATCH 04/15] remove unused type ignore

---
 pandas/io/parquet.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
index 2b592736fcb9f..66a958a835fdd 100644
--- a/pandas/io/parquet.py
+++ b/pandas/io/parquet.py
@@ -257,7 +257,7 @@ def read(
             mapping = _arrow_dtype_mapping()
             to_pandas_kwargs["types_mapper"] = mapping.get
         elif dtype_backend == "pyarrow":
-            to_pandas_kwargs["types_mapper"] = pd.ArrowDtype  # type: ignore[assignment]
+            to_pandas_kwargs["types_mapper"] = pd.ArrowDtype
         elif using_string_dtype():
             to_pandas_kwargs["types_mapper"] = arrow_string_types_mapper()
 

From 29675cdf4fd80322ed2285563c56d2b19fa613c0 Mon Sep 17 00:00:00 2001
From: Joseph Kleinhenz <kleinhej@gene.com>
Date: Thu, 29 Aug 2024 17:41:01 -0700
Subject: [PATCH 05/15] skip test if pyarrow is too old

---
 pandas/tests/io/test_parquet.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index fc73d51799809..cb88e8a08e539 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -1174,7 +1174,7 @@ def test_non_nanosecond_timestamps(self, temp_file):
         tm.assert_frame_equal(result, expected)
 
     def test_maps_as_pydicts(self, pa):
-        import pyarrow
+        pyarrow = pytest.importorskip("pyarrow", "13.0.0")
 
         schema = pyarrow.schema(
             [("foo", pyarrow.map_(pyarrow.string(), pyarrow.int64()))]

From 6d7accf975145c3fd33dfb121a86fb4fa6625a73 Mon Sep 17 00:00:00 2001
From: Joseph Kleinhenz <kleinhej@gene.com>
Date: Fri, 30 Aug 2024 16:08:04 -0700
Subject: [PATCH 06/15] update whatsnew

---
 doc/source/whatsnew/v2.3.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v2.3.0.rst b/doc/source/whatsnew/v2.3.0.rst
index 528226502da33..3f5527c5dcc8b 100644
--- a/doc/source/whatsnew/v2.3.0.rst
+++ b/doc/source/whatsnew/v2.3.0.rst
@@ -127,7 +127,7 @@ MultiIndex
 
 I/O
 ^^^
--
+- ``pyarrow`` engine for :func:`read_parquet` accepts ``to_pandas_kwargs`` which are forwarded to :meth:`pyarrow.Table.to_pandas`. This enables passing in ``maps_as_pydicts`` to read parquet map datatypes as python dictionaries. (:issue:`56842`)
 -
 
 Period

From 61dd637a56a7164a50378be9f9763509ec6970a8 Mon Sep 17 00:00:00 2001
From: Joseph Kleinhenz <kleinhenz.joseph@gmail.com>
Date: Wed, 18 Sep 2024 15:19:34 -0700
Subject: [PATCH 07/15] Apply suggestions from code review

Co-authored-by: Xiao Yuan <yuanx749@gmail.com>
---
 pandas/io/parquet.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
index 66a958a835fdd..b4c45cb674b87 100644
--- a/pandas/io/parquet.py
+++ b/pandas/io/parquet.py
@@ -245,12 +245,12 @@ def read(
         dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
         storage_options: StorageOptions | None = None,
         filesystem=None,
-        to_pandas_kwargs=None,
+        to_pandas_kwargs: dict[str, Any] | None = None,
         **kwargs,
     ) -> DataFrame:
         kwargs["use_pandas_metadata"] = True
 
-        to_pandas_kwargs = to_pandas_kwargs if to_pandas_kwargs is not None else {}
+        to_pandas_kwargs = {} if to_pandas_kwargs is None else to_pandas_kwargs
         if dtype_backend == "numpy_nullable":
             from pandas.io._util import _arrow_dtype_mapping
 

From 67ecfb5f02c629c25034a465783c63c56ef2e237 Mon Sep 17 00:00:00 2001
From: Joseph Kleinhenz <kleinhej@gene.com>
Date: Mon, 18 Nov 2024 15:43:43 -0800
Subject: [PATCH 08/15] move to v3.0.0 whatsnew

---
 doc/source/whatsnew/v2.3.0.rst | 2 +-
 doc/source/whatsnew/v3.0.0.rst | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v2.3.0.rst b/doc/source/whatsnew/v2.3.0.rst
index 3f5527c5dcc8b..528226502da33 100644
--- a/doc/source/whatsnew/v2.3.0.rst
+++ b/doc/source/whatsnew/v2.3.0.rst
@@ -127,7 +127,7 @@ MultiIndex
 
 I/O
 ^^^
-- ``pyarrow`` engine for :func:`read_parquet` accepts ``to_pandas_kwargs`` which are forwarded to :meth:`pyarrow.Table.to_pandas`. This enables passing in ``maps_as_pydicts`` to read parquet map datatypes as python dictionaries. (:issue:`56842`)
+-
 -
 
 Period
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index da0d85b7bb529..74aadf6ece517 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -628,6 +628,7 @@ I/O
 - Bug in :meth:`read_stata` raising ``KeyError`` when input file is stored in big-endian format and contains strL data. (:issue:`58638`)
 - Bug in :meth:`read_stata` where extreme value integers were incorrectly interpreted as missing for format versions 111 and prior (:issue:`58130`)
 - Bug in :meth:`read_stata` where the missing code for double was not recognised for format versions 105 and prior (:issue:`58149`)
+- ``pyarrow`` engine for :func:`read_parquet` accepts ``to_pandas_kwargs`` which are forwarded to :meth:`pyarrow.Table.to_pandas`. This enables passing in ``maps_as_pydicts`` to read parquet map datatypes as python dictionaries. (:issue:`56842`)
 
 Period
 ^^^^^^

From 937b29f42f0ebe828fb326d689f8ee90db01ad47 Mon Sep 17 00:00:00 2001
From: Joseph Kleinhenz <kleinhej@gene.com>
Date: Mon, 18 Nov 2024 15:51:26 -0800
Subject: [PATCH 09/15] add to docstring

---
 pandas/io/parquet.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
index b4c45cb674b87..03f756e13e3f0 100644
--- a/pandas/io/parquet.py
+++ b/pandas/io/parquet.py
@@ -467,7 +467,9 @@ def to_parquet(
         .. versionadded:: 2.1.0
 
     kwargs
-        Additional keyword arguments passed to the engine
+        Additional keyword arguments passed to the engine.
+        When using the ``'pyarrow'`` engine ``to_pandas_kwargs`` can be used to pass
+        through arguments to ``pyarrow.Table.to_pandas``.
 
     Returns
     -------

From 15ed566fdd0a2d4816e2d20fba2b922123e5baf5 Mon Sep 17 00:00:00 2001
From: Joseph Kleinhenz <kleinhej@gene.com>
Date: Tue, 19 Nov 2024 08:59:49 -0800
Subject: [PATCH 10/15] try to make mypy happy

---
 pandas/io/_util.py   | 6 ++++--
 pandas/io/parquet.py | 5 +++--
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/pandas/io/_util.py b/pandas/io/_util.py
index 748205c088acf..9778a404e23e0 100644
--- a/pandas/io/_util.py
+++ b/pandas/io/_util.py
@@ -60,10 +60,12 @@ def arrow_table_to_pandas(
     table: pyarrow.Table,
     dtype_backend: DtypeBackend | Literal["numpy"] | lib.NoDefault = lib.no_default,
     null_to_int64: bool = False,
-    **kwargs,
+    to_pandas_kwargs: dict | None = None,
 ) -> pd.DataFrame:
     pa = import_optional_dependency("pyarrow")
 
+    to_pandas_kwargs = {} if to_pandas_kwargs is None else to_pandas_kwargs
+
     types_mapper: type[pd.ArrowDtype] | None | Callable
     if dtype_backend == "numpy_nullable":
         mapping = _arrow_dtype_mapping()
@@ -81,5 +83,5 @@ def arrow_table_to_pandas(
     else:
         raise NotImplementedError
 
-    df = table.to_pandas(types_mapper=types_mapper, **kwargs)
+    df = table.to_pandas(types_mapper=types_mapper, **to_pandas_kwargs)
     return df
diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
index b4c043be36946..62eaf4b859aea 100644
--- a/pandas/io/parquet.py
+++ b/pandas/io/parquet.py
@@ -247,7 +247,6 @@ def read(
     ) -> DataFrame:
         kwargs["use_pandas_metadata"] = True
 
-        to_pandas_kwargs = {} if to_pandas_kwargs is None else to_pandas_kwargs
         path_or_handle, handles, filesystem = _get_path_or_handle(
             path,
             filesystem,
@@ -269,7 +268,9 @@ def read(
                     DeprecationWarning,
                 )
                 result = arrow_table_to_pandas(
-                    pa_table, dtype_backend=dtype_backend, **to_pandas_kwargs
+                    pa_table,
+                    dtype_backend=dtype_backend,
+                    to_pandas_kwargs=to_pandas_kwargs,
                 )
 
             if pa_table.schema.metadata:

From a13f0545a4504d166b9c63a2d4ed55a89739bd85 Mon Sep 17 00:00:00 2001
From: Joseph Kleinhenz <kleinhenz.joseph@gmail.com>
Date: Tue, 19 Nov 2024 17:38:56 -0800
Subject: [PATCH 11/15] Update doc/source/whatsnew/v3.0.0.rst

Co-authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
---
 doc/source/whatsnew/v3.0.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index c00e219d7d0d8..2d1467dda1548 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -54,7 +54,7 @@ Other enhancements
 - :meth:`Series.cummin` and :meth:`Series.cummax` now supports :class:`CategoricalDtype` (:issue:`52335`)
 - :meth:`Series.plot` now correctly handle the ``ylabel`` parameter for pie charts, allowing for explicit control over the y-axis label (:issue:`58239`)
 - :meth:`DataFrame.plot.scatter` argument ``c`` now accepts a column of strings, where rows with the same string are colored identically (:issue:`16827` and :issue:`16485`)
-- :func:`read_parquet` accepts ``to_pandas_kwargs`` which are forwarded to :meth:`pyarrow.Table.to_pandas` which enables passing in ``maps_as_pydicts`` to read parquet map datatypes as python dictionaries (:issue:`56842`)
+- :func:`read_parquet` accepts ``to_pandas_kwargs``, which are forwarded to :meth:`pyarrow.Table.to_pandas`. This enables passing additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as Python dictionaries (:issue:`56842`)
 - :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now supports positional arguments passed as kwargs (:issue:`58995`)
 - :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`)
 - :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`)

From a092ede9d577be936d9140d5dcb831d666fc645b Mon Sep 17 00:00:00 2001
From: Joseph Kleinhenz <kleinhej@gene.com>
Date: Tue, 19 Nov 2024 17:47:11 -0800
Subject: [PATCH 12/15] make to_pandas_kwargs explicit parameter and update
 docstring

---
 pandas/io/parquet.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
index 62eaf4b859aea..03df750a874c4 100644
--- a/pandas/io/parquet.py
+++ b/pandas/io/parquet.py
@@ -458,8 +458,6 @@ def to_parquet(
 
     kwargs
         Additional keyword arguments passed to the engine.
-        When using the ``'pyarrow'`` engine ``to_pandas_kwargs`` can be used to pass
-        through arguments to ``pyarrow.Table.to_pandas``.
 
     Returns
     -------
@@ -498,6 +496,7 @@ def read_parquet(
     dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
     filesystem: Any = None,
     filters: list[tuple] | list[list[tuple]] | None = None,
+    to_pandas_kwargs: dict | None = None,
     **kwargs,
 ) -> DataFrame:
     """
@@ -571,6 +570,12 @@ def read_parquet(
 
         .. versionadded:: 2.1.0
 
+    to_pandas_kwargs: dict | None, default None
+        keyword arguments to pass through to ``pyarrow.Table.to_pandas``
+        when ``engine="pyarrow"``.
+
+        .. versionadded:: 3.0.0
+
     **kwargs
         Any additional kwargs are passed to the engine.
 
@@ -643,5 +648,6 @@ def read_parquet(
         storage_options=storage_options,
         dtype_backend=dtype_backend,
         filesystem=filesystem,
+        to_pandas_kwargs=to_pandas_kwargs,
         **kwargs,
     )

From 1c0cd02c24ebac6ac150aacc9b706651d7c5e25f Mon Sep 17 00:00:00 2001
From: Joseph Kleinhenz <kleinhej@gene.com>
Date: Tue, 19 Nov 2024 21:06:07 -0800
Subject: [PATCH 13/15] fix FastParquetImpl

---
 pandas/io/parquet.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
index 03df750a874c4..cc3e9be283fa8 100644
--- a/pandas/io/parquet.py
+++ b/pandas/io/parquet.py
@@ -352,6 +352,7 @@ def read(
         filters=None,
         storage_options: StorageOptions | None = None,
         filesystem=None,
+        to_pandas_kwargs: dict | None = None,
         **kwargs,
     ) -> DataFrame:
         parquet_kwargs: dict[str, Any] = {}
@@ -367,6 +368,10 @@ def read(
             raise NotImplementedError(
                 "filesystem is not implemented for the fastparquet engine."
             )
+        if to_pandas_kwargs is not None:
+            raise NotImplementedError(
+                "to_pandas_kwargs is not implemented for the fastparquet engine."
+            )
         path = stringify_path(path)
         handles = None
         if is_fsspec_url(path):

From dac237ac3de5679f0dea901627efca85d5502f89 Mon Sep 17 00:00:00 2001
From: Joseph Kleinhenz <kleinhej@gene.com>
Date: Wed, 20 Nov 2024 08:03:20 -0800
Subject: [PATCH 14/15] try to fix docstring

---
 pandas/io/parquet.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
index cc3e9be283fa8..eafa3d990e532 100644
--- a/pandas/io/parquet.py
+++ b/pandas/io/parquet.py
@@ -575,8 +575,8 @@ def read_parquet(
 
         .. versionadded:: 2.1.0
 
-    to_pandas_kwargs: dict | None, default None
-        keyword arguments to pass through to ``pyarrow.Table.to_pandas``
+    to_pandas_kwargs : dict | None, default None
+        Keyword arguments to pass through to ``pyarrow.Table.to_pandas``
         when ``engine="pyarrow"``.
 
         .. versionadded:: 3.0.0

From d4f55a1da2896e339344e2e3efbc74c63628e3d2 Mon Sep 17 00:00:00 2001
From: Joseph Kleinhenz <kleinhenz.joseph@gmail.com>
Date: Wed, 20 Nov 2024 15:54:55 -0800
Subject: [PATCH 15/15] Update pandas/io/parquet.py

Co-authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
---
 pandas/io/parquet.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
index eafa3d990e532..6a5a83088e986 100644
--- a/pandas/io/parquet.py
+++ b/pandas/io/parquet.py
@@ -576,7 +576,7 @@ def read_parquet(
         .. versionadded:: 2.1.0
 
     to_pandas_kwargs : dict | None, default None
-        Keyword arguments to pass through to ``pyarrow.Table.to_pandas``
+        Keyword arguments to pass through to :meth:`pyarrow.Table.to_pandas`
         when ``engine="pyarrow"``.
 
         .. versionadded:: 3.0.0