From 847598b5ca935d56f9f86d18b0bca446217c72f1 Mon Sep 17 00:00:00 2001
From: Diego Argueta <diego@goodrx.com>
Date: Thu, 9 Aug 2018 12:30:19 -0700
Subject: [PATCH 01/17] Add support for excluding the index from Parquet files

---
 pandas/core/frame.py |  7 +++++--
 pandas/io/parquet.py | 19 ++++++++++++-------
 2 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index bb221ced9e6bd..05612e3705552 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -1874,7 +1874,7 @@ def to_feather(self, fname):
         to_feather(self, fname)
 
     def to_parquet(self, fname, engine='auto', compression='snappy',
-                   **kwargs):
+                   index=True, **kwargs):
         """
         Write a DataFrame to the binary parquet format.
 
@@ -1896,6 +1896,9 @@ def to_parquet(self, fname, engine='auto', compression='snappy',
             'pyarrow' is unavailable.
         compression : {'snappy', 'gzip', 'brotli', None}, default 'snappy'
             Name of the compression to use. Use ``None`` for no compression.
+        index : bool, default True
+            If ``True``, include the dataframe's index(es) in the file output.
+            If ``False``, they will not be written to the file.
         **kwargs
             Additional arguments passed to the parquet library. See
             :ref:`pandas io <io.parquet>` for more details.
@@ -1924,7 +1927,7 @@ def to_parquet(self, fname, engine='auto', compression='snappy',
         """
         from pandas.io.parquet import to_parquet
         to_parquet(self, fname, engine,
-                   compression=compression, **kwargs)
+                   compression=compression, index=index, **kwargs)
 
     @Substitution(header='Write out the column names. If a list of strings '
                          'is given, it is assumed to be aliases for the '
diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
index a99014f07a6b3..1b4e50545e67c 100644
--- a/pandas/io/parquet.py
+++ b/pandas/io/parquet.py
@@ -103,14 +103,15 @@ def __init__(self):
         self.api = pyarrow
 
     def write(self, df, path, compression='snappy',
-              coerce_timestamps='ms', **kwargs):
+              coerce_timestamps='ms', index=True, **kwargs):
         self.validate_dataframe(df)
         if self._pyarrow_lt_070:
             self._validate_write_lt_070(df)
         path, _, _, _ = get_filepath_or_buffer(path, mode='wb')
 
         if self._pyarrow_lt_060:
-            table = self.api.Table.from_pandas(df, timestamps_to_ms=True)
+            table = self.api.Table.from_pandas(df, timestamps_to_ms=True,
+                                               preserve_index=index)
             self.api.parquet.write_table(
                 table, path, compression=compression, **kwargs)
 
@@ -197,7 +198,7 @@ def __init__(self):
             )
         self.api = fastparquet
 
-    def write(self, df, path, compression='snappy', **kwargs):
+    def write(self, df, path, compression='snappy', index=True, **kwargs):
         self.validate_dataframe(df)
         # thriftpy/protocol/compact.py:339:
         # DeprecationWarning: tostring() is deprecated.
@@ -214,8 +215,8 @@ def write(self, df, path, compression='snappy', **kwargs):
             path, _, _, _ = get_filepath_or_buffer(path)
 
         with catch_warnings(record=True):
-            self.api.write(path, df,
-                           compression=compression, **kwargs)
+            self.api.write(path, df, compression=compression,
+                           write_index=index, **kwargs)
 
     def read(self, path, columns=None, **kwargs):
         if is_s3_url(path):
@@ -234,7 +235,8 @@ def read(self, path, columns=None, **kwargs):
         return parquet_file.to_pandas(columns=columns, **kwargs)
 
 
-def to_parquet(df, path, engine='auto', compression='snappy', **kwargs):
+def to_parquet(df, path, engine='auto', compression='snappy', index=True,
+               **kwargs):
     """
     Write a DataFrame to the parquet format.
 
@@ -250,11 +252,14 @@ def to_parquet(df, path, engine='auto', compression='snappy', **kwargs):
         'pyarrow' is unavailable.
     compression : {'snappy', 'gzip', 'brotli', None}, default 'snappy'
         Name of the compression to use. Use ``None`` for no compression.
+    index : bool, default True
+        If ``True``, include the dataframe's index(es) in the file output. If
+        ``False``, they will not be written to the file.
     kwargs
         Additional keyword arguments passed to the engine
     """
     impl = get_engine(engine)
-    return impl.write(df, path, compression=compression, **kwargs)
+    return impl.write(df, path, compression=compression, index=index, **kwargs)
 
 
 def read_parquet(path, engine='auto', columns=None, **kwargs):

From cb01127479e82451d58afba6bb02fe7ce888141f Mon Sep 17 00:00:00 2001
From: Diego Argueta <diego@goodrx.com>
Date: Thu, 9 Aug 2018 14:33:44 -0700
Subject: [PATCH 02/17] Update whatsnew

---
 doc/source/whatsnew/v0.24.0.txt | 41 +++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index 9e2c20c78f489..5aabd8b949c96 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -17,6 +17,8 @@ New features
 
 - ``ExcelWriter`` now accepts ``mode`` as a keyword argument, enabling append to existing workbooks when using the ``openpyxl`` engine (:issue:`3441`)
 
+- ``DataFrame.to_parquet()`` now accepts ``index`` as a keyword argument, allowing the user to optionally omit the dataframe's indexes from the resulting Parquet file. (:issue:`20768`)
+
 .. _whatsnew_0240.enhancements.extension_array_operators:
 
 ``ExtensionArray`` operator support
@@ -159,6 +161,45 @@ This is the same behavior as ``Series.values`` for categorical data. See
 :ref:`whatsnew_0240.api_breaking.interval_values` for more.
 
 
+Omitting Indexes in ``to_parquet()``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+You now have the option to omit a dataframe's indexes when writing to Parquet
+files with ``to_parquet``, just like ``to_csv`` (:issue:`20768`).
+
+*Previous Behavior*:
+
+Dumping a ``DataFrame`` to Parquet would include the implicit index as a column
+in the output file. Thus, this code:
+
+.. ipython:: python
+
+    import pandas
+
+    df = pandas.DataFrame({'a': [1, 2], 'b': [3, 4]})
+    df.to_parquet('test.parquet')
+
+would create a Parquet file with *three* columns: ``a``, ``b``, and ``__index_level_0__``.
+
+This unexpected extra column causes some databases like Amazon Redshift to reject
+the file, because that column doesn't exist in the target table.
+
+*New Behavior*:
+
+If you want to omit a dataframe's indexes when writing, pass ``index=False`` to
+``to_parquet()``:
+
+.. ipython:: python
+
+    import pandas
+
+    df = pandas.DataFrame({'a': [1, 2], 'b': [3, 4]})
+    df.to_parquet('test.parquet', index=False)
+
+This creates a Parquet file with just the two expected columns, ``a`` and ``b``.
+The preexisting behavior is still the default.
+
+
 .. _whatsnew_0240.enhancements.other:
 
 Other Enhancements

From 3bec3c294b719e0fb4dd672618c2ccba13b09672 Mon Sep 17 00:00:00 2001
From: Diego Argueta <diego@goodrx.com>
Date: Sat, 11 Aug 2018 00:12:21 -0700
Subject: [PATCH 03/17] Add test for omitting the index with index=False

---
 pandas/tests/io/test_parquet.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index fefbe8afb59cb..c9249eef6df90 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -368,6 +368,12 @@ def test_multiindex_with_columns(self, pa_ge_070):
             check_round_trip(df, engine, read_kwargs={'columns': ['A', 'B']},
                              expected=df[['A', 'B']])
 
+    def test_write_ignoring_index(self):
+        """Ensure index=False omits the index from the written Parquet file."""
+        df = pd.DataFrame({'a': [1, 2, 3], 'b': ['q', 'r', 's']})
+        check_round_trip(df, write_kwargs={'index': False},
+                         check_names=['a', 'b'])
+
 
 class TestParquetPyArrow(Base):
 

From 377cda5e3cd464c2b748441863a91847ede66185 Mon Sep 17 00:00:00 2001
From: Diego Argueta <diego@goodrx.com>
Date: Sat, 11 Aug 2018 18:47:14 -0700
Subject: [PATCH 04/17] PR feedback: replace test docstring with issue-reference comment

---
 pandas/tests/io/test_parquet.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index c9249eef6df90..dcd06304d05df 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -369,7 +369,8 @@ def test_multiindex_with_columns(self, pa_ge_070):
                              expected=df[['A', 'B']])
 
     def test_write_ignoring_index(self):
-        """Ensure index=False omits the index from the written Parquet file."""
+        # ENH 20768
+        # Ensure index=False omits the index from the written Parquet file.
         df = pd.DataFrame({'a': [1, 2, 3], 'b': ['q', 'r', 's']})
         check_round_trip(df, write_kwargs={'index': False},
                          check_names=['a', 'b'])

From ec58c1aae676bc6e03e093efc073821551c8f595 Mon Sep 17 00:00:00 2001
From: Diego Argueta <diego@goodrx.com>
Date: Sat, 11 Aug 2018 19:12:33 -0700
Subject: [PATCH 05/17] Add tests for custom indexes and a multiindex.

---
 pandas/tests/io/test_parquet.py | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index dcd06304d05df..86f0b328182f0 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -375,6 +375,26 @@ def test_write_ignoring_index(self):
         check_round_trip(df, write_kwargs={'index': False},
                          check_names=['a', 'b'])
 
+    def test_write_ignoring_custom_index(self):
+        # ENH 20768
+        # Ensure index=False omits the index from the written Parquet file,
+        # even if we're using a custom one.s
+        df = pd.DataFrame({'a': [1, 2, 3], 'b': ['q', 'r', 's']},
+                          index=['zyx', 'wvu', 'tsr'])
+        check_round_trip(df, write_kwargs={'index': False},
+                         check_names=['a', 'b'])
+
+    def test_write_ignoring_multiindex(self):
+        # ENH 20768
+        # Ensure index=False omits multiindexes as well.
+        arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
+                  ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]
+        df = pd.DataFrame({'one': [i for i in range(8)],
+                           'two': [-i for i in range(8)]}, index=arrays)
+
+        check_round_trip(df, write_kwargs={'index': False},
+                         check_names=['one', 'two'])
+
 
 class TestParquetPyArrow(Base):
 

From 46209e5308ffd1857bc79de0f0669c57f2a7ca57 Mon Sep 17 00:00:00 2001
From: Diego Argueta <diego@goodrx.com>
Date: Sat, 11 Aug 2018 19:18:12 -0700
Subject: [PATCH 06/17] Forgot to put preserve_index=index in one place

---
 pandas/io/parquet.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
index 1b4e50545e67c..8bacf879603d0 100644
--- a/pandas/io/parquet.py
+++ b/pandas/io/parquet.py
@@ -116,7 +116,7 @@ def write(self, df, path, compression='snappy',
                 table, path, compression=compression, **kwargs)
 
         else:
-            table = self.api.Table.from_pandas(df)
+            table = self.api.Table.from_pandas(df, preserve_index=index)
             self.api.parquet.write_table(
                 table, path, compression=compression,
                 coerce_timestamps=coerce_timestamps, **kwargs)

From 45b864deb3fd84acf86ed0802cba0292f9f53e81 Mon Sep 17 00:00:00 2001
From: Diego Argueta <diego@goodrx.com>
Date: Sun, 12 Aug 2018 19:44:17 -0700
Subject: [PATCH 07/17] Use `engine` fixture to test both implementations.

---
 pandas/tests/io/test_parquet.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index 86f0b328182f0..683e0d852b373 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -368,23 +368,23 @@ def test_multiindex_with_columns(self, pa_ge_070):
             check_round_trip(df, engine, read_kwargs={'columns': ['A', 'B']},
                              expected=df[['A', 'B']])
 
-    def test_write_ignoring_index(self):
+    def test_write_ignoring_index(self, engine):
         # ENH 20768
         # Ensure index=False omits the index from the written Parquet file.
         df = pd.DataFrame({'a': [1, 2, 3], 'b': ['q', 'r', 's']})
-        check_round_trip(df, write_kwargs={'index': False},
+        check_round_trip(df, engine, write_kwargs={'index': False},
                          check_names=['a', 'b'])
 
-    def test_write_ignoring_custom_index(self):
+    def test_write_ignoring_custom_index(self, engine):
         # ENH 20768
         # Ensure index=False omits the index from the written Parquet file,
         # even if we're using a custom one.s
         df = pd.DataFrame({'a': [1, 2, 3], 'b': ['q', 'r', 's']},
                           index=['zyx', 'wvu', 'tsr'])
-        check_round_trip(df, write_kwargs={'index': False},
+        check_round_trip(df, engine, write_kwargs={'index': False},
                          check_names=['a', 'b'])
 
-    def test_write_ignoring_multiindex(self):
+    def test_write_ignoring_multiindex(self, engine):
         # ENH 20768
         # Ensure index=False omits multiindexes as well.
         arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
@@ -392,7 +392,7 @@ def test_write_ignoring_multiindex(self):
         df = pd.DataFrame({'one': [i for i in range(8)],
                            'two': [-i for i in range(8)]}, index=arrays)
 
-        check_round_trip(df, write_kwargs={'index': False},
+        check_round_trip(df, engine, write_kwargs={'index': False},
                          check_names=['one', 'two'])
 
 

From 5768b53b727c5f4690f17a18ed83b8bc921509e4 Mon Sep 17 00:00:00 2001
From: Diego Argueta <diego@goodrx.com>
Date: Mon, 13 Aug 2018 20:35:24 -0700
Subject: [PATCH 08/17] Fix tests: Remove indexes in expected value.

---
 pandas/tests/io/test_parquet.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index 683e0d852b373..b2d2ff7cb382c 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -372,8 +372,13 @@ def test_write_ignoring_index(self, engine):
         # ENH 20768
         # Ensure index=False omits the index from the written Parquet file.
         df = pd.DataFrame({'a': [1, 2, 3], 'b': ['q', 'r', 's']})
+
+        # Because we're dropping the index, we expect the loaded dataframe to
+        # have the default integer index.
+        expected = df.reset_index(drop=True)
+
         check_round_trip(df, engine, write_kwargs={'index': False},
-                         check_names=['a', 'b'])
+                         expected=expected)
 
     def test_write_ignoring_custom_index(self, engine):
         # ENH 20768
@@ -381,8 +386,10 @@ def test_write_ignoring_custom_index(self, engine):
         # even if we're using a custom one.s
         df = pd.DataFrame({'a': [1, 2, 3], 'b': ['q', 'r', 's']},
                           index=['zyx', 'wvu', 'tsr'])
+
+        expected = df.reset_index(drop=True)
         check_round_trip(df, engine, write_kwargs={'index': False},
-                         check_names=['a', 'b'])
+                         expected=expected)
 
     def test_write_ignoring_multiindex(self, engine):
         # ENH 20768
@@ -392,8 +399,9 @@ def test_write_ignoring_multiindex(self, engine):
         df = pd.DataFrame({'one': [i for i in range(8)],
                            'two': [-i for i in range(8)]}, index=arrays)
 
+        expected = df.reset_index(drop=True)
         check_round_trip(df, engine, write_kwargs={'index': False},
-                         check_names=['one', 'two'])
+                         expected=expected)
 
 
 class TestParquetPyArrow(Base):

From f8bcf60013cc140d6fb05e32384f24572783c8d6 Mon Sep 17 00:00:00 2001
From: Diego Argueta <diego@goodrx.com>
Date: Tue, 14 Aug 2018 12:07:39 -0700
Subject: [PATCH 09/17] Move explanation of new argument to io.rst

---
 doc/source/io.rst               | 36 +++++++++++++++++++++++++++++
 doc/source/whatsnew/v0.24.0.txt | 41 +--------------------------------
 2 files changed, 37 insertions(+), 40 deletions(-)

diff --git a/doc/source/io.rst b/doc/source/io.rst
index c2c8c1c17700f..371ca4a2fa80e 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -4570,6 +4570,8 @@ dtypes, including extension dtypes such as datetime with tz.
 Several caveats.
 
 * Duplicate column names and non-string columns names are not supported.
+* The index is included in the output by default, which can cause problems with non-Pandas consumers that are
+  not expecting that extra column. You can, however, omit indexes. (See below)
 * Index level names, if specified, must be strings.
 * Categorical dtypes can be serialized to parquet, but will de-serialize as ``object`` dtype.
 * Non supported types include ``Period`` and actual Python object types. These will raise a helpful error message
@@ -4633,6 +4635,40 @@ Read only certain columns of a parquet file.
    os.remove('example_pa.parquet')
    os.remove('example_fp.parquet')
 
+
+Omitting Indexes
+''''''''''''''''
+
+Dumping a ``DataFrame`` to parquet includes the implicit index as one or more
+columns in the output file. Thus, this code:
+
+.. ipython:: python
+
+    import pandas
+
+    df = pandas.DataFrame({'a': [1, 2], 'b': [3, 4]})
+    df.to_parquet('test.parquet')
+
+creates a parquet file with *three* columns: ``a``, ``b``, and ``__index_level_0__``.
+
+This unexpected extra column causes some databases like Amazon Redshift to reject
+the file, because that column doesn't exist in the target table.
+
+If you want to omit a dataframe's indexes when writing, pass ``index=False`` to
+:func:`~pandas.DataFrame.to_parquet`:
+
+.. ipython:: python
+
+    import pandas
+
+    df = pandas.DataFrame({'a': [1, 2], 'b': [3, 4]})
+    df.to_parquet('test.parquet', index=False)
+
+This creates a parquet file with just the two expected columns, ``a`` and ``b``.
+If your ``DataFrame`` has a custom index, you won't get it back when you load
+this file into a ``DataFrame``.
+
+
 .. _io.sql:
 
 SQL Queries
diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index 5aabd8b949c96..34d03580d4ef4 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -17,7 +17,7 @@ New features
 
 - ``ExcelWriter`` now accepts ``mode`` as a keyword argument, enabling append to existing workbooks when using the ``openpyxl`` engine (:issue:`3441`)
 
-- ``DataFrame.to_parquet()`` now accepts ``index`` as a keyword argument, allowing the user to optionally omit the dataframe's indexes from the resulting Parquet file. (:issue:`20768`)
+- :func:`DataFrame.to_parquet` now accepts ``index`` as a keyword argument, allowing the user to optionally omit the dataframe's indexes from the resulting Parquet file. (:issue:`20768`)
 
 .. _whatsnew_0240.enhancements.extension_array_operators:
 
@@ -161,45 +161,6 @@ This is the same behavior as ``Series.values`` for categorical data. See
 :ref:`whatsnew_0240.api_breaking.interval_values` for more.
 
 
-Omitting Indexes in ``to_parquet()``
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-You now have the option to omit a dataframe's indexes when writing to Parquet
-files with ``to_parquet``, just like ``to_csv`` (:issue:`20768`).
-
-*Previous Behavior*:
-
-Dumping a ``DataFrame`` to Parquet would include the implicit index as a column
-in the output file. Thus, this code:
-
-.. ipython:: python
-
-    import pandas
-
-    df = pandas.DataFrame({'a': [1, 2], 'b': [3, 4]})
-    df.to_parquet('test.parquet')
-
-would create a Parquet file with *three* columns: ``a``, ``b``, and ``__index_level_0__``.
-
-This unexpected extra column causes some databases like Amazon Redshift to reject
-the file, because that column doesn't exist in the target table.
-
-*New Behavior*:
-
-If you want to omit a dataframe's indexes when writing, pass ``index=False`` to
-``to_parquet()``:
-
-.. ipython:: python
-
-    import pandas
-
-    df = pandas.DataFrame({'a': [1, 2], 'b': [3, 4]})
-    df.to_parquet('test.parquet', index=False)
-
-This creates a Parquet file with just the two expected columns, ``a`` and ``b``.
-The preexisting behavior is still the default.
-
-
 .. _whatsnew_0240.enhancements.other:
 
 Other Enhancements

From e629ae80fd1da5b9e1437977161e19e47cc7f634 Mon Sep 17 00:00:00 2001
From: Diego Argueta <diego@goodrx.com>
Date: Tue, 14 Aug 2018 12:16:21 -0700
Subject: [PATCH 10/17] Don't validate the index if we're not writing it.

---
 pandas/io/parquet.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
index 8bacf879603d0..c80e31a4e7d75 100644
--- a/pandas/io/parquet.py
+++ b/pandas/io/parquet.py
@@ -105,7 +105,9 @@ def __init__(self):
     def write(self, df, path, compression='snappy',
               coerce_timestamps='ms', index=True, **kwargs):
         self.validate_dataframe(df)
-        if self._pyarrow_lt_070:
+
+        # Only validate the index if we're writing it.
+        if self._pyarrow_lt_070 and index:
             self._validate_write_lt_070(df)
         path, _, _, _ = get_filepath_or_buffer(path, mode='wb')
 

From f3ddae0111c9b504fa15787193b03072c68b098a Mon Sep 17 00:00:00 2001
From: Diego Argueta <diego@goodrx.com>
Date: Tue, 14 Aug 2018 12:16:36 -0700
Subject: [PATCH 11/17] Test bugfixes and PR feedback: merge index tests, disable compression.

---
 pandas/tests/io/test_parquet.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index b2d2ff7cb382c..9df4c0c3f8e62 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -373,34 +373,34 @@ def test_write_ignoring_index(self, engine):
         # Ensure index=False omits the index from the written Parquet file.
         df = pd.DataFrame({'a': [1, 2, 3], 'b': ['q', 'r', 's']})
 
+        write_kwargs = {
+            'compression': None,
+            'index': False,
+        }
+
         # Because we're dropping the index, we expect the loaded dataframe to
         # have the default integer index.
         expected = df.reset_index(drop=True)
 
-        check_round_trip(df, engine, write_kwargs={'index': False},
+        check_round_trip(df, engine, write_kwargs=write_kwargs,
                          expected=expected)
 
-    def test_write_ignoring_custom_index(self, engine):
-        # ENH 20768
-        # Ensure index=False omits the index from the written Parquet file,
-        # even if we're using a custom one.s
+        # Ignore custom index
         df = pd.DataFrame({'a': [1, 2, 3], 'b': ['q', 'r', 's']},
                           index=['zyx', 'wvu', 'tsr'])
 
         expected = df.reset_index(drop=True)
-        check_round_trip(df, engine, write_kwargs={'index': False},
+        check_round_trip(df, engine, write_kwargs=write_kwargs,
                          expected=expected)
 
-    def test_write_ignoring_multiindex(self, engine):
-        # ENH 20768
-        # Ensure index=False omits multiindexes as well.
+        # Ignore multi-indexes as well.
         arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
                   ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]
         df = pd.DataFrame({'one': [i for i in range(8)],
                            'two': [-i for i in range(8)]}, index=arrays)
 
         expected = df.reset_index(drop=True)
-        check_round_trip(df, engine, write_kwargs={'index': False},
+        check_round_trip(df, engine, write_kwargs=write_kwargs,
                          expected=expected)
 
 

From d26fea87d6b5b8b93d7e6fbc46ed948d297385fc Mon Sep 17 00:00:00 2001
From: Diego Argueta <diego@goodrx.com>
Date: Tue, 14 Aug 2018 14:02:48 -0700
Subject: [PATCH 12/17] Allow using engine's default behavior.

---
 pandas/core/frame.py |  7 ++++---
 pandas/io/parquet.py | 22 ++++++++++++++--------
 2 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 05612e3705552..997b3186c853a 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -1874,7 +1874,7 @@ def to_feather(self, fname):
         to_feather(self, fname)
 
     def to_parquet(self, fname, engine='auto', compression='snappy',
-                   index=True, **kwargs):
+                   index=None, **kwargs):
         """
         Write a DataFrame to the binary parquet format.
 
@@ -1896,9 +1896,10 @@ def to_parquet(self, fname, engine='auto', compression='snappy',
             'pyarrow' is unavailable.
         compression : {'snappy', 'gzip', 'brotli', None}, default 'snappy'
             Name of the compression to use. Use ``None`` for no compression.
-        index : bool, default True
+        index : bool, default None
             If ``True``, include the dataframe's index(es) in the file output.
-            If ``False``, they will not be written to the file.
+            If ``False``, they will not be written to the file. If ``None``, the
+            behavior depends on the chosen engine.
         **kwargs
             Additional arguments passed to the parquet library. See
             :ref:`pandas io <io.parquet>` for more details.
diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
index c80e31a4e7d75..07f1e212d40c8 100644
--- a/pandas/io/parquet.py
+++ b/pandas/io/parquet.py
@@ -103,22 +103,27 @@ def __init__(self):
         self.api = pyarrow
 
     def write(self, df, path, compression='snappy',
-              coerce_timestamps='ms', index=True, **kwargs):
+              coerce_timestamps='ms', index=None, **kwargs):
         self.validate_dataframe(df)
 
         # Only validate the index if we're writing it.
-        if self._pyarrow_lt_070 and index:
+        if self._pyarrow_lt_070 and index is not False:
             self._validate_write_lt_070(df)
         path, _, _, _ = get_filepath_or_buffer(path, mode='wb')
 
+        if index is None:
+            from_pandas_kwargs = {}
+        else:
+            from_pandas_kwargs = {'preserve_index': index}
+
         if self._pyarrow_lt_060:
             table = self.api.Table.from_pandas(df, timestamps_to_ms=True,
-                                               preserve_index=index)
+                                               **from_pandas_kwargs)
             self.api.parquet.write_table(
                 table, path, compression=compression, **kwargs)
 
         else:
-            table = self.api.Table.from_pandas(df, preserve_index=index)
+            table = self.api.Table.from_pandas(df, **from_pandas_kwargs)
             self.api.parquet.write_table(
                 table, path, compression=compression,
                 coerce_timestamps=coerce_timestamps, **kwargs)
@@ -200,7 +205,7 @@ def __init__(self):
             )
         self.api = fastparquet
 
-    def write(self, df, path, compression='snappy', index=True, **kwargs):
+    def write(self, df, path, compression='snappy', index=None, **kwargs):
         self.validate_dataframe(df)
         # thriftpy/protocol/compact.py:339:
         # DeprecationWarning: tostring() is deprecated.
@@ -237,7 +242,7 @@ def read(self, path, columns=None, **kwargs):
         return parquet_file.to_pandas(columns=columns, **kwargs)
 
 
-def to_parquet(df, path, engine='auto', compression='snappy', index=True,
+def to_parquet(df, path, engine='auto', compression='snappy', index=None,
                **kwargs):
     """
     Write a DataFrame to the parquet format.
@@ -254,9 +259,10 @@ def to_parquet(df, path, engine='auto', compression='snappy', index=True,
         'pyarrow' is unavailable.
     compression : {'snappy', 'gzip', 'brotli', None}, default 'snappy'
         Name of the compression to use. Use ``None`` for no compression.
-    index : bool, default True
+    index : bool, default None
         If ``True``, include the dataframe's index(es) in the file output. If
-        ``False``, they will not be written to the file.
+        ``False``, they will not be written to the file. If ``None``, the
+        engine's default behavior will be used.
     kwargs
         Additional keyword arguments passed to the engine
     """

From e54e5f17812347530335e2ab0108196818762cb7 Mon Sep 17 00:00:00 2001
From: Diego Argueta <diego@goodrx.com>
Date: Wed, 15 Aug 2018 19:45:08 -0700
Subject: [PATCH 13/17] Document behavior change.

---
 doc/source/io.rst               | 14 ++++++++++----
 doc/source/whatsnew/v0.24.0.txt |  2 +-
 pandas/core/frame.py            |  4 ++--
 3 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/doc/source/io.rst b/doc/source/io.rst
index 371ca4a2fa80e..5b9383175aeff 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -4636,10 +4636,10 @@ Read only certain columns of a parquet file.
    os.remove('example_fp.parquet')
 
 
-Omitting Indexes
+Handling Indexes
 ''''''''''''''''
 
-Dumping a ``DataFrame`` to parquet includes the implicit index as one or more
+Dumping a ``DataFrame`` to parquet may include the implicit index as one or more
 columns in the output file. Thus, this code:
 
 .. ipython:: python
@@ -4647,9 +4647,12 @@ columns in the output file. Thus, this code:
     import pandas
 
     df = pandas.DataFrame({'a': [1, 2], 'b': [3, 4]})
-    df.to_parquet('test.parquet')
+    df.to_parquet('test.parquet', engine='pyarrow')
 
-creates a parquet file with *three* columns: ``a``, ``b``, and ``__index_level_0__``.
+creates a parquet file with *three* columns if you use ``pyarrow`` for serialization:
+``a``, ``b``, and ``__index_level_0__``. If you're using ``fastparquet``, the
+index `may or may not <https://fastparquet.readthedocs.io/en/latest/api.html#fastparquet.write>`_
+be written to the file.
 
 This unexpected extra column causes some databases like Amazon Redshift to reject
 the file, because that column doesn't exist in the target table.
@@ -4668,6 +4671,9 @@ This creates a parquet file with just the two expected columns, ``a`` and ``b``.
 If your ``DataFrame`` has a custom index, you won't get it back when you load
 this file into a ``DataFrame``.
 
+Passing ``index=True`` will *always* write the index, even if that's not the
+underlying engine's default behavior.
+
 
 .. _io.sql:
 
diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index 34d03580d4ef4..444d032ab23f3 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -17,7 +17,7 @@ New features
 
 - ``ExcelWriter`` now accepts ``mode`` as a keyword argument, enabling append to existing workbooks when using the ``openpyxl`` engine (:issue:`3441`)
 
-- :func:`DataFrame.to_parquet` now accepts ``index`` as a keyword argument, allowing the user to optionally omit the dataframe's indexes from the resulting Parquet file. (:issue:`20768`)
+- :func:`DataFrame.to_parquet` now accepts ``index`` as an argument, allowing the user to override the engine's default behavior and include or omit the dataframe's indexes from the resulting Parquet file. (:issue:`20768`)
 
 .. _whatsnew_0240.enhancements.extension_array_operators:
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 997b3186c853a..02ad02836f999 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -1898,8 +1898,8 @@ def to_parquet(self, fname, engine='auto', compression='snappy',
             Name of the compression to use. Use ``None`` for no compression.
         index : bool, default None
             If ``True``, include the dataframe's index(es) in the file output.
-            If ``False``, they will not be written to the file. If ``None``, the
-            behavior depends on the chosen engine.
+            If ``False``, they will not be written to the file. If ``None``,
+            the behavior depends on the chosen engine.
         **kwargs
             Additional arguments passed to the parquet library. See
             :ref:`pandas io <io.parquet>` for more details.

From 46a4324e09dee963765237c503ebdeab61cec50a Mon Sep 17 00:00:00 2001
From: Diego Argueta <diego@goodrx.com>
Date: Thu, 16 Aug 2018 10:08:47 -0700
Subject: [PATCH 14/17] Code cleanup, PR feedback.

---
 doc/source/io.rst               | 7 +------
 pandas/tests/io/test_parquet.py | 1 -
 2 files changed, 1 insertion(+), 7 deletions(-)

diff --git a/doc/source/io.rst b/doc/source/io.rst
index 5b9383175aeff..3955209fca9d0 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -4644,9 +4644,7 @@ columns in the output file. Thus, this code:
 
 .. ipython:: python
 
-    import pandas
-
-    df = pandas.DataFrame({'a': [1, 2], 'b': [3, 4]})
+    df = pd.DataFrame({'a': [1, 2], 'b': [3, 4]})
     df.to_parquet('test.parquet', engine='pyarrow')
 
 creates a parquet file with *three* columns if you use ``pyarrow`` for serialization:
@@ -4662,9 +4660,6 @@ If you want to omit a dataframe's indexes when writing, pass ``index=False`` to
 
 .. ipython:: python
 
-    import pandas
-
-    df = pandas.DataFrame({'a': [1, 2], 'b': [3, 4]})
     df.to_parquet('test.parquet', index=False)
 
 This creates a parquet file with just the two expected columns, ``a`` and ``b``.
diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index 9df4c0c3f8e62..ab7f04ad86ffc 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -389,7 +389,6 @@ def test_write_ignoring_index(self, engine):
         df = pd.DataFrame({'a': [1, 2, 3], 'b': ['q', 'r', 's']},
                           index=['zyx', 'wvu', 'tsr'])
 
-        expected = df.reset_index(drop=True)
         check_round_trip(df, engine, write_kwargs=write_kwargs,
                          expected=expected)
 

From 90361b6cd51f5e7ce4ffccfcb8138c473e2ad5be Mon Sep 17 00:00:00 2001
From: Diego Argueta <diego@goodrx.com>
Date: Mon, 20 Aug 2018 15:59:07 -0700
Subject: [PATCH 15/17] PR feedback for documentation

---
 doc/source/io.rst    | 5 +++--
 pandas/core/frame.py | 3 +++
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/doc/source/io.rst b/doc/source/io.rst
index 3955209fca9d0..4098906e0e173 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -4570,8 +4570,9 @@ dtypes, including extension dtypes such as datetime with tz.
 Several caveats.
 
 * Duplicate column names and non-string columns names are not supported.
-* The index is included in the output by default, which can cause problems with non-Pandas consumers that are
-  not expecting that extra column. You can, however, omit indexes. (See below)
+* The ``pyarrow`` engine always writes the index to the output, but ``fastparquet`` only writes non-default
+  indexes. This extra column can cause problems for non-Pandas consumers that are not expecting it. You can
+  force including or omitting indexes with the ``index`` argument, regardless of the underlying engine.
 * Index level names, if specified, must be strings.
 * Categorical dtypes can be serialized to parquet, but will de-serialize as ``object`` dtype.
 * Non supported types include ``Period`` and actual Python object types. These will raise a helpful error message
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 02ad02836f999..770eca2210138 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -1900,6 +1900,9 @@ def to_parquet(self, fname, engine='auto', compression='snappy',
             If ``True``, include the dataframe's index(es) in the file output.
             If ``False``, they will not be written to the file. If ``None``,
             the behavior depends on the chosen engine.
+
+            .. versionadded:: 0.24.0
+
         **kwargs
             Additional arguments passed to the parquet library. See
             :ref:`pandas io <io.parquet>` for more details.

From 759da77d1244fbfe095303ed5c2252d332917290 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Wed, 5 Sep 2018 14:23:36 +0200
Subject: [PATCH 16/17] add versionadded

---
 doc/source/whatsnew/v0.24.0.txt | 4 +++-
 pandas/io/parquet.py            | 2 ++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index 444d032ab23f3..fc594a2e8de2f 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -17,7 +17,9 @@ New features
 
 - ``ExcelWriter`` now accepts ``mode`` as a keyword argument, enabling append to existing workbooks when using the ``openpyxl`` engine (:issue:`3441`)
 
-- :func:`DataFrame.to_parquet` now accepts ``index`` as an argument, allowing the user to override the engine's default behavior and include or omit the dataframe's indexes from the resulting Parquet file. (:issue:`20768`)
+- :func:`DataFrame.to_parquet` now accepts ``index`` as an argument, allowing
+  the user to override the engine's default behavior to include or omit the
+  dataframe's indexes from the resulting Parquet file. (:issue:`20768`)
 
 .. _whatsnew_0240.enhancements.extension_array_operators:
 
diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
index 07f1e212d40c8..6ab56c68a510a 100644
--- a/pandas/io/parquet.py
+++ b/pandas/io/parquet.py
@@ -263,6 +263,8 @@ def to_parquet(df, path, engine='auto', compression='snappy', index=None,
         If ``True``, include the dataframe's index(es) in the file output. If
         ``False``, they will not be written to the file. If ``None``, the
         engine's default behavior will be used.
+
+        .. versionadded:: 0.24.0
     kwargs
         Additional keyword arguments passed to the engine
     """

From 7dc53a19320162218671654d7a406f5777730ddc Mon Sep 17 00:00:00 2001
From: Diego Argueta <diego@goodrx.com>
Date: Wed, 19 Sep 2018 16:08:20 -0700
Subject: [PATCH 17/17] PR feedback about rephrasing

---
 doc/source/io.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/source/io.rst b/doc/source/io.rst
index 4098906e0e173..cb22bb9198e25 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -4640,8 +4640,8 @@ Read only certain columns of a parquet file.
 Handling Indexes
 ''''''''''''''''
 
-Dumping a ``DataFrame`` to parquet may include the implicit index as one or more
-columns in the output file. Thus, this code:
+Serializing a ``DataFrame`` to parquet may include the implicit index as one or
+more columns in the output file. Thus, this code:
 
 .. ipython:: python