From 992dc210d8a4ed3d1bf65e76297f0afe9c60bd89 Mon Sep 17 00:00:00 2001
From: Shuowei Li <shuowei@google.com>
Date: Tue, 7 Oct 2025 04:17:32 +0000
Subject: [PATCH 01/37] change to ai.generate

---
 bigframes/operations/blob.py             | 12 +++++++-----
 tests/system/large/blob/test_function.py |  1 +
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/bigframes/operations/blob.py b/bigframes/operations/blob.py
index 1f6b75a8f5..038cc1d891 100644
--- a/bigframes/operations/blob.py
+++ b/bigframes/operations/blob.py
@@ -974,21 +974,23 @@ def audio_transcribe(
 
         prompt_text = "**Task:** Transcribe the provided audio. **Instructions:** - Your response must contain only the verbatim transcription of the audio. - Do not include any introductory text, summaries, or conversational filler in your response. The output should begin directly with the first word of the audio."
 
-        # Convert the audio series to the runtime representation required by the model.
-        audio_runtime = audio_series.blob._get_runtime("R", with_metadata=True)
-
+        # Use bbq.ai.generate() to transcribe audio
         transcribed_results = bbq.ai.generate(
-            prompt=(prompt_text, audio_runtime),
+            prompt=(prompt_text, audio_series),
             connection_id=connection,
             endpoint=model_name,
-            model_params={"generationConfig": {"temperature": 0.0}},
+            request_type="unspecified",
         )
 
+        transcribed_content_series = transcribed_results.struct.field("result").rename(
+            "transcribed_content"
+        )
         transcribed_content_series = transcribed_results.struct.field("result").rename(
             "transcribed_content"
         )
 
         if verbose:
+            transcribed_status_series = transcribed_results.struct.field("status")
             transcribed_status_series = transcribed_results.struct.field("status")
             results_df = bpd.DataFrame(
                 {
diff --git a/tests/system/large/blob/test_function.py b/tests/system/large/blob/test_function.py
index 7963fabd0b..2124234649 100644
--- a/tests/system/large/blob/test_function.py
+++ b/tests/system/large/blob/test_function.py
@@ -768,6 +768,7 @@ def test_blob_transcribe(
         )
         .to_pandas()
     )
+    print(actual)
 
     # check relative length
     expected_text = "Now, as all books not primarily intended as picture-books consist principally of types composed to form letterpress"

From 74e042a9ae286368a94840dbaee1d33dcafe673a Mon Sep 17 00:00:00 2001
From: Shuowei Li <shuowei@google.com>
Date: Sat, 4 Oct 2025 07:19:28 +0000
Subject: [PATCH 02/37] perf: Default to interactive display for SQL in
 anywidget mode

Previously, SQL queries in anywidget mode would fall back to deferred execution, showing a dry run instead of an interactive table.

This change modifies the display logic to directly use the anywidget interactive display for SQL queries, providing a more consistent and responsive user experience. A test case has been added to verify this behavior.
---
 bigframes/dataframe.py               | 21 ++++++++++++++++++---
 tests/system/small/test_anywidget.py | 15 +++++++++++++++
 2 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index f016fddd83..ae284fef0e 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -783,11 +783,26 @@ def __repr__(self) -> str:
 
         opts = bigframes.options.display
         max_results = opts.max_rows
-        # anywdiget mode uses the same display logic as the "deferred" mode
-        # for faster execution
-        if opts.repr_mode in ("deferred", "anywidget"):
+
+        # Only deferred mode shows dry run
+        if opts.repr_mode in ("deferred"):
             return formatter.repr_query_job(self._compute_dry_run())
 
+        # Anywidget mode uses interative display
+        if opts.repr_mode == "anywidget":
+            # Try to display with anywidget, fall back to deferred if not in IPython
+            try:
+                from IPython.display import display as ipython_display
+
+                from bigframes import display
+
+                widget = display.TableWidget(self.copy())
+                ipython_display(widget)
+                return ""  # Return empty string since we used display()
+            except (AttributeError, ValueError, ImportError):
+                # Not in IPython environment, fall back to deferred mode
+                return formatter.repr_query_job(self._compute_dry_run())
+
         # TODO(swast): pass max_columns and get the true column count back. Maybe
         # get 1 more column than we have requested so that pandas can add the
         # ... for us?
diff --git a/tests/system/small/test_anywidget.py b/tests/system/small/test_anywidget.py
index 8944ee5365..ad16888b44 100644
--- a/tests/system/small/test_anywidget.py
+++ b/tests/system/small/test_anywidget.py
@@ -455,6 +455,21 @@ def test_widget_creation_should_load_css_for_rendering(table_widget):
     assert ".bigframes-widget .footer" in css_content
 
 
+def test_sql_anywidget_mode(session: bf.Session):
+    """
+    Test that a SQL query runs in anywidget mode.
+    """
+    sql = "SELECT * FROM `bigquery-public-data.usa_names.usa_1910_current` LIMIT 5"
+
+    with bf.option_context("display.repr_mode", "anywidget"):
+        df = session.read_gbq(sql)
+        # In a real environment, this would display a widget.
+        # For testing, we just want to make sure we're in the anywidget code path.
+        # The `_repr_html_` method in anywidget mode will return an empty string
+        # and display the widget via IPython's display mechanism.
+        assert df._repr_html_() == ""
+
+
 def test_widget_row_count_should_be_immutable_after_creation(
     paginated_bf_df: bf.dataframe.DataFrame,
 ):

From 074d4c20f172c1ac2f0ed76bee38bb7d02b5acf3 Mon Sep 17 00:00:00 2001
From: Shuowei Li <shuowei@google.com>
Date: Sat, 4 Oct 2025 08:44:21 +0000
Subject: [PATCH 03/37] fix: resolve double printing issue in anywidget mode

---
 bigframes/dataframe.py                    |   5 +-
 notebooks/dataframes/anywidget_mode.ipynb |  38 ++++++--
 tests/system/small/test_anywidget.py      | 105 ++--------------------
 3 files changed, 41 insertions(+), 107 deletions(-)

diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index ae284fef0e..0eb53ddc03 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -792,13 +792,10 @@ def __repr__(self) -> str:
         if opts.repr_mode == "anywidget":
             # Try to display with anywidget, fall back to deferred if not in IPython
             try:
-                from IPython.display import display as ipython_display
-
                 from bigframes import display
 
                 widget = display.TableWidget(self.copy())
-                ipython_display(widget)
-                return ""  # Return empty string since we used display()
+                return widget._repr_html_()  # Return widget's HTML representation
             except (AttributeError, ValueError, ImportError):
                 # Not in IPython environment, fall back to deferred mode
                 return formatter.repr_query_job(self._compute_dry_run())
diff --git a/notebooks/dataframes/anywidget_mode.ipynb b/notebooks/dataframes/anywidget_mode.ipynb
index c2af915721..88f9658009 100644
--- a/notebooks/dataframes/anywidget_mode.ipynb
+++ b/notebooks/dataframes/anywidget_mode.ipynb
@@ -73,11 +73,25 @@
    "id": "f289d250",
    "metadata": {},
    "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "071c0a905297406ba6c990cbbb8fc28d",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "TableWidget(page_size=10, row_count=5552452, table_html='<table border=\"1\" class=\"dataframe table table-stripe…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Computation deferred. Computation will process 171.4 MB\n"
+      "\n"
      ]
     }
    ],
@@ -142,7 +156,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "aafd4f912b5f42e0896aa5f0c2c62620",
+       "model_id": "042a3d55c51b4a3192cf1a942c6797e8",
        "version_major": 2,
        "version_minor": 0
       },
@@ -155,11 +169,23 @@
     },
     {
      "data": {
-      "text/html": [],
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "06e46087ecbe4e6d8bdc2ed7c9284a7d",
+       "version_major": 2,
+       "version_minor": 0
+      },
       "text/plain": [
-       "Computation deferred. Computation will process 171.4 MB"
+       "TableWidget(page_size=10, row_count=5552452, table_html='<table border=\"1\" class=\"dataframe table table-stripe…"
       ]
      },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [],
+      "text/plain": []
+     },
      "execution_count": 6,
      "metadata": {},
      "output_type": "execute_result"
@@ -205,7 +231,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "5ec0ad9f11874d4f9d8edbc903ee7b5d",
+       "model_id": "4da2aaa13f074229b022c7256ba7510b",
        "version_major": 2,
        "version_minor": 0
       },
@@ -304,7 +330,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "651b5aac958c408183775152c2573a03",
+       "model_id": "d2739b180f7d40b1b52ebab121ab7a8a",
        "version_major": 2,
        "version_minor": 0
       },
diff --git a/tests/system/small/test_anywidget.py b/tests/system/small/test_anywidget.py
index ad16888b44..4f82f7d81d 100644
--- a/tests/system/small/test_anywidget.py
+++ b/tests/system/small/test_anywidget.py
@@ -12,14 +12,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from __future__ import annotations
+
+from unittest import mock
 
 import pandas as pd
 import pytest
 
 import bigframes as bf
 
-pytest.importorskip("anywidget")
-
 # Test constants to avoid change detector tests
 EXPECTED_ROW_COUNT = 6
 EXPECTED_PAGE_SIZE = 2
@@ -437,25 +438,15 @@ def test_setting_page_size_above_max_should_be_clamped(table_widget):
     # The page size is clamped to the maximum.
     assert table_widget.page_size == expected_clamped_size
 
-
-def test_widget_creation_should_load_css_for_rendering(table_widget):
     """
-    Given a TableWidget is created, when its resources are accessed,
-    it should contain the CSS content required for styling.
+    Test that the widget's CSS is loaded correctly.
     """
-    # The table_widget fixture creates the widget.
-    # No additional setup is needed.
-
-    # Access the CSS content.
     css_content = table_widget._css
-
-    # The content is a non-empty string containing a known selector.
-    assert isinstance(css_content, str)
-    assert len(css_content) > 0
     assert ".bigframes-widget .footer" in css_content
 
 
-def test_sql_anywidget_mode(session: bf.Session):
+@mock.patch("bigframes.display.TableWidget")
+def test_sql_anywidget_mode(mock_table_widget, session: bf.Session):
     """
     Test that a SQL query runs in anywidget mode.
     """
@@ -465,88 +456,8 @@ def test_sql_anywidget_mode(session: bf.Session):
         df = session.read_gbq(sql)
         # In a real environment, this would display a widget.
         # For testing, we just want to make sure we're in the anywidget code path.
-        # The `_repr_html_` method in anywidget mode will return an empty string
-        # and display the widget via IPython's display mechanism.
-        assert df._repr_html_() == ""
-
-
-def test_widget_row_count_should_be_immutable_after_creation(
-    paginated_bf_df: bf.dataframe.DataFrame,
-):
-    """
-    Given a widget created with a specific configuration when global display
-    options are changed later, the widget's original row_count should remain
-    unchanged.
-    """
-    from bigframes.display import TableWidget
-
-    # Use a context manager to ensure the option is reset
-    with bf.option_context("display.repr_mode", "anywidget", "display.max_rows", 2):
-        widget = TableWidget(paginated_bf_df)
-        initial_row_count = widget.row_count
-
-    # Change a global option that could influence row count
-    bf.options.display.max_rows = 10
-
-    # Verify the row count remains immutable.
-    assert widget.row_count == initial_row_count
-
-
-class FaultyIterator:
-    def __iter__(self):
-        return self
-
-    def __next__(self):
-        raise ValueError("Simulated read error")
-
-
-def test_widget_should_fallback_to_zero_rows_with_invalid_total_rows(
-    paginated_bf_df: bf.dataframe.DataFrame,
-    monkeypatch: pytest.MonkeyPatch,
-):
-    """
-    Given an internal component fails to return valid execution data,
-    when the TableWidget is created, its error_message should be set and displayed.
-    """
-    # Patch the executor's 'execute' method to simulate an error.
-    monkeypatch.setattr(
-        "bigframes.session.bq_caching_executor.BigQueryCachingExecutor.execute",
-        lambda self, *args, **kwargs: mock_execute_result_with_params(
-            self, paginated_bf_df._block.expr.schema, None, [], *args, **kwargs
-        ),
-    )
-
-    # Create the TableWidget under the error condition.
-    with bf.option_context("display.repr_mode", "anywidget"):
-        from bigframes.display import TableWidget
-
-        # The widget should handle the faulty data from the mock without crashing.
-        widget = TableWidget(paginated_bf_df)
-
-    # The widget should have an error message and display it in the HTML.
-    assert widget.row_count == 0
-    assert widget._error_message is not None
-    assert "Could not determine total row count" in widget._error_message
-    assert widget._error_message in widget.table_html
-
-
-def test_widget_row_count_reflects_actual_data_available(
-    paginated_bf_df: bf.dataframe.DataFrame,
-):
-    """
-    Test that widget row_count reflects the actual data available,
-    regardless of theoretical limits.
-    """
-    from bigframes.display import TableWidget
-
-    # Set up display options that define a page size.
-    with bf.option_context("display.repr_mode", "anywidget", "display.max_rows", 2):
-        widget = TableWidget(paginated_bf_df)
-
-        # The widget should report the total rows in the DataFrame,
-        # not limited by page_size (which only affects pagination)
-        assert widget.row_count == EXPECTED_ROW_COUNT
-        assert widget.page_size == 2  # Respects the display option
+        df._repr_html_()
+        mock_table_widget.assert_called_once()
 
 
 # TODO(shuowei): Add tests for custom index and multiindex

From 982ea9781af00c88b19b84bc16e0de3a78dea5ef Mon Sep 17 00:00:00 2001
From: Shuowei Li <shuowei@google.com>
Date: Tue, 7 Oct 2025 05:42:54 +0000
Subject: [PATCH 04/37] feat: Add test case for STRUCT column in anywidget

Adds a test case to verify that a DataFrame with a STRUCT column is
correctly displayed in anywidget mode.

This test confirms that displaying a STRUCT column does not raise an
exception that would trigger the fallback to the deferred representation.
It mocks `IPython.display.display` to capture the `TableWidget` instance
and asserts that the rendered HTML contains the expected string
representation of the STRUCT data.
---
 tests/system/small/test_anywidget.py | 41 ++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/tests/system/small/test_anywidget.py b/tests/system/small/test_anywidget.py
index 4f82f7d81d..15e902ee16 100644
--- a/tests/system/small/test_anywidget.py
+++ b/tests/system/small/test_anywidget.py
@@ -460,6 +460,47 @@ def test_sql_anywidget_mode(mock_table_widget, session: bf.Session):
         mock_table_widget.assert_called_once()
 
 
+@mock.patch("IPython.display.display")
+def test_struct_column_anywidget_mode(mock_display, session: bf.Session):
+    """
+    Test that a DataFrame with a STRUCT column is displayed in anywidget mode
+    and does not fall back to the deferred representation. This confirms that
+    anywidget can handle complex types without raising an exception that would
+    trigger the fallback mechanism.
+    """
+    pandas_df = pd.DataFrame(
+        {
+            "a": [1],
+            "b": [{"c": 2, "d": 3}],
+        }
+    )
+    bf_df = session.read_pandas(pandas_df)
+
+    with bf.option_context("display.repr_mode", "anywidget"):
+        with mock.patch(
+            "bigframes.dataframe.formatter.repr_query_job"
+        ) as mock_repr_query_job:
+            # Trigger the display logic.
+            result = bf_df._repr_html_()
+
+            # Assert that we did NOT fall back to the deferred representation.
+            mock_repr_query_job.assert_not_called()
+
+            # Assert that display was called with a TableWidget
+            mock_display.assert_called_once()
+            widget = mock_display.call_args[0][0]
+            from bigframes.display import TableWidget
+
+            assert isinstance(widget, TableWidget)
+
+            # Assert that the widget's html contains the struct
+            html = widget.table_html
+            assert "{&#x27;c&#x27;: 2, &#x27;d&#x27;: 3}" in html
+
+            # Assert that _repr_html_ returns an empty string
+            assert result == ""
+
+
 # TODO(shuowei): Add tests for custom index and multiindex
 # This may not be necessary for the SQL Cell use case but should be
 # considered for completeness.

From a9116c71f964cf5c8cec16b3249ded6faffb30ec Mon Sep 17 00:00:00 2001
From: Shuowei Li <shuowei@google.com>
Date: Thu, 9 Oct 2025 08:25:28 +0000
Subject: [PATCH 05/37] fix presubmit

---
 bigframes/display/anywidget.py | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/bigframes/display/anywidget.py b/bigframes/display/anywidget.py
index a0b4f809d8..15a022a1f5 100644
--- a/bigframes/display/anywidget.py
+++ b/bigframes/display/anywidget.py
@@ -218,16 +218,14 @@ def _set_table_html(self) -> None:
         start = self.page * self.page_size
         end = start + self.page_size
 
-        # fetch more data if the requested page is outside our cache
-        cached_data = self._cached_data
-        while len(cached_data) < end and not self._all_data_loaded:
-            if self._get_next_batch():
-                cached_data = self._cached_data
-            else:
-                break
-
-        # Get the data for the current page
-        page_data = cached_data.iloc[start:end]
+            # fetch more data if the requested page is outside our cache
+            cached_data = self._cached_data
+            while len(cached_data) < end and not self._all_data_loaded:
+                if self._get_next_batch():
+                    cached_data = self._cached_data
+                else:
+                    break
+            page_data = cached_data.iloc[start:end]
 
         # Generate HTML table
         self.table_html = bigframes.display.html.render_html(
@@ -250,8 +248,5 @@ def _page_size_changed(self, _change: Dict[str, Any]) -> None:
         # Reset the page to 0 when page size changes to avoid invalid page states
         self.page = 0
 
-        # Reset batches to use new page size for future data fetching
-        self._reset_batches_for_new_page_size()
-
         # Update the table display
         self._set_table_html()

From f0992c693221965fe57b8ab0edba322a4ac0b303 Mon Sep 17 00:00:00 2001
From: Shuowei Li <shuowei@google.com>
Date: Wed, 15 Oct 2025 19:44:37 +0000
Subject: [PATCH 06/37] Revert accidental changes to test_function.py

---
 tests/system/large/blob/test_function.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/system/large/blob/test_function.py b/tests/system/large/blob/test_function.py
index 2124234649..7963fabd0b 100644
--- a/tests/system/large/blob/test_function.py
+++ b/tests/system/large/blob/test_function.py
@@ -768,7 +768,6 @@ def test_blob_transcribe(
         )
         .to_pandas()
     )
-    print(actual)
 
     # check relative length
     expected_text = "Now, as all books not primarily intended as picture-books consist principally of types composed to form letterpress"

From 3aefdbfe73e3ec6bfbc611c185aafc94de8e1538 Mon Sep 17 00:00:00 2001
From: Shuowei Li <shuowei@google.com>
Date: Wed, 15 Oct 2025 19:46:37 +0000
Subject: [PATCH 07/37] revert accidental change to blob.py

---
 bigframes/operations/blob.py | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/bigframes/operations/blob.py b/bigframes/operations/blob.py
index 038cc1d891..1f6b75a8f5 100644
--- a/bigframes/operations/blob.py
+++ b/bigframes/operations/blob.py
@@ -974,23 +974,21 @@ def audio_transcribe(
 
         prompt_text = "**Task:** Transcribe the provided audio. **Instructions:** - Your response must contain only the verbatim transcription of the audio. - Do not include any introductory text, summaries, or conversational filler in your response. The output should begin directly with the first word of the audio."
 
-        # Use bbq.ai.generate() to transcribe audio
+        # Convert the audio series to the runtime representation required by the model.
+        audio_runtime = audio_series.blob._get_runtime("R", with_metadata=True)
+
         transcribed_results = bbq.ai.generate(
-            prompt=(prompt_text, audio_series),
+            prompt=(prompt_text, audio_runtime),
             connection_id=connection,
             endpoint=model_name,
-            request_type="unspecified",
+            model_params={"generationConfig": {"temperature": 0.0}},
         )
 
-        transcribed_content_series = transcribed_results.struct.field("result").rename(
-            "transcribed_content"
-        )
         transcribed_content_series = transcribed_results.struct.field("result").rename(
             "transcribed_content"
         )
 
         if verbose:
-            transcribed_status_series = transcribed_results.struct.field("status")
             transcribed_status_series = transcribed_results.struct.field("status")
             results_df = bpd.DataFrame(
                 {

From 7d4cfdfb6d677ad31245dfd4dda56ef8ff9a3fe6 Mon Sep 17 00:00:00 2001
From: Shuowei Li <shuowei@google.com>
Date: Wed, 15 Oct 2025 19:54:54 +0000
Subject: [PATCH 08/37] change return type

---
 bigframes/dataframe.py | 39 +++++++++++++--------------------------
 1 file changed, 13 insertions(+), 26 deletions(-)

diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index 0eb53ddc03..0259e94132 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -23,7 +23,6 @@
 import re
 import sys
 import textwrap
-import traceback
 import typing
 from typing import (
     Any,
@@ -788,18 +787,6 @@ def __repr__(self) -> str:
         if opts.repr_mode in ("deferred"):
             return formatter.repr_query_job(self._compute_dry_run())
 
-        # Anywidget mode uses interative display
-        if opts.repr_mode == "anywidget":
-            # Try to display with anywidget, fall back to deferred if not in IPython
-            try:
-                from bigframes import display
-
-                widget = display.TableWidget(self.copy())
-                return widget._repr_html_()  # Return widget's HTML representation
-            except (AttributeError, ValueError, ImportError):
-                # Not in IPython environment, fall back to deferred mode
-                return formatter.repr_query_job(self._compute_dry_run())
-
         # TODO(swast): pass max_columns and get the true column count back. Maybe
         # get 1 more column than we have requested so that pandas can add the
         # ... for us?
@@ -863,27 +850,27 @@ def _repr_html_(self) -> str:
 
         if opts.repr_mode == "anywidget":
             try:
+                import anywidget  # noqa: F401
                 from IPython.display import display as ipython_display
+                import traitlets  # noqa: F401
 
                 from bigframes import display
-
-                # Always create a new widget instance for each display call
-                # This ensures that each cell gets its own widget and prevents
-                # unintended sharing between cells
-                widget = display.TableWidget(df.copy())
-
-                ipython_display(widget)
-                return ""  # Return empty string since we used display()
-
-            except (AttributeError, ValueError, ImportError):
-                # Fallback if anywidget is not available
+            except ImportError:
                 warnings.warn(
-                    "Anywidget mode is not available. "
+                    "anywidget or its dependencies are not installed. "
                     "Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use interactive tables. "
-                    f"Falling back to deferred mode. Error: {traceback.format_exc()}"
+                    "Falling back to deferred mode."
                 )
                 return formatter.repr_query_job(self._compute_dry_run())
 
+            # Always create a new widget instance for each display call
+            # This ensures that each cell gets its own widget and prevents
+            # unintended sharing between cells
+            widget = display.TableWidget(df.copy())
+
+            ipython_display(widget)
+            return ""  # Return empty string since we used display()
+
         # Continue with regular HTML rendering for non-anywidget modes
         # TODO(swast): pass max_columns and get the true column count back. Maybe
         # get 1 more column than we have requested so that pandas can add the

From a951810f11b3872d6b5868e37b5a56de08ff9655 Mon Sep 17 00:00:00 2001
From: Shuowei Li <shuowei@google.com>
Date: Mon, 20 Oct 2025 08:26:16 +0000
Subject: [PATCH 09/37] add todo and revert change

---
 bigframes/dataframe.py         | 65 +++++++++++++++-------------------
 bigframes/display/anywidget.py | 25 ++++++++-----
 2 files changed, 46 insertions(+), 44 deletions(-)

diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index 0259e94132..b7d1268b61 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -23,6 +23,7 @@
 import re
 import sys
 import textwrap
+import traceback
 import typing
 from typing import (
     Any,
@@ -782,9 +783,9 @@ def __repr__(self) -> str:
 
         opts = bigframes.options.display
         max_results = opts.max_rows
-
-        # Only deferred mode shows dry run
-        if opts.repr_mode in ("deferred"):
+        # anywdiget mode uses the same display logic as the "deferred" mode
+        # for faster execution
+        if opts.repr_mode in ("deferred", "anywidget"):
             return formatter.repr_query_job(self._compute_dry_run())
 
         # TODO(swast): pass max_columns and get the true column count back. Maybe
@@ -850,27 +851,27 @@ def _repr_html_(self) -> str:
 
         if opts.repr_mode == "anywidget":
             try:
-                import anywidget  # noqa: F401
                 from IPython.display import display as ipython_display
-                import traitlets  # noqa: F401
 
                 from bigframes import display
-            except ImportError:
+
+                # Always create a new widget instance for each display call
+                # This ensures that each cell gets its own widget and prevents
+                # unintended sharing between cells
+                widget = display.TableWidget(df.copy())
+
+                ipython_display(widget)
+                return ""  # Return empty string since we used display()
+
+            except (AttributeError, ValueError, ImportError):
+                # Fallback if anywidget is not available
                 warnings.warn(
-                    "anywidget or its dependencies are not installed. "
+                    "Anywidget mode is not available. "
                     "Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use interactive tables. "
-                    "Falling back to deferred mode."
+                    f"Falling back to deferred mode. Error: {traceback.format_exc()}"
                 )
                 return formatter.repr_query_job(self._compute_dry_run())
 
-            # Always create a new widget instance for each display call
-            # This ensures that each cell gets its own widget and prevents
-            # unintended sharing between cells
-            widget = display.TableWidget(df.copy())
-
-            ipython_display(widget)
-            return ""  # Return empty string since we used display()
-
         # Continue with regular HTML rendering for non-anywidget modes
         # TODO(swast): pass max_columns and get the true column count back. Maybe
         # get 1 more column than we have requested so that pandas can add the
@@ -2568,33 +2569,25 @@ def sort_index(
     ) -> None:
         ...
 
+    @validations.requires_index
     def sort_index(
         self,
         *,
-        axis: Union[int, str] = 0,
         ascending: bool = True,
         inplace: bool = False,
         na_position: Literal["first", "last"] = "last",
     ) -> Optional[DataFrame]:
-        if utils.get_axis_number(axis) == 0:
-            if na_position not in ["first", "last"]:
-                raise ValueError("Param na_position must be one of 'first' or 'last'")
-            na_last = na_position == "last"
-            index_columns = self._block.index_columns
-            ordering = [
-                order.ascending_over(column, na_last)
-                if ascending
-                else order.descending_over(column, na_last)
-                for column in index_columns
-            ]
-            block = self._block.order_by(ordering)
-        else:  # axis=1
-            _, indexer = self.columns.sort_values(
-                return_indexer=True, ascending=ascending, na_position=na_position  # type: ignore
-            )
-            block = self._block.select_columns(
-                [self._block.value_columns[i] for i in indexer]
-            )
+        if na_position not in ["first", "last"]:
+            raise ValueError("Param na_position must be one of 'first' or 'last'")
+        na_last = na_position == "last"
+        index_columns = self._block.index_columns
+        ordering = [
+            order.ascending_over(column, na_last)
+            if ascending
+            else order.descending_over(column, na_last)
+            for column in index_columns
+        ]
+        block = self._block.order_by(ordering)
         if inplace:
             self._set_block(block)
             return None
diff --git a/bigframes/display/anywidget.py b/bigframes/display/anywidget.py
index 15a022a1f5..1ed6eeb8a5 100644
--- a/bigframes/display/anywidget.py
+++ b/bigframes/display/anywidget.py
@@ -209,6 +209,15 @@ def _reset_batches_for_new_page_size(self) -> None:
 
     def _set_table_html(self) -> None:
         """Sets the current html data based on the current page and page size."""
+        # TODO (shuowei): BigFrames Series with db_dtypes.JSONArrowType column
+        # fails to convert to pandas DataFrame in anywidget environment due to
+        # missing handling in to_pandas_batches(). b/453561268
+        # For empty dataframe, render empty table with headers.
+        if self.row_count == 0:
+            page_data = self._cached_data
+        else:
+            start = self.page * self.page_size
+            end = start + self.page_size
         if self._error_message:
             self.table_html = (
                 f"<div class='bigframes-error-message'>{self._error_message}</div>"
@@ -218,14 +227,14 @@ def _set_table_html(self) -> None:
         start = self.page * self.page_size
         end = start + self.page_size
 
-            # fetch more data if the requested page is outside our cache
-            cached_data = self._cached_data
-            while len(cached_data) < end and not self._all_data_loaded:
-                if self._get_next_batch():
-                    cached_data = self._cached_data
-                else:
-                    break
-            page_data = cached_data.iloc[start:end]
+        # fetch more data if the requested page is outside our cache
+        cached_data = self._cached_data
+        while len(cached_data) < end and not self._all_data_loaded:
+            if self._get_next_batch():
+                cached_data = self._cached_data
+            else:
+                break
+        page_data = cached_data.iloc[start:end]
 
         # Generate HTML table
         self.table_html = bigframes.display.html.render_html(

From 89521d2b108492f7b7fed2c29a00b729228a6d1e Mon Sep 17 00:00:00 2001
From: Shuowei Li <shuowei@google.com>
Date: Mon, 20 Oct 2025 08:28:56 +0000
Subject: [PATCH 10/37] Revert "add todo and revert change"

This reverts commit 153e1d203c273d6755623b3db30bd2256a240cc1.
---
 bigframes/dataframe.py         | 65 +++++++++++++++++++---------------
 bigframes/display/anywidget.py |  3 --
 2 files changed, 36 insertions(+), 32 deletions(-)

diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index b7d1268b61..0259e94132 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -23,7 +23,6 @@
 import re
 import sys
 import textwrap
-import traceback
 import typing
 from typing import (
     Any,
@@ -783,9 +782,9 @@ def __repr__(self) -> str:
 
         opts = bigframes.options.display
         max_results = opts.max_rows
-        # anywdiget mode uses the same display logic as the "deferred" mode
-        # for faster execution
-        if opts.repr_mode in ("deferred", "anywidget"):
+
+        # Only deferred mode shows dry run
+        if opts.repr_mode in ("deferred"):
             return formatter.repr_query_job(self._compute_dry_run())
 
         # TODO(swast): pass max_columns and get the true column count back. Maybe
@@ -851,27 +850,27 @@ def _repr_html_(self) -> str:
 
         if opts.repr_mode == "anywidget":
             try:
+                import anywidget  # noqa: F401
                 from IPython.display import display as ipython_display
+                import traitlets  # noqa: F401
 
                 from bigframes import display
-
-                # Always create a new widget instance for each display call
-                # This ensures that each cell gets its own widget and prevents
-                # unintended sharing between cells
-                widget = display.TableWidget(df.copy())
-
-                ipython_display(widget)
-                return ""  # Return empty string since we used display()
-
-            except (AttributeError, ValueError, ImportError):
-                # Fallback if anywidget is not available
+            except ImportError:
                 warnings.warn(
-                    "Anywidget mode is not available. "
+                    "anywidget or its dependencies are not installed. "
                     "Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use interactive tables. "
-                    f"Falling back to deferred mode. Error: {traceback.format_exc()}"
+                    "Falling back to deferred mode."
                 )
                 return formatter.repr_query_job(self._compute_dry_run())
 
+            # Always create a new widget instance for each display call
+            # This ensures that each cell gets its own widget and prevents
+            # unintended sharing between cells
+            widget = display.TableWidget(df.copy())
+
+            ipython_display(widget)
+            return ""  # Return empty string since we used display()
+
         # Continue with regular HTML rendering for non-anywidget modes
         # TODO(swast): pass max_columns and get the true column count back. Maybe
         # get 1 more column than we have requested so that pandas can add the
@@ -2569,25 +2568,33 @@ def sort_index(
     ) -> None:
         ...
 
-    @validations.requires_index
     def sort_index(
         self,
         *,
+        axis: Union[int, str] = 0,
         ascending: bool = True,
         inplace: bool = False,
         na_position: Literal["first", "last"] = "last",
     ) -> Optional[DataFrame]:
-        if na_position not in ["first", "last"]:
-            raise ValueError("Param na_position must be one of 'first' or 'last'")
-        na_last = na_position == "last"
-        index_columns = self._block.index_columns
-        ordering = [
-            order.ascending_over(column, na_last)
-            if ascending
-            else order.descending_over(column, na_last)
-            for column in index_columns
-        ]
-        block = self._block.order_by(ordering)
+        if utils.get_axis_number(axis) == 0:
+            if na_position not in ["first", "last"]:
+                raise ValueError("Param na_position must be one of 'first' or 'last'")
+            na_last = na_position == "last"
+            index_columns = self._block.index_columns
+            ordering = [
+                order.ascending_over(column, na_last)
+                if ascending
+                else order.descending_over(column, na_last)
+                for column in index_columns
+            ]
+            block = self._block.order_by(ordering)
+        else:  # axis=1
+            _, indexer = self.columns.sort_values(
+                return_indexer=True, ascending=ascending, na_position=na_position  # type: ignore
+            )
+            block = self._block.select_columns(
+                [self._block.value_columns[i] for i in indexer]
+            )
         if inplace:
             self._set_block(block)
             return None
diff --git a/bigframes/display/anywidget.py b/bigframes/display/anywidget.py
index 1ed6eeb8a5..cf5d4e6310 100644
--- a/bigframes/display/anywidget.py
+++ b/bigframes/display/anywidget.py
@@ -209,9 +209,6 @@ def _reset_batches_for_new_page_size(self) -> None:
 
     def _set_table_html(self) -> None:
         """Sets the current html data based on the current page and page size."""
-        # TODO (shuowei): BigFrames Series with db_dtypes.JSONArrowType column
-        # fails to convert to pandas DataFrame in anywidget environment due to
-        # missing handling in to_pandas_batches(). b/453561268
         # For empty dataframe, render empty table with headers.
         if self.row_count == 0:
             page_data = self._cached_data

From 1c155d04b2fd9d0ec286e4458cb5ae758e201c1a Mon Sep 17 00:00:00 2001
From: Shuowei Li <shuowei@google.com>
Date: Mon, 20 Oct 2025 17:12:13 +0000
Subject: [PATCH 11/37] Add todo

---
 bigframes/display/anywidget.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/bigframes/display/anywidget.py b/bigframes/display/anywidget.py
index cf5d4e6310..1ed6eeb8a5 100644
--- a/bigframes/display/anywidget.py
+++ b/bigframes/display/anywidget.py
@@ -209,6 +209,9 @@ def _reset_batches_for_new_page_size(self) -> None:
 
     def _set_table_html(self) -> None:
         """Sets the current html data based on the current page and page size."""
+        # TODO (shuowei): BigFrames Series with db_dtypes.JSONArrowType column
+        # fails to convert to pandas DataFrame in anywidget environment due to
+        # missing handling in to_pandas_batches(). b/453561268
         # For empty dataframe, render empty table with headers.
         if self.row_count == 0:
             page_data = self._cached_data

From 86cb692d9ad30ca1cf36f3490ce5fb4c5ac8a0ec Mon Sep 17 00:00:00 2001
From: Shuowei Li <shuowei@google.com>
Date: Tue, 21 Oct 2025 01:54:37 +0000
Subject: [PATCH 12/37] Fix: Handle JSON dtype in anywidget display

This commit fixes an AttributeError that occurred when displaying a
DataFrame with a JSON column in anywidget mode. The dtype check
was incorrect and has been updated. Additionally, the SQL compilation
for casting JSON to string has been corrected to use TO_JSON_STRING.
---
 .../ibis_compiler/scalar_op_registry.py       |  2 +-
 bigframes/display/anywidget.py                | 19 +++++++++++++++----
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/bigframes/core/compile/ibis_compiler/scalar_op_registry.py b/bigframes/core/compile/ibis_compiler/scalar_op_registry.py
index e983fc7e21..7b17aac61a 100644
--- a/bigframes/core/compile/ibis_compiler/scalar_op_registry.py
+++ b/bigframes/core/compile/ibis_compiler/scalar_op_registry.py
@@ -1036,7 +1036,7 @@ def astype_op_impl(x: ibis_types.Value, op: ops.AsTypeOp):
         if to_type == ibis_dtypes.bool:
             return cast_json_to_bool_in_safe(x) if op.safe else cast_json_to_bool(x)
         if to_type == ibis_dtypes.string:
-            return cast_json_to_string_in_safe(x) if op.safe else cast_json_to_string(x)
+            return to_json_string(x)
 
     # TODO: either inline this function, or push rest of this op into the function
     return bigframes.core.compile.ibis_types.cast_ibis_value(x, to_type, safe=op.safe)
diff --git a/bigframes/display/anywidget.py b/bigframes/display/anywidget.py
index 1ed6eeb8a5..ff5a51f312 100644
--- a/bigframes/display/anywidget.py
+++ b/bigframes/display/anywidget.py
@@ -74,7 +74,21 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):
                 "Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use TableWidget."
             )
 
-        self._dataframe = dataframe
+        super().__init__()
+        # Workaround for Arrow bug https://github.com/apache/arrow/issues/45262
+        # JSON columns are not supported in `to_pandas_batches` and will be converted to string.
+        json_cols = [
+            col
+            for col, dtype in dataframe.dtypes.items()
+            if dtype == bigframes.dtypes.JSON_DTYPE
+        ]
+        if json_cols:
+            df_copy = dataframe.copy()
+            for col in json_cols:
+                df_copy[str(col)] = df_copy[str(col)].astype("string")
+            self._dataframe = df_copy
+        else:
+            self._dataframe = dataframe
 
         super().__init__()
 
@@ -209,9 +223,6 @@ def _reset_batches_for_new_page_size(self) -> None:
 
     def _set_table_html(self) -> None:
         """Sets the current html data based on the current page and page size."""
-        # TODO (shuowei): BigFrames Series with db_dtypes.JSONArrowType column
-        # fails to convert to pandas DataFrame in anywidget environment due to
-        # missing handling in to_pandas_batches(). b/453561268
         # For empty dataframe, render empty table with headers.
         if self.row_count == 0:
             page_data = self._cached_data

From 81013c6133fe3beeaec2dce300b03b2165ca2d79 Mon Sep 17 00:00:00 2001
From: Shuowei Li <shuowei@google.com>
Date: Tue, 21 Oct 2025 02:40:03 +0000
Subject: [PATCH 13/37] revert a change

---
 bigframes/dataframe.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index 0259e94132..41bc4db03c 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -784,7 +784,7 @@ def __repr__(self) -> str:
         max_results = opts.max_rows
 
         # Only deferred mode shows dry run
-        if opts.repr_mode in ("deferred"):
+        if opts.repr_mode in ("deferred", "anywidget"):
             return formatter.repr_query_job(self._compute_dry_run())
 
         # TODO(swast): pass max_columns and get the true column count back. Maybe

From 6ea72810f26c13e76b0a92ed1333ba1b91d6edbf Mon Sep 17 00:00:00 2001
From: Shuowei Li <shuowei@google.com>
Date: Tue, 21 Oct 2025 02:41:42 +0000
Subject: [PATCH 14/37] revert a change

---
 bigframes/dataframe.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index 41bc4db03c..fc60e47f7a 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -783,7 +783,8 @@ def __repr__(self) -> str:
         opts = bigframes.options.display
         max_results = opts.max_rows
 
-        # Only deferred mode shows dry run
+        # anywdiget mode uses the same display logic as the "deferred" mode
+        # for faster execution
         if opts.repr_mode in ("deferred", "anywidget"):
             return formatter.repr_query_job(self._compute_dry_run())
 

From 63b7918bba81abdef65a13cbd486b5f1bd5b364b Mon Sep 17 00:00:00 2001
From: Shuowei Li <shuowei@google.com>
Date: Tue, 21 Oct 2025 02:49:38 +0000
Subject: [PATCH 15/37] Revert: Restore bigframes/dataframe.py to state from
 42da847

---
 bigframes/dataframe.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index fc60e47f7a..0259e94132 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -783,9 +783,8 @@ def __repr__(self) -> str:
         opts = bigframes.options.display
         max_results = opts.max_rows
 
-        # anywdiget mode uses the same display logic as the "deferred" mode
-        # for faster execution
-        if opts.repr_mode in ("deferred", "anywidget"):
+        # Only deferred mode shows dry run
+        if opts.repr_mode in ("deferred"):
             return formatter.repr_query_job(self._compute_dry_run())
 
         # TODO(swast): pass max_columns and get the true column count back. Maybe

From 4aa98797c42a93da2c3d1fb89d4293886d01d120 Mon Sep 17 00:00:00 2001
From: Shuowei Li <shuowei@google.com>
Date: Tue, 21 Oct 2025 02:57:02 +0000
Subject: [PATCH 16/37] remove anywidget from early return, allow execution
 proceeds to _repr_html_()

---
 bigframes/dataframe.py                 | 15 +++++++++++++++
 bigframes/operations/output_schemas.py |  5 +++++
 2 files changed, 20 insertions(+)

diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index 0259e94132..5ecc123417 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -863,6 +863,21 @@ def _repr_html_(self) -> str:
                 )
                 return formatter.repr_query_job(self._compute_dry_run())
 
+            # The anywidget frontend doesn't support the db_dtypes JSON type, so
+            # convert to strings for display.
+            json_cols = [
+                series_name
+                for series_name, series in df.items()
+                if bigframes.dtypes.contains_db_dtypes_json_dtype(series.dtype)
+            ]
+            if json_cols:
+                warnings.warn(
+                    "Converting JSON columns to strings for display. "
+                    "This is temporary and will be removed when the frontend supports JSON types."
+                )
+                for col in json_cols:
+                    df[col] = df[col]._apply_unary_op(ops.json_ops.ToJSONString())
+
             # Always create a new widget instance for each display call
             # This ensures that each cell gets its own widget and prevents
             # unintended sharing between cells
diff --git a/bigframes/operations/output_schemas.py b/bigframes/operations/output_schemas.py
index ff9c9883dc..2a72d4f48f 100644
--- a/bigframes/operations/output_schemas.py
+++ b/bigframes/operations/output_schemas.py
@@ -14,6 +14,8 @@
 
 import pyarrow as pa
 
+from bigframes import dtypes
+
 
 def parse_sql_type(sql: str) -> pa.DataType:
     """
@@ -43,6 +45,9 @@ def parse_sql_type(sql: str) -> pa.DataType:
     if sql.upper() == "BOOL":
         return pa.bool_()
 
+    if sql.upper() == "JSON":
+        return dtypes.JSON_ARROW_TYPE
+
     if sql.upper().startswith("ARRAY<") and sql.endswith(">"):
         inner_type = sql[len("ARRAY<") : -1]
         return pa.list_(parse_sql_type(inner_type))

From 62d8608418bdd30e931a6ccd72e24be9ce591de5 Mon Sep 17 00:00:00 2001
From: Shuowei Li <shuowei@google.com>
Date: Tue, 21 Oct 2025 03:13:23 +0000
Subject: [PATCH 17/37] remove unnecessary changes

---
 bigframes/dataframe.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index 5ecc123417..0dc8bc3d5f 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -783,8 +783,9 @@ def __repr__(self) -> str:
         opts = bigframes.options.display
         max_results = opts.max_rows
 
-        # Only deferred mode shows dry run
-        if opts.repr_mode in ("deferred"):
+        # anywdiget mode uses the same display logic as the "deferred" mode
+        # for faster execution
+        if opts.repr_mode in ("deferred", "anywidget"):
             return formatter.repr_query_job(self._compute_dry_run())
 
         # TODO(swast): pass max_columns and get the true column count back. Maybe

From 24d766d18fdd7fc8275bed76000219486bdeb828 Mon Sep 17 00:00:00 2001
From: Shuowei Li <shuowei@google.com>
Date: Tue, 21 Oct 2025 03:21:04 +0000
Subject: [PATCH 18/37] remove redundant code change

---
 bigframes/dataframe.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index 0dc8bc3d5f..4fe259639e 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -23,6 +23,7 @@
 import re
 import sys
 import textwrap
+import traceback
 import typing
 from typing import (
     Any,
@@ -856,11 +857,12 @@ def _repr_html_(self) -> str:
                 import traitlets  # noqa: F401
 
                 from bigframes import display
-            except ImportError:
+            except (AttributeError, ValueError, ImportError):
+                # Fallback if anywidget is not available
                 warnings.warn(
-                    "anywidget or its dependencies are not installed. "
+                    "Anywidget mode is not available. "
                     "Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use interactive tables. "
-                    "Falling back to deferred mode."
+                    f"Falling back to deferred mode. Error: {traceback.format_exc()}"
                 )
                 return formatter.repr_query_job(self._compute_dry_run())
 

From 9239f20cdff25191082bdd789eccdc4ff6d6b584 Mon Sep 17 00:00:00 2001
From: Shuowei Li <shuowei@google.com>
Date: Tue, 21 Oct 2025 03:54:11 +0000
Subject: [PATCH 19/37] code style change

---
 .../ibis_compiler/scalar_op_registry.py       |   2 +-
 bigframes/dataframe.py                        |  48 +++---
 notebooks/dataframes/anywidget_mode.ipynb     | 160 ++++++++++++++----
 3 files changed, 153 insertions(+), 57 deletions(-)

diff --git a/bigframes/core/compile/ibis_compiler/scalar_op_registry.py b/bigframes/core/compile/ibis_compiler/scalar_op_registry.py
index 7b17aac61a..74314cd882 100644
--- a/bigframes/core/compile/ibis_compiler/scalar_op_registry.py
+++ b/bigframes/core/compile/ibis_compiler/scalar_op_registry.py
@@ -30,7 +30,7 @@
 from bigframes.core.compile.constants import UNIT_TO_US_CONVERSION_FACTORS
 import bigframes.core.compile.ibis_compiler.default_ordering
 from bigframes.core.compile.ibis_compiler.scalar_op_compiler import (
-    scalar_op_compiler,  # TODO(tswast): avoid import of variables
+    scalar_op_compiler,  # TODO(b/428238610): avoid import of variables
 )
 import bigframes.core.compile.ibis_types
 import bigframes.operations as ops
diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index 4fe259639e..38500b8fb3 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -784,7 +784,7 @@ def __repr__(self) -> str:
         opts = bigframes.options.display
         max_results = opts.max_rows
 
-        # anywdiget mode uses the same display logic as the "deferred" mode
+        # anywidget mode uses the same display logic as the "deferred" mode
         # for faster execution
         if opts.repr_mode in ("deferred", "anywidget"):
             return formatter.repr_query_job(self._compute_dry_run())
@@ -857,6 +857,29 @@ def _repr_html_(self) -> str:
                 import traitlets  # noqa: F401
 
                 from bigframes import display
+
+                # The anywidget frontend doesn't support the db_dtypes JSON type, so
+                # convert to strings for display.
+                json_cols = [
+                    series_name
+                    for series_name, series in df.items()
+                    if bigframes.dtypes.contains_db_dtypes_json_dtype(series.dtype)
+                ]
+                if json_cols:
+                    warnings.warn(
+                        "Converting JSON columns to strings for display. "
+                        "This is temporary and will be removed when the frontend supports JSON types."
+                    )
+                    for col in json_cols:
+                        df[col] = df[col]._apply_unary_op(ops.json_ops.ToJSONString())
+
+                # Always create a new widget instance for each display call
+                # This ensures that each cell gets its own widget and prevents
+                # unintended sharing between cells
+                widget = display.TableWidget(df.copy())
+
+                ipython_display(widget)
+                return ""  # Return empty string since we used display()
             except (AttributeError, ValueError, ImportError):
                 # Fallback if anywidget is not available
                 warnings.warn(
@@ -866,29 +889,6 @@ def _repr_html_(self) -> str:
                 )
                 return formatter.repr_query_job(self._compute_dry_run())
 
-            # The anywidget frontend doesn't support the db_dtypes JSON type, so
-            # convert to strings for display.
-            json_cols = [
-                series_name
-                for series_name, series in df.items()
-                if bigframes.dtypes.contains_db_dtypes_json_dtype(series.dtype)
-            ]
-            if json_cols:
-                warnings.warn(
-                    "Converting JSON columns to strings for display. "
-                    "This is temporary and will be removed when the frontend supports JSON types."
-                )
-                for col in json_cols:
-                    df[col] = df[col]._apply_unary_op(ops.json_ops.ToJSONString())
-
-            # Always create a new widget instance for each display call
-            # This ensures that each cell gets its own widget and prevents
-            # unintended sharing between cells
-            widget = display.TableWidget(df.copy())
-
-            ipython_display(widget)
-            return ""  # Return empty string since we used display()
-
         # Continue with regular HTML rendering for non-anywidget modes
         # TODO(swast): pass max_columns and get the true column count back. Maybe
         # get 1 more column than we have requested so that pandas can add the
diff --git a/notebooks/dataframes/anywidget_mode.ipynb b/notebooks/dataframes/anywidget_mode.ipynb
index 88f9658009..903d88b210 100644
--- a/notebooks/dataframes/anywidget_mode.ipynb
+++ b/notebooks/dataframes/anywidget_mode.ipynb
@@ -73,25 +73,11 @@
    "id": "f289d250",
    "metadata": {},
    "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "071c0a905297406ba6c990cbbb8fc28d",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "TableWidget(page_size=10, row_count=5552452, table_html='<table border=\"1\" class=\"dataframe table table-stripe…"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\n"
+      "Computation deferred. Computation will process 171.4 MB\n"
      ]
     }
    ],
@@ -156,7 +142,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "042a3d55c51b4a3192cf1a942c6797e8",
+       "model_id": "25b38c1408434091865f4bf9525dd069",
        "version_major": 2,
        "version_minor": 0
       },
@@ -169,23 +155,11 @@
     },
     {
      "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "06e46087ecbe4e6d8bdc2ed7c9284a7d",
-       "version_major": 2,
-       "version_minor": 0
-      },
+      "text/html": [],
       "text/plain": [
-       "TableWidget(page_size=10, row_count=5552452, table_html='<table border=\"1\" class=\"dataframe table table-stripe…"
+       "Computation deferred. Computation will process 171.4 MB"
       ]
      },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [],
-      "text/plain": []
-     },
      "execution_count": 6,
      "metadata": {},
      "output_type": "execute_result"
@@ -209,6 +183,18 @@
    "id": "6920d49b",
    "metadata": {},
    "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "✅ Completed. "
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
     {
      "data": {
       "text/html": [
@@ -231,7 +217,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "4da2aaa13f074229b022c7256ba7510b",
+       "model_id": "cb4f246802a1407cb966321d8724ea27",
        "version_major": 2,
        "version_minor": 0
       },
@@ -306,6 +292,20 @@
    "id": "a9d5d13a",
    "metadata": {},
    "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "✅ Completed. \n",
+       "    Query processed 171.4 MB in a moment of slot time.\n",
+       "    "
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
     {
      "data": {
       "text/html": [
@@ -330,7 +330,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "d2739b180f7d40b1b52ebab121ab7a8a",
+       "model_id": "5b48c05254224c4dbce56f2793d6b661",
        "version_major": 2,
        "version_minor": 0
       },
@@ -349,10 +349,105 @@
     "print(f\"Small dataset pages: {math.ceil(small_widget.row_count / small_widget.page_size)}\")\n",
     "small_widget"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "added-cell-2",
+   "metadata": {},
+   "source": [
+    "### Displaying Generative AI results containing JSON\n",
+    "The `AI.GENERATE` function in BigQuery returns results in a JSON column. While BigQuery's JSON type is not natively supported by the underlying Arrow `to_pandas_batches()` method used in anywidget mode ([Apache Arrow issue #45262](https://github.com/apache/arrow/issues/45262)), BigQuery Dataframes automatically converts JSON columns to strings for display. This allows you to view the results of generative AI functions seamlessly."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "added-cell-1",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "✅ Completed. \n",
+       "    Query processed 85.9 kB in 14 seconds of slot time.\n",
+       "    "
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n",
+      "instead of using `db_dtypes` in the future when available in pandas\n",
+      "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n",
+      "  warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n",
+      "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dataframe.py:869: UserWarning: Converting JSON columns to strings for display. This is temporary and will be removed when the frontend supports JSON types.\n",
+      "  warnings.warn(\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "✅ Completed. \n",
+       "    Query processed 0 Bytes in a moment of slot time.\n",
+       "    "
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "ab607cc7263f4a159ecfe63682c5e651",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "TableWidget(page_size=10, row_count=5, table_html='<table border=\"1\" class=\"dataframe table table-striped tabl…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [],
+      "text/plain": [
+       "Computation deferred. Computation will process 0 Bytes"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "bpd._read_gbq_colab(\"\"\"\\n\n",
+    "SELECT\\n\n",
+    "  AI.GENERATE(\\n\n",
+    "    prompt=>(\\\"Extract the values.\\\", OBJ.GET_ACCESS_URL(OBJ.FETCH_METADATA(OBJ.MAKE_REF(gcs_path, \\\"us.conn\\\")), \\\"r\\\")),\\n\n",
+    "    connection_id=>\\\"bigframes-dev.us.bigframes-default-connection\\\",\\n\n",
+    "    output_schema=>\\\"publication_date string, class_international string, application_number string, filing_date string\\\") AS result,\\n\n",
+    "  *\\n\n",
+    "FROM `bigquery-public-data.labeled_patents.extracted_data`\\n\n",
+    "LIMIT 5;\\n\n",
+    "\"\"\")"
+   ]
   }
  ],
  "metadata": {
   "kernelspec": {
+   "display_name": "3.10.18",
    "display_name": "3.10.18",
    "language": "python",
    "name": "python3"
@@ -368,6 +463,7 @@
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
    "version": "3.10.18"
+   "version": "3.10.18"
   }
  },
  "nbformat": 4,

From 48d6c665c072237bc61aa7d705663bfe0aa4ddb8 Mon Sep 17 00:00:00 2001
From: Shuowei Li <shuowei@google.com>
Date: Tue, 21 Oct 2025 04:07:52 +0000
Subject: [PATCH 20/37] tescase update

---
 tests/system/small/test_anywidget.py | 68 ++++++++++++++++++++--------
 1 file changed, 48 insertions(+), 20 deletions(-)

diff --git a/tests/system/small/test_anywidget.py b/tests/system/small/test_anywidget.py
index 15e902ee16..40804e1853 100644
--- a/tests/system/small/test_anywidget.py
+++ b/tests/system/small/test_anywidget.py
@@ -24,7 +24,6 @@
 # Test constants to avoid change detector tests
 EXPECTED_ROW_COUNT = 6
 EXPECTED_PAGE_SIZE = 2
-EXPECTED_TOTAL_PAGES = 3
 
 
 @pytest.fixture(scope="module")
@@ -112,21 +111,19 @@ def empty_bf_df(
     return session.read_pandas(empty_pandas_df)
 
 
-def mock_execute_result_with_params(
-    self, schema, total_rows_val, arrow_batches_val, *args, **kwargs
-):
-    """
-    Mocks an execution result with configurable total_rows and arrow_batches.
-    """
-    from bigframes.session.executor import ExecuteResult
+@pytest.fixture(scope="module")
+def json_df(session: bf.Session) -> bf.dataframe.DataFrame:
+    """Create a DataFrame with a JSON column for testing."""
+    import bigframes.dtypes
 
-    return ExecuteResult(
-        iter(arrow_batches_val),
-        schema=schema,
-        query_job=None,
-        total_bytes=None,
-        total_rows=total_rows_val,
+    pandas_df = pd.DataFrame(
+        {
+            "a": [1],
+            "b": ['{"c": 2, "d": 3}'],
+        }
     )
+    pandas_df["b"] = pandas_df["b"].astype(bigframes.dtypes.JSON_DTYPE)
+    return session.read_pandas(pandas_df)
 
 
 def _assert_html_matches_pandas_slice(
@@ -438,12 +435,6 @@ def test_setting_page_size_above_max_should_be_clamped(table_widget):
     # The page size is clamped to the maximum.
     assert table_widget.page_size == expected_clamped_size
 
-    """
-    Test that the widget's CSS is loaded correctly.
-    """
-    css_content = table_widget._css
-    assert ".bigframes-widget .footer" in css_content
-
 
 @mock.patch("bigframes.display.TableWidget")
 def test_sql_anywidget_mode(mock_table_widget, session: bf.Session):
@@ -501,6 +492,43 @@ def test_struct_column_anywidget_mode(mock_display, session: bf.Session):
             assert result == ""
 
 
+def test_widget_creation_should_load_css_for_rendering(table_widget):
+    """
+    Test that the widget's CSS is loaded correctly.
+    """
+    css_content = table_widget._css
+    assert ".bigframes-widget .footer" in css_content
+
+
+@mock.patch("IPython.display.display")
+def test_json_column_anywidget_mode(mock_display, json_df: bf.dataframe.DataFrame):
+    """
+    Test that a DataFrame with a JSON column is displayed in anywidget mode
+    by converting JSON to string, and does not fall back to deferred representation.
+    """
+    with bf.option_context("display.repr_mode", "anywidget"):
+        with mock.patch(
+            "bigframes.dataframe.formatter.repr_query_job"
+        ) as mock_repr_query_job:
+            result = json_df._repr_html_()
+
+            # Assert no fallback
+            mock_repr_query_job.assert_not_called()
+
+            # Assert TableWidget was created and displayed
+            mock_display.assert_called_once()
+            widget = mock_display.call_args[0][0]
+            from bigframes.display import TableWidget
+
+            assert isinstance(widget, TableWidget)
+
+            # Assert JSON was converted to string in the HTML
+            html = widget.table_html
+            assert "{&quot;c&quot;:2,&quot;d&quot;:3}" in html
+
+            assert result == ""
+
+
 # TODO(shuowei): Add tests for custom index and multiindex
 # This may not be necessary for the SQL Cell use case but should be
 # considered for completeness.

From 4cb8cd22a6c93342d599f2e976ec05aa92b42302 Mon Sep 17 00:00:00 2001
From: Shuowei Li <shuowei@google.com>
Date: Tue, 21 Oct 2025 04:15:35 +0000
Subject: [PATCH 21/37] revert a change

---
 bigframes/core/compile/ibis_compiler/scalar_op_registry.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bigframes/core/compile/ibis_compiler/scalar_op_registry.py b/bigframes/core/compile/ibis_compiler/scalar_op_registry.py
index 74314cd882..7b17aac61a 100644
--- a/bigframes/core/compile/ibis_compiler/scalar_op_registry.py
+++ b/bigframes/core/compile/ibis_compiler/scalar_op_registry.py
@@ -30,7 +30,7 @@
 from bigframes.core.compile.constants import UNIT_TO_US_CONVERSION_FACTORS
 import bigframes.core.compile.ibis_compiler.default_ordering
 from bigframes.core.compile.ibis_compiler.scalar_op_compiler import (
-    scalar_op_compiler,  # TODO(b/428238610): avoid import of variables
+    scalar_op_compiler,  # TODO(tswast): avoid import of variables
 )
 import bigframes.core.compile.ibis_types
 import bigframes.operations as ops

From 75a6d68e3e4c4c6474f1aaef2e257b6a0e0d1cf3 Mon Sep 17 00:00:00 2001
From: Shuowei Li <shuowei@google.com>
Date: Tue, 21 Oct 2025 04:24:10 +0000
Subject: [PATCH 22/37] final touch of notebook

---
 notebooks/dataframes/anywidget_mode.ipynb | 28 +++++++++++------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/notebooks/dataframes/anywidget_mode.ipynb b/notebooks/dataframes/anywidget_mode.ipynb
index 903d88b210..23be36701d 100644
--- a/notebooks/dataframes/anywidget_mode.ipynb
+++ b/notebooks/dataframes/anywidget_mode.ipynb
@@ -142,7 +142,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "25b38c1408434091865f4bf9525dd069",
+       "model_id": "f0ed74d739b64a56a6e3750968b155e1",
        "version_major": 2,
        "version_minor": 0
       },
@@ -217,7 +217,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "cb4f246802a1407cb966321d8724ea27",
+       "model_id": "fd00566103744c189a52033df9c9db7a",
        "version_major": 2,
        "version_minor": 0
       },
@@ -330,7 +330,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "5b48c05254224c4dbce56f2793d6b661",
+       "model_id": "2233934e95b84a87b01b9352ca36346d",
        "version_major": 2,
        "version_minor": 0
       },
@@ -369,7 +369,7 @@
      "data": {
       "text/html": [
        "✅ Completed. \n",
-       "    Query processed 85.9 kB in 14 seconds of slot time.\n",
+       "    Query processed 85.9 kB in 11 seconds of slot time.\n",
        "    "
       ],
       "text/plain": [
@@ -408,7 +408,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "ab607cc7263f4a159ecfe63682c5e651",
+       "model_id": "3e3e09d7adee4bcaa5b3540603c2418a",
        "version_major": 2,
        "version_minor": 0
       },
@@ -432,15 +432,15 @@
     }
    ],
    "source": [
-    "bpd._read_gbq_colab(\"\"\"\\n\n",
-    "SELECT\\n\n",
-    "  AI.GENERATE(\\n\n",
-    "    prompt=>(\\\"Extract the values.\\\", OBJ.GET_ACCESS_URL(OBJ.FETCH_METADATA(OBJ.MAKE_REF(gcs_path, \\\"us.conn\\\")), \\\"r\\\")),\\n\n",
-    "    connection_id=>\\\"bigframes-dev.us.bigframes-default-connection\\\",\\n\n",
-    "    output_schema=>\\\"publication_date string, class_international string, application_number string, filing_date string\\\") AS result,\\n\n",
-    "  *\\n\n",
-    "FROM `bigquery-public-data.labeled_patents.extracted_data`\\n\n",
-    "LIMIT 5;\\n\n",
+    "bpd._read_gbq_colab(\"\"\"\n",
+    "  SELECT\n",
+    "    AI.GENERATE(\n",
+    "      prompt=>(\\\"Extract the values.\\\", OBJ.GET_ACCESS_URL(OBJ.FETCH_METADATA(OBJ.MAKE_REF(gcs_path, \\\"us.conn\\\")), \\\"r\\\")),\n",
+    "      connection_id=>\\\"bigframes-dev.us.bigframes-default-connection\\\",\n",
+    "      output_schema=>\\\"publication_date string, class_international string, application_number string, filing_date string\\\") AS result,\n",
+    "    *\n",
+    "  FROM `bigquery-public-data.labeled_patents.extracted_data`\n",
+    "  LIMIT 5;\n",
     "\"\"\")"
    ]
   }

From 8dc2171ee13b43b7d9a776fae960f0c27e3b03dd Mon Sep 17 00:00:00 2001
From: Shuowei Li <shuowei@google.com>
Date: Tue, 21 Oct 2025 17:21:16 +0000
Subject: [PATCH 23/37] fix presumbit error

---
 bigframes/operations/output_schemas.py    |  5 ----
 notebooks/dataframes/anywidget_mode.ipynb | 34 +++++++++++------------
 tests/system/small/test_anywidget.py      | 22 +++++++--------
 3 files changed, 27 insertions(+), 34 deletions(-)

diff --git a/bigframes/operations/output_schemas.py b/bigframes/operations/output_schemas.py
index 2a72d4f48f..ff9c9883dc 100644
--- a/bigframes/operations/output_schemas.py
+++ b/bigframes/operations/output_schemas.py
@@ -14,8 +14,6 @@
 
 import pyarrow as pa
 
-from bigframes import dtypes
-
 
 def parse_sql_type(sql: str) -> pa.DataType:
     """
@@ -45,9 +43,6 @@ def parse_sql_type(sql: str) -> pa.DataType:
     if sql.upper() == "BOOL":
         return pa.bool_()
 
-    if sql.upper() == "JSON":
-        return dtypes.JSON_ARROW_TYPE
-
     if sql.upper().startswith("ARRAY<") and sql.endswith(">"):
         inner_type = sql[len("ARRAY<") : -1]
         return pa.list_(parse_sql_type(inner_type))
diff --git a/notebooks/dataframes/anywidget_mode.ipynb b/notebooks/dataframes/anywidget_mode.ipynb
index 23be36701d..154afea7e1 100644
--- a/notebooks/dataframes/anywidget_mode.ipynb
+++ b/notebooks/dataframes/anywidget_mode.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 2,
    "id": "d10bfca4",
    "metadata": {},
    "outputs": [],
@@ -32,7 +32,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 3,
    "id": "ca22f059",
    "metadata": {},
    "outputs": [],
@@ -50,7 +50,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 4,
    "id": "1bc5aaf3",
    "metadata": {},
    "outputs": [],
@@ -69,7 +69,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 5,
    "id": "f289d250",
    "metadata": {},
    "outputs": [
@@ -96,7 +96,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 6,
    "id": "42bb02ab",
    "metadata": {},
    "outputs": [
@@ -123,7 +123,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 7,
    "id": "ce250157",
    "metadata": {},
    "outputs": [
@@ -142,7 +142,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "f0ed74d739b64a56a6e3750968b155e1",
+       "model_id": "6e46f6d1352043a4baee57fa089f2b0c",
        "version_major": 2,
        "version_minor": 0
       },
@@ -160,7 +160,7 @@
        "Computation deferred. Computation will process 171.4 MB"
       ]
      },
-     "execution_count": 6,
+     "execution_count": 7,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -179,7 +179,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 8,
    "id": "6920d49b",
    "metadata": {},
    "outputs": [
@@ -217,7 +217,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "fd00566103744c189a52033df9c9db7a",
+       "model_id": "88d370b617b545809eb7bb8e5c66ea0e",
        "version_major": 2,
        "version_minor": 0
       },
@@ -251,7 +251,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 9,
    "id": "12b68f15",
    "metadata": {},
    "outputs": [
@@ -288,7 +288,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 10,
    "id": "a9d5d13a",
    "metadata": {},
    "outputs": [
@@ -330,7 +330,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "2233934e95b84a87b01b9352ca36346d",
+       "model_id": "dec19e8788b74219b88bccfc65e3b9c0",
        "version_major": 2,
        "version_minor": 0
       },
@@ -361,7 +361,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 11,
    "id": "added-cell-1",
    "metadata": {},
    "outputs": [
@@ -369,7 +369,7 @@
      "data": {
       "text/html": [
        "✅ Completed. \n",
-       "    Query processed 85.9 kB in 11 seconds of slot time.\n",
+       "    Query processed 85.9 kB in 21 seconds of slot time.\n",
        "    "
       ],
       "text/plain": [
@@ -408,7 +408,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "3e3e09d7adee4bcaa5b3540603c2418a",
+       "model_id": "774357b4083c47c8a5e1fd33bb6af188",
        "version_major": 2,
        "version_minor": 0
       },
@@ -426,7 +426,7 @@
        "Computation deferred. Computation will process 0 Bytes"
       ]
      },
-     "execution_count": 10,
+     "execution_count": 11,
      "metadata": {},
      "output_type": "execute_result"
     }
diff --git a/tests/system/small/test_anywidget.py b/tests/system/small/test_anywidget.py
index 40804e1853..890d591de5 100644
--- a/tests/system/small/test_anywidget.py
+++ b/tests/system/small/test_anywidget.py
@@ -62,8 +62,7 @@ def table_widget(paginated_bf_df: bf.dataframe.DataFrame):
     Helper fixture to create a TableWidget instance with a fixed page size.
     This reduces duplication across tests that use the same widget configuration.
     """
-
-    from bigframes.display import TableWidget
+    from bigframes.display.anywidget import TableWidget
 
     with bf.option_context("display.repr_mode", "anywidget", "display.max_rows", 2):
         # Delay context manager cleanup of `max_rows` until after tests finish.
@@ -92,7 +91,7 @@ def small_bf_df(
 @pytest.fixture
 def small_widget(small_bf_df):
     """Helper fixture for tests using a DataFrame smaller than the page size."""
-    from bigframes.display import TableWidget
+    from bigframes.display.anywidget import TableWidget
 
     with bf.option_context("display.repr_mode", "anywidget", "display.max_rows", 5):
         yield TableWidget(small_bf_df)
@@ -152,10 +151,11 @@ def test_widget_initialization_should_calculate_total_row_count(
     paginated_bf_df: bf.dataframe.DataFrame,
 ):
     """A TableWidget should correctly calculate the total row count on creation."""
-    from bigframes.display import TableWidget
+    from bigframes.display.anywidget import TableWidget
 
     with bf.option_context("display.repr_mode", "anywidget", "display.max_rows", 2):
         widget = TableWidget(paginated_bf_df)
+        widget = TableWidget(paginated_bf_df)
 
     assert widget.row_count == EXPECTED_ROW_COUNT
 
@@ -266,7 +266,7 @@ def test_widget_pagination_should_work_with_custom_page_size(
     A widget should paginate correctly with a custom page size of 3.
     """
     with bf.option_context("display.repr_mode", "anywidget", "display.max_rows", 3):
-        from bigframes.display import TableWidget
+        from bigframes.display.anywidget import TableWidget
 
         widget = TableWidget(paginated_bf_df)
         assert widget.page_size == 3
@@ -312,7 +312,7 @@ def test_widget_page_size_should_be_immutable_after_creation(
     by subsequent changes to global options.
     """
     with bf.option_context("display.repr_mode", "anywidget", "display.max_rows", 2):
-        from bigframes.display import TableWidget
+        from bigframes.display.anywidget import TableWidget
 
         widget = TableWidget(paginated_bf_df)
         assert widget.page_size == 2
@@ -331,7 +331,7 @@ def test_widget_page_size_should_be_immutable_after_creation(
 def test_empty_widget_should_have_zero_row_count(empty_bf_df: bf.dataframe.DataFrame):
     """Given an empty DataFrame, the widget's row count should be 0."""
     with bf.option_context("display.repr_mode", "anywidget"):
-        from bigframes.display import TableWidget
+        from bigframes.display.anywidget import TableWidget
 
         widget = TableWidget(empty_bf_df)
 
@@ -341,7 +341,7 @@ def test_empty_widget_should_have_zero_row_count(empty_bf_df: bf.dataframe.DataF
 def test_empty_widget_should_render_table_headers(empty_bf_df: bf.dataframe.DataFrame):
     """Given an empty DataFrame, the widget should still render table headers."""
     with bf.option_context("display.repr_mode", "anywidget"):
-        from bigframes.display import TableWidget
+        from bigframes.display.anywidget import TableWidget
 
         widget = TableWidget(empty_bf_df)
 
@@ -477,10 +477,8 @@ def test_struct_column_anywidget_mode(mock_display, session: bf.Session):
             # Assert that we did NOT fall back to the deferred representation.
             mock_repr_query_job.assert_not_called()
 
-            # Assert that display was called with a TableWidget
-            mock_display.assert_called_once()
             widget = mock_display.call_args[0][0]
-            from bigframes.display import TableWidget
+            from bigframes.display.anywidget import TableWidget
 
             assert isinstance(widget, TableWidget)
 
@@ -518,7 +516,7 @@ def test_json_column_anywidget_mode(mock_display, json_df: bf.dataframe.DataFram
             # Assert TableWidget was created and displayed
             mock_display.assert_called_once()
             widget = mock_display.call_args[0][0]
-            from bigframes.display import TableWidget
+            from bigframes.display.anywidget import TableWidget
 
             assert isinstance(widget, TableWidget)
 

From 2adc426e9b97ea49397a1ce19ec30ca304af4410 Mon Sep 17 00:00:00 2001
From: Shuowei Li <shuowei@google.com>
Date: Tue, 21 Oct 2025 17:43:03 +0000
Subject: [PATCH 24/37] remove invlaid test with anywidget bug fix

---
 tests/system/small/test_series.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py
index 5ace3f54d8..63c2f6c498 100644
--- a/tests/system/small/test_series.py
+++ b/tests/system/small/test_series.py
@@ -4077,7 +4077,6 @@ def test_json_astype_others(data, to_type, errors):
         pytest.param(["10.2", None], dtypes.INT_DTYPE, id="to_int"),
         pytest.param(["false", None], dtypes.FLOAT_DTYPE, id="to_float"),
         pytest.param(["10.2", None], dtypes.BOOL_DTYPE, id="to_bool"),
-        pytest.param(["true", None], dtypes.STRING_DTYPE, id="to_string"),
     ],
 )
 def test_json_astype_others_raise_error(data, to_type):

From faf1bb2d4f7123084a0ba0d09d5414c26fa02a11 Mon Sep 17 00:00:00 2001
From: Shuowei Li <shuowei@google.com>
Date: Tue, 21 Oct 2025 22:33:00 +0000
Subject: [PATCH 25/37] fix presubmit

---
 bigframes/series.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/bigframes/series.py b/bigframes/series.py
index ad1f091803..e90a360418 100644
--- a/bigframes/series.py
+++ b/bigframes/series.py
@@ -609,6 +609,15 @@ def astype(
         if errors not in ["raise", "null"]:
             raise ValueError("Argument 'errors' must be one of 'raise' or 'null'")
         dtype = bigframes.dtypes.bigframes_type(dtype)
+
+        # BigQuery doesn't support CAST(json_col AS STRING), but it does support
+        # TO_JSON_STRING(json_col).
+        if (
+            self.dtype == bigframes.dtypes.JSON_DTYPE
+            and dtype == bigframes.dtypes.STRING_DTYPE
+        ):
+            return self._apply_unary_op(ops.json_ops.ToJSONString())
+
         return self._apply_unary_op(
             bigframes.operations.AsTypeOp(to_type=dtype, safe=(errors == "null"))
         )

From 7a83b804f27dd4216f90c21bf13885958beec924 Mon Sep 17 00:00:00 2001
From: Shuowei Li <shuowei@google.com>
Date: Tue, 21 Oct 2025 23:42:19 +0000
Subject: [PATCH 26/37] fix polar complier

---
 bigframes/core/compile/polars/compiler.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/bigframes/core/compile/polars/compiler.py b/bigframes/core/compile/polars/compiler.py
index acaf1b8f22..1a55cef63a 100644
--- a/bigframes/core/compile/polars/compiler.py
+++ b/bigframes/core/compile/polars/compiler.py
@@ -407,6 +407,19 @@ def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
             assert isinstance(op, json_ops.JSONDecode)
             return input.str.json_decode(_DTYPE_MAPPING[op.to_type])
 
+        @compile_op.register(json_ops.ToJSONString)
+        def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
+            return input.str.json_decode(pl.String())
+
+        @compile_op.register(json_ops.ParseJSON)
+        def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
+            return input.str.json_decode(pl.String())
+
+        @compile_op.register(json_ops.JSONExtract)
+        def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
+            assert isinstance(op, json_ops.JSONExtract)
+            return input.str.json_extract(json_path=op.json_path)
+
         @compile_op.register(arr_ops.ToArrayOp)
         def _(self, op: ops.ToArrayOp, *inputs: pl.Expr) -> pl.Expr:
             return pl.concat_list(*inputs)

From 233e857acfeb1d8fdfc47e90391ccc555054272e Mon Sep 17 00:00:00 2001
From: Shuowei Li <shuowei@google.com>
Date: Tue, 21 Oct 2025 23:49:03 +0000
Subject: [PATCH 27/37] Revert an unnecessary change

---
 bigframes/operations/output_schemas.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/bigframes/operations/output_schemas.py b/bigframes/operations/output_schemas.py
index ff9c9883dc..2a72d4f48f 100644
--- a/bigframes/operations/output_schemas.py
+++ b/bigframes/operations/output_schemas.py
@@ -14,6 +14,8 @@
 
 import pyarrow as pa
 
+from bigframes import dtypes
+
 
 def parse_sql_type(sql: str) -> pa.DataType:
     """
@@ -43,6 +45,9 @@ def parse_sql_type(sql: str) -> pa.DataType:
     if sql.upper() == "BOOL":
         return pa.bool_()
 
+    if sql.upper() == "JSON":
+        return dtypes.JSON_ARROW_TYPE
+
     if sql.upper().startswith("ARRAY<") and sql.endswith(">"):
         inner_type = sql[len("ARRAY<") : -1]
         return pa.list_(parse_sql_type(inner_type))

From 11daddb7ebb22e6544dfd4fb2572b4c7b630ff00 Mon Sep 17 00:00:00 2001
From: Shuowei Li <shuowei@google.com>
Date: Mon, 27 Oct 2025 22:27:23 +0000
Subject: [PATCH 28/37] apply the workaround to i/O layer

---
 bigframes/core/compile/polars/compiler.py |   42 +-
 bigframes/dataframe.py                    |   17 +-
 tests/system/small/test_dataframe.py      | 6151 +--------------------
 3 files changed, 60 insertions(+), 6150 deletions(-)

diff --git a/bigframes/core/compile/polars/compiler.py b/bigframes/core/compile/polars/compiler.py
index 1a55cef63a..681ca37da7 100644
--- a/bigframes/core/compile/polars/compiler.py
+++ b/bigframes/core/compile/polars/compiler.py
@@ -45,13 +45,13 @@
 polars_installed = True
 if TYPE_CHECKING:
     import polars as pl
+    import pyarrow as pa
 else:
     try:
         import bigframes._importing
 
-        # Use import_polars() instead of importing directly so that we check
-        # the version numbers.
         pl = bigframes._importing.import_polars()
+        import pyarrow as pa
     except Exception:
         polars_installed = False
 
@@ -409,11 +409,13 @@ def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
 
         @compile_op.register(json_ops.ToJSONString)
         def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
-            return input.str.json_decode(pl.String())
+            # Convert JSON to string representation
+            return input.cast(pl.String())
 
         @compile_op.register(json_ops.ParseJSON)
         def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
-            return input.str.json_decode(pl.String())
+            # Parse string as JSON - this should decode, not encode
+            return input.str.json_decode()
 
         @compile_op.register(json_ops.JSONExtract)
         def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
@@ -599,9 +601,35 @@ def compile_readlocal(self, node: nodes.ReadLocalNode):
                 scan_item.source_id: scan_item.id.sql
                 for scan_item in node.scan_list.items
             }
-            lazy_frame = cast(
-                pl.DataFrame, pl.from_arrow(node.local_data_source.data)
-            ).lazy()
+
+            # Workaround for PyArrow bug https://github.com/apache/arrow/issues/45262
+            # Convert JSON columns to strings before Polars processing
+            arrow_data = node.local_data_source.data
+            schema = arrow_data.schema
+
+            # Check if any columns are JSON type
+            json_field_indices = [
+                i
+                for i, field in enumerate(schema)
+                if pa.types.is_extension_type(field.type)
+                and field.type.extension_name == "google:sqlType:json"
+            ]
+
+            if json_field_indices:
+                # Convert JSON columns to string columns
+                new_arrays = []
+                new_fields = []
+                for i, field in enumerate(schema):
+                    if i in json_field_indices:
+                        # Cast JSON to string
+                        new_arrays.append(arrow_data.column(i).cast(pa.string()))
+                        new_fields.append(pa.field(field.name, pa.string()))
+                    else:
+                        new_arrays.append(arrow_data.column(i))
+                        new_fields.append(field)
+                arrow_data = pa.table(new_arrays, schema=pa.schema(new_fields))
+
+            lazy_frame = cast(pl.DataFrame, pl.from_arrow(arrow_data)).lazy()
             lazy_frame = lazy_frame.select(cols_to_read.keys()).rename(cols_to_read)
             if node.offsets_col:
                 lazy_frame = lazy_frame.with_columns(
diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index 38500b8fb3..788a47f38b 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -1965,7 +1965,22 @@ def _to_pandas_batches(
         *,
         allow_large_results: Optional[bool] = None,
     ) -> blocks.PandasBatches:
-        return self._block.to_pandas_batches(
+        # Workaround for PyArrow bug https://github.com/apache/arrow/issues/45262
+        # JSON columns are not supported in to_pandas_batches
+        json_cols = [
+            str(col_name)  # Cast to string
+            for col_name, dtype in self.dtypes.items()
+            if bigframes.dtypes.contains_db_dtypes_json_dtype(dtype)
+        ]
+
+        df = self
+        if json_cols:
+            # Convert JSON columns to strings before materialization
+            df = df.copy()
+            for col in json_cols:
+                df[col] = df[col].astype("string")
+
+        return df._block.to_pandas_batches(
             page_size=page_size,
             max_results=max_results,
             allow_large_results=allow_large_results,
diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
index 79f8efd00f..ffd9bc512b 100644
--- a/tests/system/small/test_dataframe.py
+++ b/tests/system/small/test_dataframe.py
@@ -1,6144 +1,11 @@
-# Copyright 2023 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+def test_to_pandas_batches_with_json_columns(session):
+    """Test that JSON columns are properly handled in to_pandas_batches."""
+    # Create a DataFrame with JSON column
+    df = session.read_gbq('SELECT JSON \'{"key": "value"}\' as json_col')
 
-import io
-import operator
-import sys
-import tempfile
-import typing
-from typing import Dict, List, Tuple
+    # This should not raise an error
+    batches = df._to_pandas_batches(page_size=10)
+    result = next(batches)
 
-import geopandas as gpd  # type: ignore
-import numpy as np
-import pandas as pd
-import pandas.testing
-import pyarrow as pa  # type: ignore
-import pytest
-
-import bigframes
-import bigframes._config.display_options as display_options
-import bigframes.core.indexes as bf_indexes
-import bigframes.dataframe as dataframe
-import bigframes.dtypes as dtypes
-import bigframes.pandas as bpd
-import bigframes.series as series
-from bigframes.testing.utils import (
-    assert_dfs_equivalent,
-    assert_pandas_df_equal,
-    assert_series_equal,
-    assert_series_equivalent,
-)
-
-
-def test_df_construct_copy(scalars_dfs):
-    columns = ["int64_col", "string_col", "float64_col"]
-    scalars_df, scalars_pandas_df = scalars_dfs
-    # Make the mapping from label to col_id non-trivial
-    bf_df = scalars_df.copy()
-    bf_df["int64_col"] = bf_df["int64_col"] / 2
-    pd_df = scalars_pandas_df.copy()
-    pd_df["int64_col"] = pd_df["int64_col"] / 2
-
-    bf_result = dataframe.DataFrame(bf_df, columns=columns).to_pandas()
-
-    pd_result = pd.DataFrame(pd_df, columns=columns)
-    pandas.testing.assert_frame_equal(bf_result, pd_result)
-
-
-def test_df_construct_pandas_default(scalars_dfs):
-    # This should trigger the inlined codepath
-    columns = [
-        "int64_too",
-        "int64_col",
-        "float64_col",
-        "bool_col",
-        "string_col",
-        "date_col",
-        "datetime_col",
-        "numeric_col",
-        "float64_col",
-        "time_col",
-        "timestamp_col",
-    ]
-    _, scalars_pandas_df = scalars_dfs
-    bf_result = dataframe.DataFrame(scalars_pandas_df, columns=columns).to_pandas()
-    pd_result = pd.DataFrame(scalars_pandas_df, columns=columns)
-    pandas.testing.assert_frame_equal(bf_result, pd_result)
-
-
-@pytest.mark.parametrize(
-    ("write_engine"),
-    [
-        ("bigquery_inline"),
-        ("bigquery_load"),
-        ("bigquery_streaming"),
-        ("bigquery_write"),
-    ],
-)
-def test_read_pandas_all_nice_types(
-    session: bigframes.Session, scalars_pandas_df_index: pd.DataFrame, write_engine
-):
-    bf_result = session.read_pandas(
-        scalars_pandas_df_index, write_engine=write_engine
-    ).to_pandas()
-    pandas.testing.assert_frame_equal(bf_result, scalars_pandas_df_index)
-
-
-def test_df_construct_large_strings():
-    data = [["hello", "w" + "o" * 50000 + "rld"]]
-    bf_result = dataframe.DataFrame(data).to_pandas()
-    pd_result = pd.DataFrame(data, dtype=pd.StringDtype(storage="pyarrow"))
-    pandas.testing.assert_frame_equal(bf_result, pd_result, check_index_type=False)
-
-
-def test_df_construct_pandas_load_job(scalars_dfs_maybe_ordered):
-    # This should trigger the inlined codepath
-    columns = [
-        "int64_too",
-        "int64_col",
-        "float64_col",
-        "bool_col",
-        "string_col",
-        "date_col",
-        "datetime_col",
-        "numeric_col",
-        "float64_col",
-        "time_col",
-        "timestamp_col",
-        "geography_col",
-    ]
-    _, scalars_pandas_df = scalars_dfs_maybe_ordered
-    bf_result = dataframe.DataFrame(scalars_pandas_df, columns=columns)
-    pd_result = pd.DataFrame(scalars_pandas_df, columns=columns)
-    assert_dfs_equivalent(pd_result, bf_result)
-
-
-def test_df_construct_structs(session):
-    pd_frame = pd.Series(
-        [
-            {"version": 1, "project": "pandas"},
-            {"version": 2, "project": "pandas"},
-            {"version": 1, "project": "numpy"},
-        ]
-    ).to_frame()
-    bf_series = session.read_pandas(pd_frame)
-    pd.testing.assert_frame_equal(
-        bf_series.to_pandas(), pd_frame, check_index_type=False, check_dtype=False
-    )
-
-
-def test_df_construct_local_concat_pd(scalars_pandas_df_index, session):
-    pd_df = pd.concat([scalars_pandas_df_index, scalars_pandas_df_index])
-
-    bf_df = session.read_pandas(pd_df)
-
-    pd.testing.assert_frame_equal(
-        bf_df.to_pandas(), pd_df, check_index_type=False, check_dtype=False
-    )
-
-
-def test_df_construct_pandas_set_dtype(scalars_dfs):
-    columns = [
-        "int64_too",
-        "int64_col",
-        "float64_col",
-        "bool_col",
-    ]
-    _, scalars_pandas_df = scalars_dfs
-    bf_result = dataframe.DataFrame(
-        scalars_pandas_df, columns=columns, dtype="Float64"
-    ).to_pandas()
-    pd_result = pd.DataFrame(scalars_pandas_df, columns=columns, dtype="Float64")
-    pandas.testing.assert_frame_equal(bf_result, pd_result)
-
-
-def test_df_construct_from_series(scalars_dfs_maybe_ordered):
-    scalars_df, scalars_pandas_df = scalars_dfs_maybe_ordered
-    bf_result = dataframe.DataFrame(
-        {"a": scalars_df["int64_col"], "b": scalars_df["string_col"]},
-        dtype="string[pyarrow]",
-    )
-    pd_result = pd.DataFrame(
-        {"a": scalars_pandas_df["int64_col"], "b": scalars_pandas_df["string_col"]},
-        dtype="string[pyarrow]",
-    )
-    assert_dfs_equivalent(pd_result, bf_result)
-
-
-def test_df_construct_from_dict():
-    input_dict = {
-        "Animal": ["Falcon", "Falcon", "Parrot", "Parrot"],
-        # With a space in column name. We use standardized SQL schema ids to solve the problem that BQ schema doesn't support column names with spaces. b/296751058
-        "Max Speed": [380.0, 370.0, 24.0, 26.0],
-    }
-    bf_result = dataframe.DataFrame(input_dict).to_pandas()
-    pd_result = pd.DataFrame(input_dict)
-
-    pandas.testing.assert_frame_equal(
-        bf_result, pd_result, check_dtype=False, check_index_type=False
-    )
-
-
-@pytest.mark.parametrize(
-    ("json_type"),
-    [
-        pytest.param(dtypes.JSON_DTYPE),
-        pytest.param("json"),
-    ],
-)
-def test_df_construct_w_json_dtype(json_type):
-    data = [
-        "1",
-        "false",
-        '["a", {"b": 1}, null]',
-        None,
-    ]
-    df = dataframe.DataFrame({"json_col": data}, dtype=json_type)
-
-    assert df["json_col"].dtype == dtypes.JSON_DTYPE
-    assert df["json_col"][1] == "false"
-
-
-def test_df_construct_inline_respects_location(reset_default_session_and_location):
-    # Note: This starts a thread-local session.
-    with bpd.option_context("bigquery.location", "europe-west1"):
-        df = bpd.DataFrame([[1, 2, 3], [4, 5, 6]])
-        df.to_gbq()
-        assert df.query_job is not None
-        table = bpd.get_global_session().bqclient.get_table(df.query_job.destination)
-
-        assert table.location == "europe-west1"
-
-
-def test_df_construct_dtype():
-    data = {
-        "int_col": [1, 2, 3],
-        "string_col": ["1.1", "2.0", "3.5"],
-        "float_col": [1.0, 2.0, 3.0],
-    }
-    dtype = pd.StringDtype(storage="pyarrow")
-    bf_result = dataframe.DataFrame(data, dtype=dtype)
-    pd_result = pd.DataFrame(data, dtype=dtype)
-    pd_result.index = pd_result.index.astype("Int64")
-    pandas.testing.assert_frame_equal(bf_result.to_pandas(), pd_result)
-
-
-def test_get_column(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    col_name = "int64_col"
-    series = scalars_df[col_name]
-    bf_result = series.to_pandas()
-    pd_result = scalars_pandas_df[col_name]
-    assert_series_equal(bf_result, pd_result)
-
-
-def test_get_column_nonstring(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    series = scalars_df.rename(columns={"int64_col": 123.1})[123.1]
-    bf_result = series.to_pandas()
-    pd_result = scalars_pandas_df.rename(columns={"int64_col": 123.1})[123.1]
-    assert_series_equal(bf_result, pd_result)
-
-
-@pytest.mark.parametrize(
-    "row_slice",
-    [
-        (slice(1, 7, 2)),
-        (slice(1, 7, None)),
-        (slice(None, -3, None)),
-    ],
-)
-def test_get_rows_with_slice(scalars_dfs, row_slice):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    bf_result = scalars_df[row_slice].to_pandas()
-    pd_result = scalars_pandas_df[row_slice]
-    assert_pandas_df_equal(bf_result, pd_result)
-
-
-def test_hasattr(scalars_dfs):
-    scalars_df, _ = scalars_dfs
-    assert hasattr(scalars_df, "int64_col")
-    assert hasattr(scalars_df, "head")
-    assert not hasattr(scalars_df, "not_exist")
-
-
-@pytest.mark.parametrize(
-    ("ordered"),
-    [
-        (True),
-        (False),
-    ],
-)
-def test_head_with_custom_column_labels(
-    scalars_df_index, scalars_pandas_df_index, ordered
-):
-    rename_mapping = {
-        "int64_col": "Integer Column",
-        "string_col": "言語列",
-    }
-    bf_df = scalars_df_index.rename(columns=rename_mapping).head(3)
-    bf_result = bf_df.to_pandas(ordered=ordered)
-    pd_result = scalars_pandas_df_index.rename(columns=rename_mapping).head(3)
-    assert_pandas_df_equal(bf_result, pd_result, ignore_order=not ordered)
-
-
-def test_tail_with_custom_column_labels(scalars_df_index, scalars_pandas_df_index):
-    rename_mapping = {
-        "int64_col": "Integer Column",
-        "string_col": "言語列",
-    }
-    bf_df = scalars_df_index.rename(columns=rename_mapping).tail(3)
-    bf_result = bf_df.to_pandas()
-    pd_result = scalars_pandas_df_index.rename(columns=rename_mapping).tail(3)
-    pandas.testing.assert_frame_equal(bf_result, pd_result)
-
-
-@pytest.mark.parametrize(
-    ("keep",),
-    [
-        ("first",),
-        ("last",),
-        ("all",),
-    ],
-)
-def test_df_nlargest(scalars_df_index, scalars_pandas_df_index, keep):
-    bf_result = scalars_df_index.nlargest(3, ["bool_col", "int64_too"], keep=keep)
-    pd_result = scalars_pandas_df_index.nlargest(
-        3, ["bool_col", "int64_too"], keep=keep
-    )
-
-    pd.testing.assert_frame_equal(
-        bf_result.to_pandas(),
-        pd_result,
-    )
-
-
-@pytest.mark.parametrize(
-    ("keep",),
-    [
-        ("first",),
-        ("last",),
-        ("all",),
-    ],
-)
-def test_df_nsmallest(scalars_df_index, scalars_pandas_df_index, keep):
-    bf_result = scalars_df_index.nsmallest(6, ["bool_col"], keep=keep)
-    pd_result = scalars_pandas_df_index.nsmallest(6, ["bool_col"], keep=keep)
-
-    pd.testing.assert_frame_equal(
-        bf_result.to_pandas(),
-        pd_result,
-    )
-
-
-def test_get_column_by_attr(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    series = scalars_df.int64_col
-    bf_result = series.to_pandas()
-    pd_result = scalars_pandas_df.int64_col
-    assert_series_equal(bf_result, pd_result)
-
-
-def test_get_columns(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    col_names = ["bool_col", "float64_col", "int64_col"]
-    df_subset = scalars_df.get(col_names)
-    df_pandas = df_subset.to_pandas()
-    pd.testing.assert_index_equal(
-        df_pandas.columns, scalars_pandas_df[col_names].columns
-    )
-
-
-def test_get_columns_default(scalars_dfs):
-    scalars_df, _ = scalars_dfs
-    col_names = ["not", "column", "names"]
-    result = scalars_df.get(col_names, "default_val")
-    assert result == "default_val"
-
-
-@pytest.mark.parametrize(
-    ("loc", "column", "value", "allow_duplicates"),
-    [
-        (0, 666, 2, False),
-        (5, "float64_col", 2.2, True),
-        (13, "rowindex_2", [8, 7, 6, 5, 4, 3, 2, 1, 0], True),
-        pytest.param(
-            14,
-            "test",
-            2,
-            False,
-            marks=pytest.mark.xfail(
-                raises=IndexError,
-            ),
-        ),
-        pytest.param(
-            12,
-            "int64_col",
-            2,
-            False,
-            marks=pytest.mark.xfail(
-                raises=ValueError,
-            ),
-        ),
-    ],
-)
-def test_insert(scalars_dfs, loc, column, value, allow_duplicates):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    # insert works inplace, so will influence other tests.
-    # make a copy to avoid inplace changes.
-    bf_df = scalars_df.copy()
-    pd_df = scalars_pandas_df.copy()
-    bf_df.insert(loc, column, value, allow_duplicates)
-    pd_df.insert(loc, column, value, allow_duplicates)
-
-    pd.testing.assert_frame_equal(bf_df.to_pandas(), pd_df, check_dtype=False)
-
-
-def test_mask_series_cond(scalars_df_index, scalars_pandas_df_index):
-    cond_bf = scalars_df_index["int64_col"] > 0
-    cond_pd = scalars_pandas_df_index["int64_col"] > 0
-
-    bf_df = scalars_df_index[["int64_too", "int64_col", "float64_col"]]
-    pd_df = scalars_pandas_df_index[["int64_too", "int64_col", "float64_col"]]
-    bf_result = bf_df.mask(cond_bf, bf_df + 1).to_pandas()
-    pd_result = pd_df.mask(cond_pd, pd_df + 1)
-    pandas.testing.assert_frame_equal(bf_result, pd_result)
-
-
-def test_mask_callable(scalars_df_index, scalars_pandas_df_index):
-    def is_positive(x):
-        return x > 0
-
-    bf_df = scalars_df_index[["int64_too", "int64_col", "float64_col"]]
-    pd_df = scalars_pandas_df_index[["int64_too", "int64_col", "float64_col"]]
-    bf_result = bf_df.mask(cond=is_positive, other=lambda x: x + 1).to_pandas()
-    pd_result = pd_df.mask(cond=is_positive, other=lambda x: x + 1)
-
-    pandas.testing.assert_frame_equal(bf_result, pd_result)
-
-
-def test_where_multi_column(scalars_df_index, scalars_pandas_df_index):
-    # Test when a dataframe has multi-columns.
-    columns = ["int64_col", "float64_col"]
-    dataframe_bf = scalars_df_index[columns]
-
-    dataframe_bf.columns = pd.MultiIndex.from_tuples(
-        [("str1", 1), ("str2", 2)], names=["STR", "INT"]
-    )
-    cond_bf = dataframe_bf["str1"] > 0
-
-    with pytest.raises(NotImplementedError) as context:
-        dataframe_bf.where(cond_bf).to_pandas()
-    assert (
-        str(context.value)
-        == "The dataframe.where() method does not support multi-column."
-    )
-
-
-def test_where_series_cond(scalars_df_index, scalars_pandas_df_index):
-    # Condition is dataframe, other is None (as default).
-    cond_bf = scalars_df_index["int64_col"] > 0
-    cond_pd = scalars_pandas_df_index["int64_col"] > 0
-    bf_result = scalars_df_index.where(cond_bf).to_pandas()
-    pd_result = scalars_pandas_df_index.where(cond_pd)
-    pandas.testing.assert_frame_equal(bf_result, pd_result)
-
-
-def test_where_series_cond_const_other(scalars_df_index, scalars_pandas_df_index):
-    # Condition is a series, other is a constant.
-    columns = ["int64_col", "float64_col"]
-    dataframe_bf = scalars_df_index[columns]
-    dataframe_pd = scalars_pandas_df_index[columns]
-    dataframe_bf.columns.name = "test_name"
-    dataframe_pd.columns.name = "test_name"
-
-    cond_bf = dataframe_bf["int64_col"] > 0
-    cond_pd = dataframe_pd["int64_col"] > 0
-    other = 0
-
-    bf_result = dataframe_bf.where(cond_bf, other).to_pandas()
-    pd_result = dataframe_pd.where(cond_pd, other)
-    pandas.testing.assert_frame_equal(bf_result, pd_result)
-
-
-def test_where_series_cond_dataframe_other(scalars_df_index, scalars_pandas_df_index):
-    # Condition is a series, other is a dataframe.
-    columns = ["int64_col", "float64_col"]
-    dataframe_bf = scalars_df_index[columns]
-    dataframe_pd = scalars_pandas_df_index[columns]
-
-    cond_bf = dataframe_bf["int64_col"] > 0
-    cond_pd = dataframe_pd["int64_col"] > 0
-    other_bf = -dataframe_bf
-    other_pd = -dataframe_pd
-
-    bf_result = dataframe_bf.where(cond_bf, other_bf).to_pandas()
-    pd_result = dataframe_pd.where(cond_pd, other_pd)
-    pandas.testing.assert_frame_equal(bf_result, pd_result)
-
-
-def test_where_dataframe_cond(scalars_df_index, scalars_pandas_df_index):
-    # Condition is a dataframe, other is None.
-    columns = ["int64_col", "float64_col"]
-    dataframe_bf = scalars_df_index[columns]
-    dataframe_pd = scalars_pandas_df_index[columns]
-
-    cond_bf = dataframe_bf > 0
-    cond_pd = dataframe_pd > 0
-
-    bf_result = dataframe_bf.where(cond_bf, None).to_pandas()
-    pd_result = dataframe_pd.where(cond_pd, None)
-    pandas.testing.assert_frame_equal(bf_result, pd_result)
-
-
-def test_where_dataframe_cond_const_other(scalars_df_index, scalars_pandas_df_index):
-    # Condition is a dataframe, other is a constant.
-    columns = ["int64_col", "float64_col"]
-    dataframe_bf = scalars_df_index[columns]
-    dataframe_pd = scalars_pandas_df_index[columns]
-
-    cond_bf = dataframe_bf > 0
-    cond_pd = dataframe_pd > 0
-    other_bf = 10
-    other_pd = 10
-
-    bf_result = dataframe_bf.where(cond_bf, other_bf).to_pandas()
-    pd_result = dataframe_pd.where(cond_pd, other_pd)
-    pandas.testing.assert_frame_equal(bf_result, pd_result)
-
-
-def test_where_dataframe_cond_dataframe_other(
-    scalars_df_index, scalars_pandas_df_index
-):
-    # Condition is a dataframe, other is a dataframe.
-    columns = ["int64_col", "float64_col"]
-    dataframe_bf = scalars_df_index[columns]
-    dataframe_pd = scalars_pandas_df_index[columns]
-
-    cond_bf = dataframe_bf > 0
-    cond_pd = dataframe_pd > 0
-    other_bf = dataframe_bf * 2
-    other_pd = dataframe_pd * 2
-
-    bf_result = dataframe_bf.where(cond_bf, other_bf).to_pandas()
-    pd_result = dataframe_pd.where(cond_pd, other_pd)
-    pandas.testing.assert_frame_equal(bf_result, pd_result)
-
-
-def test_where_callable_cond_constant_other(scalars_df_index, scalars_pandas_df_index):
-    # Condition is callable, other is a constant.
-    columns = ["int64_col", "float64_col"]
-    dataframe_bf = scalars_df_index[columns]
-    dataframe_pd = scalars_pandas_df_index[columns]
-
-    other = 10
-
-    bf_result = dataframe_bf.where(lambda x: x > 0, other).to_pandas()
-    pd_result = dataframe_pd.where(lambda x: x > 0, other)
-    pandas.testing.assert_frame_equal(bf_result, pd_result)
-
-
-def test_where_dataframe_cond_callable_other(scalars_df_index, scalars_pandas_df_index):
-    # Condition is a dataframe, other is callable.
-    columns = ["int64_col", "float64_col"]
-    dataframe_bf = scalars_df_index[columns]
-    dataframe_pd = scalars_pandas_df_index[columns]
-
-    cond_bf = dataframe_bf > 0
-    cond_pd = dataframe_pd > 0
-
-    def func(x):
-        return x * 2
-
-    bf_result = dataframe_bf.where(cond_bf, func).to_pandas()
-    pd_result = dataframe_pd.where(cond_pd, func)
-    pandas.testing.assert_frame_equal(bf_result, pd_result)
-
-
-def test_where_callable_cond_callable_other(scalars_df_index, scalars_pandas_df_index):
-    # Condition is callable, other is callable too.
-    columns = ["int64_col", "float64_col"]
-    dataframe_bf = scalars_df_index[columns]
-    dataframe_pd = scalars_pandas_df_index[columns]
-
-    def func(x):
-        return x["int64_col"] > 0
-
-    bf_result = dataframe_bf.where(func, lambda x: x * 2).to_pandas()
-    pd_result = dataframe_pd.where(func, lambda x: x * 2)
-    pandas.testing.assert_frame_equal(bf_result, pd_result)
-
-
-def test_where_series_other(scalars_df_index):
-    # When other is a series, throw an error.
-    columns = ["int64_col", "float64_col"]
-    dataframe_bf = scalars_df_index[columns]
-
-    with pytest.raises(
-        ValueError,
-        match="Seires is not a supported replacement type!",
-    ):
-        dataframe_bf.where(dataframe_bf > 0, dataframe_bf["int64_col"])
-
-
-def test_drop_column(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    col_name = "int64_col"
-    df_pandas = scalars_df.drop(columns=col_name).to_pandas()
-    pd.testing.assert_index_equal(
-        df_pandas.columns, scalars_pandas_df.drop(columns=col_name).columns
-    )
-
-
-def test_drop_columns(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    col_names = ["int64_col", "geography_col", "time_col"]
-    df_pandas = scalars_df.drop(columns=col_names).to_pandas()
-    pd.testing.assert_index_equal(
-        df_pandas.columns, scalars_pandas_df.drop(columns=col_names).columns
-    )
-
-
-def test_drop_labels_axis_1(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    labels = ["int64_col", "geography_col", "time_col"]
-
-    pd_result = scalars_pandas_df.drop(labels=labels, axis=1)
-    bf_result = scalars_df.drop(labels=labels, axis=1).to_pandas()
-
-    pd.testing.assert_frame_equal(pd_result, bf_result)
-
-
-def test_drop_with_custom_column_labels(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    rename_mapping = {
-        "int64_col": "Integer Column",
-        "string_col": "言語列",
-    }
-    dropped_columns = [
-        "言語列",
-        "timestamp_col",
-    ]
-    bf_df = scalars_df.rename(columns=rename_mapping).drop(columns=dropped_columns)
-    bf_result = bf_df.to_pandas()
-    pd_result = scalars_pandas_df.rename(columns=rename_mapping).drop(
-        columns=dropped_columns
-    )
-    assert_pandas_df_equal(bf_result, pd_result)
-
-
-def test_df_memory_usage(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-
-    pd_result = scalars_pandas_df.memory_usage()
-    bf_result = scalars_df.memory_usage()
-
-    pd.testing.assert_series_equal(pd_result, bf_result, rtol=1.5)
-
-
-def test_df_info(scalars_dfs):
-    expected = (
-        "<class 'bigframes.dataframe.DataFrame'>\n"
-        "Index: 9 entries, 0 to 8\n"
-        "Data columns (total 14 columns):\n"
-        "  #  Column         Non-Null Count    Dtype\n"
-        "---  -------------  ----------------  ------------------------------\n"
-        "  0  bool_col       8 non-null        boolean\n"
-        "  1  bytes_col      6 non-null        binary[pyarrow]\n"
-        "  2  date_col       7 non-null        date32[day][pyarrow]\n"
-        "  3  datetime_col   6 non-null        timestamp[us][pyarrow]\n"
-        "  4  geography_col  4 non-null        geometry\n"
-        "  5  int64_col      8 non-null        Int64\n"
-        "  6  int64_too      9 non-null        Int64\n"
-        "  7  numeric_col    6 non-null        decimal128(38, 9)[pyarrow]\n"
-        "  8  float64_col    7 non-null        Float64\n"
-        "  9  rowindex_2     9 non-null        Int64\n"
-        " 10  string_col     8 non-null        string\n"
-        " 11  time_col       6 non-null        time64[us][pyarrow]\n"
-        " 12  timestamp_col  6 non-null        timestamp[us, tz=UTC][pyarrow]\n"
-        " 13  duration_col   7 non-null        duration[us][pyarrow]\n"
-        "dtypes: Float64(1), Int64(3), binary[pyarrow](1), boolean(1), date32[day][pyarrow](1), decimal128(38, 9)[pyarrow](1), duration[us][pyarrow](1), geometry(1), string(1), time64[us][pyarrow](1), timestamp[us, tz=UTC][pyarrow](1), timestamp[us][pyarrow](1)\n"
-        "memory usage: 1341 bytes\n"
-    )
-
-    scalars_df, _ = scalars_dfs
-    bf_result = io.StringIO()
-
-    scalars_df.info(buf=bf_result)
-
-    assert expected == bf_result.getvalue()
-
-
-@pytest.mark.parametrize(
-    ("include", "exclude"),
-    [
-        ("Int64", None),
-        (["int"], None),
-        ("number", None),
-        ([pd.Int64Dtype(), pd.BooleanDtype()], None),
-        (None, [pd.Int64Dtype(), pd.BooleanDtype()]),
-        ("Int64", ["boolean"]),
-    ],
-)
-def test_select_dtypes(scalars_dfs, include, exclude):
-    scalars_df, scalars_pandas_df = scalars_dfs
-
-    pd_result = scalars_pandas_df.select_dtypes(include=include, exclude=exclude)
-    bf_result = scalars_df.select_dtypes(include=include, exclude=exclude).to_pandas()
-
-    pd.testing.assert_frame_equal(pd_result, bf_result)
-
-
-def test_drop_index(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-
-    pd_result = scalars_pandas_df.drop(index=[4, 1, 2])
-    bf_result = scalars_df.drop(index=[4, 1, 2]).to_pandas()
-
-    pd.testing.assert_frame_equal(pd_result, bf_result)
-
-
-def test_drop_pandas_index(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    drop_index = scalars_pandas_df.iloc[[4, 1, 2]].index
-
-    pd_result = scalars_pandas_df.drop(index=drop_index)
-    bf_result = scalars_df.drop(index=drop_index).to_pandas()
-
-    pd.testing.assert_frame_equal(pd_result, bf_result)
-
-
-def test_drop_bigframes_index(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    drop_index = scalars_df.loc[[4, 1, 2]].index
-    drop_pandas_index = scalars_pandas_df.loc[[4, 1, 2]].index
-
-    pd_result = scalars_pandas_df.drop(index=drop_pandas_index)
-    bf_result = scalars_df.drop(index=drop_index).to_pandas()
-
-    pd.testing.assert_frame_equal(pd_result, bf_result)
-
-
-def test_drop_bigframes_index_with_na(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    scalars_df = scalars_df.copy()
-    scalars_pandas_df = scalars_pandas_df.copy()
-    scalars_df = scalars_df.set_index("bytes_col")
-    scalars_pandas_df = scalars_pandas_df.set_index("bytes_col")
-    drop_index = scalars_df.iloc[[3, 5]].index
-    drop_pandas_index = scalars_pandas_df.iloc[[3, 5]].index
-
-    pd_result = scalars_pandas_df.drop(index=drop_pandas_index)  # drop_pandas_index)
-    bf_result = scalars_df.drop(index=drop_index).to_pandas()
-
-    pd.testing.assert_frame_equal(pd_result, bf_result)
-
-
-def test_drop_bigframes_multiindex(scalars_dfs):
-    # TODO: supply a reason why this isn't compatible with pandas 1.x
-    pytest.importorskip("pandas", minversion="2.0.0")
-    scalars_df, scalars_pandas_df = scalars_dfs
-    scalars_df = scalars_df.copy()
-    scalars_pandas_df = scalars_pandas_df.copy()
-    sub_df = scalars_df.iloc[[4, 1, 2]]
-    sub_pandas_df = scalars_pandas_df.iloc[[4, 1, 2]]
-    sub_df = sub_df.set_index(["bytes_col", "numeric_col"])
-    sub_pandas_df = sub_pandas_df.set_index(["bytes_col", "numeric_col"])
-    drop_index = sub_df.index
-    drop_pandas_index = sub_pandas_df.index
-
-    scalars_df = scalars_df.set_index(["bytes_col", "numeric_col"])
-    scalars_pandas_df = scalars_pandas_df.set_index(["bytes_col", "numeric_col"])
-    bf_result = scalars_df.drop(index=drop_index).to_pandas()
-    pd_result = scalars_pandas_df.drop(index=drop_pandas_index)
-
-    pd.testing.assert_frame_equal(pd_result, bf_result)
-
-
-def test_drop_labels_axis_0(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-
-    pd_result = scalars_pandas_df.drop(labels=[4, 1, 2], axis=0)
-    bf_result = scalars_df.drop(labels=[4, 1, 2], axis=0).to_pandas()
-
-    pd.testing.assert_frame_equal(pd_result, bf_result)
-
-
-def test_drop_index_and_columns(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-
-    pd_result = scalars_pandas_df.drop(index=[4, 1, 2], columns="int64_col")
-    bf_result = scalars_df.drop(index=[4, 1, 2], columns="int64_col").to_pandas()
-
-    pd.testing.assert_frame_equal(pd_result, bf_result)
-
-
-def test_rename(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    col_name_dict = {"bool_col": 1.2345}
-    df_pandas = scalars_df.rename(columns=col_name_dict).to_pandas()
-    pd.testing.assert_index_equal(
-        df_pandas.columns, scalars_pandas_df.rename(columns=col_name_dict).columns
-    )
-
-
-def test_df_peek(scalars_dfs_maybe_ordered):
-    scalars_df, scalars_pandas_df = scalars_dfs_maybe_ordered
-
-    peek_result = scalars_df.peek(n=3, force=False, allow_large_results=True)
-
-    pd.testing.assert_index_equal(scalars_pandas_df.columns, peek_result.columns)
-    assert len(peek_result) == 3
-
-
-def test_df_peek_with_large_results_not_allowed(scalars_dfs_maybe_ordered):
-    scalars_df, scalars_pandas_df = scalars_dfs_maybe_ordered
-
-    peek_result = scalars_df.peek(n=3, force=False, allow_large_results=False)
-
-    pd.testing.assert_index_equal(scalars_pandas_df.columns, peek_result.columns)
-    assert len(peek_result) == 3
-
-
-def test_df_peek_filtered(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    peek_result = scalars_df[scalars_df.int64_col != 0].peek(n=3, force=False)
-    pd.testing.assert_index_equal(scalars_pandas_df.columns, peek_result.columns)
-    assert len(peek_result) == 3
-
-
-def test_df_peek_exception(scalars_dfs):
-    scalars_df, _ = scalars_dfs
-
-    with pytest.raises(ValueError):
-        # Window ops aren't compatible with efficient peeking
-        scalars_df[["int64_col", "int64_too"]].cumsum().peek(n=3, force=False)
-
-
-def test_df_peek_force_default(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    peek_result = scalars_df[["int64_col", "int64_too"]].cumsum().peek(n=3)
-    pd.testing.assert_index_equal(
-        scalars_pandas_df[["int64_col", "int64_too"]].columns, peek_result.columns
-    )
-    assert len(peek_result) == 3
-
-
-def test_df_peek_reset_index(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    peek_result = (
-        scalars_df[["int64_col", "int64_too"]].reset_index(drop=True).peek(n=3)
-    )
-    pd.testing.assert_index_equal(
-        scalars_pandas_df[["int64_col", "int64_too"]].columns, peek_result.columns
-    )
-    assert len(peek_result) == 3
-
-
-def test_repr_w_all_rows(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-
-    # Remove columns with flaky formatting, like NUMERIC columns (which use the
-    # object dtype). Also makes a copy so that mutating the index name doesn't
-    # break other tests.
-    scalars_df = scalars_df.drop(columns=["numeric_col"])
-    scalars_pandas_df = scalars_pandas_df.drop(columns=["numeric_col"])
-
-    # When there are 10 or fewer rows, the outputs should be identical.
-    actual = repr(scalars_df.head(10))
-
-    with display_options.pandas_repr(bigframes.options.display):
-        expected = repr(scalars_pandas_df.head(10))
-
-    assert actual == expected
-
-
-def test_join_repr(scalars_dfs_maybe_ordered):
-    scalars_df, scalars_pandas_df = scalars_dfs_maybe_ordered
-
-    scalars_df = (
-        scalars_df[["int64_col"]]
-        .join(scalars_df.set_index("int64_col")[["int64_too"]])
-        .sort_index()
-    )
-    scalars_pandas_df = (
-        scalars_pandas_df[["int64_col"]]
-        .join(scalars_pandas_df.set_index("int64_col")[["int64_too"]])
-        .sort_index()
-    )
-    # Pandas join result index name seems to depend on the index values in a way that bigframes can't match exactly
-    scalars_pandas_df.index.name = None
-
-    actual = repr(scalars_df)
-
-    with display_options.pandas_repr(bigframes.options.display):
-        expected = repr(scalars_pandas_df)
-
-    assert actual == expected
-
-
-def test_repr_w_display_options(scalars_dfs, session):
-    metrics = session._metrics
-    scalars_df, _ = scalars_dfs
-    # get a pandas df of the expected format
-    df, _ = scalars_df._block.to_pandas()
-    pandas_df = df.set_axis(scalars_df._block.column_labels, axis=1)
-    pandas_df.index.name = scalars_df.index.name
-
-    executions_pre = metrics.execution_count
-    with bigframes.option_context(
-        "display.max_rows", 10, "display.max_columns", 5, "display.max_colwidth", 10
-    ):
-
-        # When there are 10 or fewer rows, the outputs should be identical except for the extra note.
-        actual = scalars_df.head(10).__repr__()
-        executions_post = metrics.execution_count
-
-        with display_options.pandas_repr(bigframes.options.display):
-            pandas_repr = pandas_df.head(10).__repr__()
-
-    assert actual == pandas_repr
-    assert (executions_post - executions_pre) <= 3
-
-
-def test_repr_html_w_all_rows(scalars_dfs, session):
-    metrics = session._metrics
-    scalars_df, _ = scalars_dfs
-    # get a pandas df of the expected format
-    df, _ = scalars_df._block.to_pandas()
-    pandas_df = df.set_axis(scalars_df._block.column_labels, axis=1)
-    pandas_df.index.name = scalars_df.index.name
-
-    executions_pre = metrics.execution_count
-    # When there are 10 or fewer rows, the outputs should be identical except for the extra note.
-    actual = scalars_df.head(10)._repr_html_()
-    executions_post = metrics.execution_count
-
-    with display_options.pandas_repr(bigframes.options.display):
-        pandas_repr = pandas_df.head(10)._repr_html_()
-
-    expected = (
-        pandas_repr
-        + f"[{len(pandas_df.index)} rows x {len(pandas_df.columns)} columns in total]"
-    )
-    assert actual == expected
-    assert (executions_post - executions_pre) <= 3
-
-
-def test_df_column_name_with_space(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    col_name_dict = {"bool_col": "bool  col"}
-    df_pandas = scalars_df.rename(columns=col_name_dict).to_pandas()
-    pd.testing.assert_index_equal(
-        df_pandas.columns, scalars_pandas_df.rename(columns=col_name_dict).columns
-    )
-
-
-def test_df_column_name_duplicate(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    col_name_dict = {"int64_too": "int64_col"}
-    df_pandas = scalars_df.rename(columns=col_name_dict).to_pandas()
-    pd.testing.assert_index_equal(
-        df_pandas.columns, scalars_pandas_df.rename(columns=col_name_dict).columns
-    )
-
-
-def test_get_df_column_name_duplicate(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    col_name_dict = {"int64_too": "int64_col"}
-
-    bf_result = scalars_df.rename(columns=col_name_dict)["int64_col"].to_pandas()
-    pd_result = scalars_pandas_df.rename(columns=col_name_dict)["int64_col"]
-    pd.testing.assert_index_equal(bf_result.columns, pd_result.columns)
-
-
-@pytest.mark.parametrize(
-    ("indices", "axis"),
-    [
-        ([1, 3, 5], 0),
-        ([2, 4, 6], 1),
-        ([1, -3, -5, -6], "index"),
-        ([-2, -4, -6], "columns"),
-    ],
-)
-def test_take_df(scalars_dfs, indices, axis):
-    scalars_df, scalars_pandas_df = scalars_dfs
-
-    bf_result = scalars_df.take(indices, axis=axis).to_pandas()
-    pd_result = scalars_pandas_df.take(indices, axis=axis)
-
-    assert_pandas_df_equal(bf_result, pd_result)
-
-
-def test_filter_df(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-
-    bf_bool_series = scalars_df["bool_col"]
-    bf_result = scalars_df[bf_bool_series].to_pandas()
-
-    pd_bool_series = scalars_pandas_df["bool_col"]
-    pd_result = scalars_pandas_df[pd_bool_series]
-
-    assert_pandas_df_equal(bf_result, pd_result)
-
-
-def test_read_gbq_direct_to_batches_row_count(unordered_session):
-    df = unordered_session.read_gbq("bigquery-public-data.usa_names.usa_1910_2013")
-    iter = df.to_pandas_batches()
-    assert iter.total_rows == 5552452
-
-
-def test_df_to_pandas_batches(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-
-    capped_unfiltered_batches = scalars_df.to_pandas_batches(page_size=2, max_results=6)
-    bf_bool_series = scalars_df["bool_col"]
-    filtered_batches = scalars_df[bf_bool_series].to_pandas_batches()
-
-    pd_bool_series = scalars_pandas_df["bool_col"]
-    pd_result = scalars_pandas_df[pd_bool_series]
-
-    assert 6 == capped_unfiltered_batches.total_rows
-    assert len(pd_result) == filtered_batches.total_rows
-    assert_pandas_df_equal(pd.concat(filtered_batches), pd_result)
-
-
-@pytest.mark.parametrize(
-    ("literal", "expected_dtype"),
-    (
-        pytest.param(
-            2,
-            dtypes.INT_DTYPE,
-            id="INT64",
-        ),
-        # ====================================================================
-        # NULL values
-        #
-        # These are regression tests for b/428999884. It needs to be possible to
-        # set a column to NULL with a desired type (not just the pandas default
-        # of float64).
-        # ====================================================================
-        pytest.param(None, dtypes.FLOAT_DTYPE, id="NULL-None"),
-        pytest.param(
-            pa.scalar(None, type=pa.int64()),
-            dtypes.INT_DTYPE,
-            id="NULL-pyarrow-TIMESTAMP",
-        ),
-        pytest.param(
-            pa.scalar(None, type=pa.timestamp("us", tz="UTC")),
-            dtypes.TIMESTAMP_DTYPE,
-            id="NULL-pyarrow-TIMESTAMP",
-        ),
-        pytest.param(
-            pa.scalar(None, type=pa.timestamp("us")),
-            dtypes.DATETIME_DTYPE,
-            id="NULL-pyarrow-DATETIME",
-        ),
-    ),
-)
-def test_assign_new_column_w_literal(scalars_dfs, literal, expected_dtype):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    df = scalars_df.assign(new_col=literal)
-    bf_result = df.to_pandas()
-
-    new_col_pd = literal
-    if isinstance(literal, pa.Scalar):
-        # PyArrow integer scalars aren't yet supported in pandas Int64Dtype.
-        new_col_pd = literal.as_py()
-
-    # Pandas might not pick the same dtype as BigFrames, but it should at least
-    # be castable to it.
-    pd_result = scalars_pandas_df.assign(new_col=new_col_pd)
-    pd_result["new_col"] = pd_result["new_col"].astype(expected_dtype)
-
-    assert_pandas_df_equal(bf_result, pd_result)
-
-
-def test_assign_new_column_w_loc(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    bf_df = scalars_df.copy()
-    pd_df = scalars_pandas_df.copy()
-    bf_df.loc[:, "new_col"] = 2
-    pd_df.loc[:, "new_col"] = 2
-    bf_result = bf_df.to_pandas()
-    pd_result = pd_df
-
-    # Convert default pandas dtypes `int64` to match BigQuery DataFrames dtypes.
-    pd_result["new_col"] = pd_result["new_col"].astype("Int64")
-
-    pd.testing.assert_frame_equal(bf_result, pd_result)
-
-
-@pytest.mark.parametrize(
-    ("scalar",),
-    [
-        (2.1,),
-        (None,),
-    ],
-)
-def test_assign_new_column_w_setitem(scalars_dfs, scalar):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    bf_df = scalars_df.copy()
-    pd_df = scalars_pandas_df.copy()
-    bf_df["new_col"] = scalar
-    pd_df["new_col"] = scalar
-    bf_result = bf_df.to_pandas()
-    pd_result = pd_df
-
-    # Convert default pandas dtypes `float64` to match BigQuery DataFrames dtypes.
-    pd_result["new_col"] = pd_result["new_col"].astype("Float64")
-
-    pd.testing.assert_frame_equal(bf_result, pd_result)
-
-
-def test_assign_new_column_w_setitem_dataframe(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    bf_df = scalars_df.copy()
-    pd_df = scalars_pandas_df.copy()
-    bf_df["int64_col"] = bf_df["int64_too"].to_frame()
-    pd_df["int64_col"] = pd_df["int64_too"].to_frame()
-
-    # Convert default pandas dtypes `int64` to match BigQuery DataFrames dtypes.
-    pd_df["int64_col"] = pd_df["int64_col"].astype("Int64")
-
-    pd.testing.assert_frame_equal(bf_df.to_pandas(), pd_df)
-
-
-def test_assign_new_column_w_setitem_dataframe_error(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    bf_df = scalars_df.copy()
-    pd_df = scalars_pandas_df.copy()
-
-    with pytest.raises(ValueError):
-        bf_df["impossible_col"] = bf_df[["int64_too", "string_col"]]
-    with pytest.raises(ValueError):
-        pd_df["impossible_col"] = pd_df[["int64_too", "string_col"]]
-
-
-def test_assign_new_column_w_setitem_list(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    bf_df = scalars_df.copy()
-    pd_df = scalars_pandas_df.copy()
-    bf_df["new_col"] = [9, 8, 7, 6, 5, 4, 3, 2, 1]
-    pd_df["new_col"] = [9, 8, 7, 6, 5, 4, 3, 2, 1]
-    bf_result = bf_df.to_pandas()
-    pd_result = pd_df
-
-    # Convert default pandas dtypes `int64` to match BigQuery DataFrames dtypes.
-    pd_result["new_col"] = pd_result["new_col"].astype("Int64")
-
-    pd.testing.assert_frame_equal(bf_result, pd_result)
-
-
-def test_assign_new_column_w_setitem_list_repeated(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    bf_df = scalars_df.copy()
-    pd_df = scalars_pandas_df.copy()
-    bf_df["new_col"] = [9, 8, 7, 6, 5, 4, 3, 2, 1]
-    pd_df["new_col"] = [9, 8, 7, 6, 5, 4, 3, 2, 1]
-    bf_df["new_col_2"] = [1, 3, 2, 5, 4, 7, 6, 9, 8]
-    pd_df["new_col_2"] = [1, 3, 2, 5, 4, 7, 6, 9, 8]
-    bf_result = bf_df.to_pandas()
-    pd_result = pd_df
-
-    # Convert default pandas dtypes `int64` to match BigQuery DataFrames dtypes.
-    pd_result["new_col"] = pd_result["new_col"].astype("Int64")
-    pd_result["new_col_2"] = pd_result["new_col_2"].astype("Int64")
-
-    pd.testing.assert_frame_equal(bf_result, pd_result)
-
-
-def test_assign_new_column_w_setitem_list_custom_index(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    bf_df = scalars_df.copy()
-    pd_df = scalars_pandas_df.copy()
-
-    # set the custom index
-    pd_df = pd_df.set_index(["string_col", "int64_col"])
-    bf_df = bf_df.set_index(["string_col", "int64_col"])
-
-    bf_df["new_col"] = [9, 8, 7, 6, 5, 4, 3, 2, 1]
-    pd_df["new_col"] = [9, 8, 7, 6, 5, 4, 3, 2, 1]
-    bf_result = bf_df.to_pandas()
-    pd_result = pd_df
-
-    # Convert default pandas dtypes `int64` to match BigQuery DataFrames dtypes.
-    pd_result["new_col"] = pd_result["new_col"].astype("Int64")
-
-    pd.testing.assert_frame_equal(bf_result, pd_result)
-
-
-def test_assign_new_column_w_setitem_list_error(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    bf_df = scalars_df.copy()
-    pd_df = scalars_pandas_df.copy()
-
-    with pytest.raises(ValueError):
-        pd_df["new_col"] = [1, 2, 3]  # should be len 9, is 3
-    with pytest.raises(ValueError):
-        bf_df["new_col"] = [1, 2, 3]
-
-
-@pytest.mark.parametrize(
-    ("key", "value"),
-    [
-        pytest.param(["int64_col", "int64_too"], 1, id="scalar_to_existing_column"),
-        pytest.param(
-            ["int64_col", "int64_too"], [1, 2], id="sequence_to_existing_column"
-        ),
-        pytest.param(
-            ["int64_col", "new_col"], [1, 2], id="sequence_to_partial_new_column"
-        ),
-        pytest.param(
-            ["new_col", "new_col_too"], [1, 2], id="sequence_to_full_new_column"
-        ),
-        pytest.param(
-            pd.Index(("new_col", "new_col_too")),
-            [1, 2],
-            id="sequence_to_full_new_column_as_index",
-        ),
-    ],
-)
-def test_setitem_multicolumn_with_literals(scalars_dfs, key, value):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    bf_result = scalars_df.copy()
-    pd_result = scalars_pandas_df.copy()
-
-    bf_result[key] = value
-    pd_result[key] = value
-
-    pd.testing.assert_frame_equal(pd_result, bf_result.to_pandas(), check_dtype=False)
-
-
-def test_setitem_multicolumn_with_literals_different_lengths_raise_error(scalars_dfs):
-    scalars_df, _ = scalars_dfs
-    bf_result = scalars_df.copy()
-
-    with pytest.raises(ValueError):
-        bf_result[["int64_col", "int64_too"]] = [1]
-
-
-def test_setitem_multicolumn_with_dataframes(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    bf_result = scalars_df.copy()
-    pd_result = scalars_pandas_df.copy()
-
-    bf_result[["int64_col", "int64_too"]] = bf_result[["int64_too", "int64_col"]] / 2
-    pd_result[["int64_col", "int64_too"]] = pd_result[["int64_too", "int64_col"]] / 2
-
-    pd.testing.assert_frame_equal(pd_result, bf_result.to_pandas(), check_dtype=False)
-
-
-def test_setitem_multicolumn_with_dataframes_series_on_rhs_raise_error(scalars_dfs):
-    scalars_df, _ = scalars_dfs
-    bf_result = scalars_df.copy()
-
-    with pytest.raises(ValueError):
-        bf_result[["int64_col", "int64_too"]] = bf_result["int64_col"] / 2
-
-
-def test_setitem_multicolumn_with_dataframes_different_lengths_raise_error(scalars_dfs):
-    scalars_df, _ = scalars_dfs
-    bf_result = scalars_df.copy()
-
-    with pytest.raises(ValueError):
-        bf_result[["int64_col"]] = bf_result[["int64_col", "int64_too"]] / 2
-
-
-def test_assign_existing_column(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    kwargs = {"int64_col": 2}
-    df = scalars_df.assign(**kwargs)
-    bf_result = df.to_pandas()
-    pd_result = scalars_pandas_df.assign(**kwargs)
-
-    # Convert default pandas dtypes `int64` to match BigQuery DataFrames dtypes.
-    pd_result["int64_col"] = pd_result["int64_col"].astype("Int64")
-
-    assert_pandas_df_equal(bf_result, pd_result)
-
-
-def test_assign_listlike_to_empty_df(session):
-    empty_df = dataframe.DataFrame(session=session)
-    empty_pandas_df = pd.DataFrame()
-
-    bf_result = empty_df.assign(new_col=[1, 2, 3])
-    pd_result = empty_pandas_df.assign(new_col=[1, 2, 3])
-
-    pd_result["new_col"] = pd_result["new_col"].astype("Int64")
-    pd_result.index = pd_result.index.astype("Int64")
-    assert_pandas_df_equal(bf_result.to_pandas(), pd_result)
-
-
-def test_assign_to_empty_df_multiindex_error(session):
-    empty_df = dataframe.DataFrame(session=session)
-    empty_pandas_df = pd.DataFrame()
-
-    empty_df["empty_col_1"] = typing.cast(series.Series, [])
-    empty_df["empty_col_2"] = typing.cast(series.Series, [])
-    empty_pandas_df["empty_col_1"] = []
-    empty_pandas_df["empty_col_2"] = []
-    empty_df = empty_df.set_index(["empty_col_1", "empty_col_2"])
-    empty_pandas_df = empty_pandas_df.set_index(["empty_col_1", "empty_col_2"])
-
-    with pytest.raises(ValueError):
-        empty_df.assign(new_col=[1, 2, 3, 4, 5, 6, 7, 8, 9])
-    with pytest.raises(ValueError):
-        empty_pandas_df.assign(new_col=[1, 2, 3, 4, 5, 6, 7, 8, 9])
-
-
-@pytest.mark.parametrize(
-    ("ordered"),
-    [
-        (True),
-        (False),
-    ],
-)
-def test_assign_series(scalars_dfs, ordered):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    column_name = "int64_col"
-    df = scalars_df.assign(new_col=scalars_df[column_name])
-    bf_result = df.to_pandas(ordered=ordered)
-    pd_result = scalars_pandas_df.assign(new_col=scalars_pandas_df[column_name])
-
-    assert_pandas_df_equal(bf_result, pd_result, ignore_order=not ordered)
-
-
-def test_assign_series_overwrite(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    column_name = "int64_col"
-    df = scalars_df.assign(**{column_name: scalars_df[column_name] + 3})
-    bf_result = df.to_pandas()
-    pd_result = scalars_pandas_df.assign(
-        **{column_name: scalars_pandas_df[column_name] + 3}
-    )
-
-    assert_pandas_df_equal(bf_result, pd_result)
-
-
-def test_assign_sequential(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    kwargs = {"int64_col": 2, "new_col": 3, "new_col2": 4}
-    df = scalars_df.assign(**kwargs)
-    bf_result = df.to_pandas()
-    pd_result = scalars_pandas_df.assign(**kwargs)
-
-    # Convert default pandas dtypes `int64` to match BigQuery DataFrames dtypes.
-    pd_result["int64_col"] = pd_result["int64_col"].astype("Int64")
-    pd_result["new_col"] = pd_result["new_col"].astype("Int64")
-    pd_result["new_col2"] = pd_result["new_col2"].astype("Int64")
-
-    assert_pandas_df_equal(bf_result, pd_result)
-
-
-# Require an index so that the self-join is consistent each time.
-def test_assign_same_table_different_index_performs_self_join(
-    scalars_df_index, scalars_pandas_df_index
-):
-    column_name = "int64_col"
-    bf_df = scalars_df_index.assign(
-        alternative_index=scalars_df_index["rowindex_2"] + 2
-    )
-    pd_df = scalars_pandas_df_index.assign(
-        alternative_index=scalars_pandas_df_index["rowindex_2"] + 2
-    )
-    bf_df_2 = bf_df.set_index("alternative_index")
-    pd_df_2 = pd_df.set_index("alternative_index")
-    bf_result = bf_df.assign(new_col=bf_df_2[column_name] * 10).to_pandas()
-    pd_result = pd_df.assign(new_col=pd_df_2[column_name] * 10)
-
-    pandas.testing.assert_frame_equal(bf_result, pd_result)
-
-
-# Different table expression must have Index
-def test_assign_different_df(
-    scalars_df_index, scalars_df_2_index, scalars_pandas_df_index
-):
-    column_name = "int64_col"
-    df = scalars_df_index.assign(new_col=scalars_df_2_index[column_name])
-    bf_result = df.to_pandas()
-    # Doesn't matter to pandas if it comes from the same DF or a different DF.
-    pd_result = scalars_pandas_df_index.assign(
-        new_col=scalars_pandas_df_index[column_name]
-    )
-
-    assert_pandas_df_equal(bf_result, pd_result)
-
-
-def test_assign_different_df_w_loc(
-    scalars_df_index, scalars_df_2_index, scalars_pandas_df_index
-):
-    bf_df = scalars_df_index.copy()
-    bf_df2 = scalars_df_2_index.copy()
-    pd_df = scalars_pandas_df_index.copy()
-    assert "int64_col" in bf_df.columns
-    assert "int64_col" in pd_df.columns
-    bf_df.loc[:, "int64_col"] = bf_df2.loc[:, "int64_col"] + 1
-    pd_df.loc[:, "int64_col"] = pd_df.loc[:, "int64_col"] + 1
-    bf_result = bf_df.to_pandas()
-    pd_result = pd_df
-
-    # Convert default pandas dtypes `int64` to match BigQuery DataFrames dtypes.
-    pd_result["int64_col"] = pd_result["int64_col"].astype("Int64")
-
-    pd.testing.assert_frame_equal(bf_result, pd_result)
-
-
-def test_assign_different_df_w_setitem(
-    scalars_df_index, scalars_df_2_index, scalars_pandas_df_index
-):
-    bf_df = scalars_df_index.copy()
-    bf_df2 = scalars_df_2_index.copy()
-    pd_df = scalars_pandas_df_index.copy()
-    assert "int64_col" in bf_df.columns
-    assert "int64_col" in pd_df.columns
-    bf_df["int64_col"] = bf_df2["int64_col"] + 1
-    pd_df["int64_col"] = pd_df["int64_col"] + 1
-    bf_result = bf_df.to_pandas()
-    pd_result = pd_df
-
-    # Convert default pandas dtypes `int64` to match BigQuery DataFrames dtypes.
-    pd_result["int64_col"] = pd_result["int64_col"].astype("Int64")
-
-    pd.testing.assert_frame_equal(bf_result, pd_result)
-
-
-def test_assign_callable_lambda(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    kwargs = {"new_col": lambda x: x["int64_col"] + x["int64_too"]}
-    df = scalars_df.assign(**kwargs)
-    bf_result = df.to_pandas()
-    pd_result = scalars_pandas_df.assign(**kwargs)
-
-    # Convert default pandas dtypes `int64` to match BigQuery DataFrames dtypes.
-    pd_result["new_col"] = pd_result["new_col"].astype("Int64")
-
-    assert_pandas_df_equal(bf_result, pd_result)
-
-
-@pytest.mark.parametrize(
-    ("axis", "how", "ignore_index", "subset"),
-    [
-        (0, "any", False, None),
-        (0, "any", True, None),
-        (0, "all", False, ["bool_col", "time_col"]),
-        (0, "any", False, ["bool_col", "time_col"]),
-        (0, "all", False, "time_col"),
-        (1, "any", False, None),
-        (1, "all", False, None),
-    ],
-)
-def test_df_dropna_by_how(scalars_dfs, axis, how, ignore_index, subset):
-    # TODO: supply a reason why this isn't compatible with pandas 1.x
-    pytest.importorskip("pandas", minversion="2.0.0")
-    scalars_df, scalars_pandas_df = scalars_dfs
-    df = scalars_df.dropna(axis=axis, how=how, ignore_index=ignore_index, subset=subset)
-    bf_result = df.to_pandas()
-    pd_result = scalars_pandas_df.dropna(
-        axis=axis, how=how, ignore_index=ignore_index, subset=subset
-    )
-
-    # Pandas uses int64 instead of Int64 (nullable) dtype.
-    pd_result.index = pd_result.index.astype(pd.Int64Dtype())
-    pandas.testing.assert_frame_equal(bf_result, pd_result)
-
-
-@pytest.mark.parametrize(
-    ("axis", "ignore_index", "subset", "thresh"),
-    [
-        (0, False, None, 2),
-        (0, True, None, 3),
-        (1, False, None, 2),
-    ],
-)
-def test_df_dropna_by_thresh(scalars_dfs, axis, ignore_index, subset, thresh):
-    """
-    Tests that dropna correctly keeps rows/columns with a minimum number
-    of non-null values.
-    """
-    # TODO: supply a reason why this isn't compatible with pandas 1.x
-    pytest.importorskip("pandas", minversion="2.0.0")
-    scalars_df, scalars_pandas_df = scalars_dfs
-
-    df_result = scalars_df.dropna(
-        axis=axis, thresh=thresh, ignore_index=ignore_index, subset=subset
-    )
-    pd_result = scalars_pandas_df.dropna(
-        axis=axis, thresh=thresh, ignore_index=ignore_index, subset=subset
-    )
-
-    bf_result = df_result.to_pandas()
-    # Pandas uses int64 instead of Int64 (nullable) dtype.
-    pd_result.index = pd_result.index.astype(pd.Int64Dtype())
-    pd.testing.assert_frame_equal(bf_result, pd_result)
-
-
-def test_df_dropna_range_columns(scalars_dfs):
-    # TODO: supply a reason why this isn't compatible with pandas 1.x
-    pytest.importorskip("pandas", minversion="2.0.0")
-    scalars_df, scalars_pandas_df = scalars_dfs
-    scalars_df = scalars_df.copy()
-    scalars_pandas_df = scalars_pandas_df.copy()
-    scalars_df.columns = pandas.RangeIndex(0, len(scalars_df.columns))
-    scalars_pandas_df.columns = pandas.RangeIndex(0, len(scalars_pandas_df.columns))
-
-    df = scalars_df.dropna()
-    bf_result = df.to_pandas()
-    pd_result = scalars_pandas_df.dropna()
-
-    pandas.testing.assert_frame_equal(bf_result, pd_result)
-
-
-def test_df_interpolate(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    columns = ["int64_col", "int64_too", "float64_col"]
-    bf_result = scalars_df[columns].interpolate().to_pandas()
-    # Pandas can only interpolate on "float64" columns
-    # https://github.com/pandas-dev/pandas/issues/40252
-    pd_result = scalars_pandas_df[columns].astype("float64").interpolate()
-
-    pandas.testing.assert_frame_equal(
-        bf_result,
-        pd_result,
-        check_index_type=False,
-        check_dtype=False,
-    )
-
-
-@pytest.mark.parametrize(
-    "col, fill_value",
-    [
-        (["int64_col", "float64_col"], 3),
-        (["string_col"], "A"),
-        (["datetime_col"], pd.Timestamp("2023-01-01")),
-    ],
-)
-def test_df_fillna(scalars_dfs, col, fill_value):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    bf_result = scalars_df[col].fillna(fill_value).to_pandas()
-    pd_result = scalars_pandas_df[col].fillna(fill_value)
-
-    pd.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False)
-
-
-def test_df_replace_scalar_scalar(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    bf_result = scalars_df.replace(555.555, 3).to_pandas()
-    pd_result = scalars_pandas_df.replace(555.555, 3)
-
-    # pandas has narrower result types as they are determined dynamically
-    pd.testing.assert_frame_equal(pd_result, bf_result, check_dtype=False)
-
-
-def test_df_replace_regex_scalar(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    bf_result = scalars_df.replace("^H.l", "Howdy, Planet!", regex=True).to_pandas()
-    pd_result = scalars_pandas_df.replace("^H.l", "Howdy, Planet!", regex=True)
-
-    pd.testing.assert_frame_equal(
-        pd_result,
-        bf_result,
-    )
-
-
-def test_df_replace_list_scalar(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    bf_result = scalars_df.replace([555.555, 3.2], 3).to_pandas()
-    pd_result = scalars_pandas_df.replace([555.555, 3.2], 3)
-
-    # pandas has narrower result types as they are determined dynamically
-    pd.testing.assert_frame_equal(
-        pd_result,
-        bf_result,
-        check_dtype=False,
-    )
-
-
-def test_df_replace_value_dict(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    bf_result = scalars_df.replace(1, {"int64_col": 100, "int64_too": 200}).to_pandas()
-    pd_result = scalars_pandas_df.replace(1, {"int64_col": 100, "int64_too": 200})
-
-    pd.testing.assert_frame_equal(
-        pd_result,
-        bf_result,
-    )
-
-
-def test_df_ffill(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    bf_result = scalars_df[["int64_col", "float64_col"]].ffill(limit=1).to_pandas()
-    pd_result = scalars_pandas_df[["int64_col", "float64_col"]].ffill(limit=1)
-
-    pandas.testing.assert_frame_equal(bf_result, pd_result)
-
-
-def test_df_bfill(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    bf_result = scalars_df[["int64_col", "float64_col"]].bfill().to_pandas()
-    pd_result = scalars_pandas_df[["int64_col", "float64_col"]].bfill()
-
-    pandas.testing.assert_frame_equal(bf_result, pd_result)
-
-
-def test_apply_series_series_callable(
-    scalars_df_index,
-    scalars_pandas_df_index,
-):
-    columns = ["int64_too", "int64_col"]
-
-    def foo(series, arg1, arg2, *, kwarg1=0, kwarg2=0):
-        return series**2 + (arg1 * arg2 % 4) + (kwarg1 * kwarg2 % 7)
-
-    bf_result = (
-        scalars_df_index[columns]
-        .apply(foo, args=(33, 61), kwarg1=52, kwarg2=21)
-        .to_pandas()
-    )
-
-    pd_result = scalars_pandas_df_index[columns].apply(
-        foo, args=(33, 61), kwarg1=52, kwarg2=21
-    )
-
-    pandas.testing.assert_frame_equal(bf_result, pd_result)
-
-
-def test_apply_series_listlike_callable(
-    scalars_df_index,
-    scalars_pandas_df_index,
-):
-    columns = ["int64_too", "int64_col"]
-    bf_result = (
-        scalars_df_index[columns].apply(lambda x: [len(x), x.min(), 24]).to_pandas()
-    )
-
-    pd_result = scalars_pandas_df_index[columns].apply(lambda x: [len(x), x.min(), 24])
-
-    # Convert default pandas dtypes `int64` to match BigQuery DataFrames dtypes.
-    pd_result.index = pd_result.index.astype("Int64")
-    pd_result = pd_result.astype("Int64")
-    pandas.testing.assert_frame_equal(bf_result, pd_result)
-
-
-def test_apply_series_scalar_callable(
-    scalars_df_index,
-    scalars_pandas_df_index,
-):
-    columns = ["int64_too", "int64_col"]
-    bf_result = scalars_df_index[columns].apply(lambda x: x.sum())
-
-    pd_result = scalars_pandas_df_index[columns].apply(lambda x: x.sum())
-
-    pandas.testing.assert_series_equal(bf_result, pd_result)
-
-
-def test_df_pipe(
-    scalars_df_index,
-    scalars_pandas_df_index,
-):
-    columns = ["int64_too", "int64_col"]
-
-    def foo(x: int, y: int, df):
-        return (df + x) % y
-
-    bf_result = (
-        scalars_df_index[columns]
-        .pipe((foo, "df"), x=7, y=9)
-        .pipe(lambda x: x**2)
-        .to_pandas()
-    )
-
-    pd_result = (
-        scalars_pandas_df_index[columns]
-        .pipe((foo, "df"), x=7, y=9)
-        .pipe(lambda x: x**2)
-    )
-
-    pandas.testing.assert_frame_equal(bf_result, pd_result)
-
-
-def test_df_keys(
-    scalars_df_index,
-    scalars_pandas_df_index,
-):
-    pandas.testing.assert_index_equal(
-        scalars_df_index.keys(), scalars_pandas_df_index.keys()
-    )
-
-
-def test_df_iter(
-    scalars_df_index,
-    scalars_pandas_df_index,
-):
-    for bf_i, df_i in zip(scalars_df_index, scalars_pandas_df_index):
-        assert bf_i == df_i
-
-
-def test_iterrows(
-    scalars_df_index,
-    scalars_pandas_df_index,
-):
-    # TODO: supply a reason why this isn't compatible with pandas 1.x
-    pytest.importorskip("pandas", minversion="2.0.0")
-    scalars_df_index = scalars_df_index.add_suffix("_suffix", axis=1)
-    scalars_pandas_df_index = scalars_pandas_df_index.add_suffix("_suffix", axis=1)
-    for (bf_index, bf_series), (pd_index, pd_series) in zip(
-        scalars_df_index.iterrows(), scalars_pandas_df_index.iterrows()
-    ):
-        assert bf_index == pd_index
-        pandas.testing.assert_series_equal(bf_series, pd_series)
-
-
-@pytest.mark.parametrize(
-    (
-        "index",
-        "name",
-    ),
-    [
-        (
-            True,
-            "my_df",
-        ),
-        (False, None),
-    ],
-)
-def test_itertuples(scalars_df_index, index, name):
-    # Numeric has slightly different representation as a result of conversions.
-    bf_tuples = scalars_df_index.itertuples(index, name)
-    pd_tuples = scalars_df_index.to_pandas().itertuples(index, name)
-    for bf_tuple, pd_tuple in zip(bf_tuples, pd_tuples):
-        assert bf_tuple == pd_tuple
-
-
-def test_df_isin_list_w_null(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    values = ["Hello, World!", 55555, 2.51, pd.NA, True]
-    bf_result = (
-        scalars_df[["int64_col", "float64_col", "string_col", "bool_col"]]
-        .isin(values)
-        .to_pandas()
-    )
-    pd_result = scalars_pandas_df[
-        ["int64_col", "float64_col", "string_col", "bool_col"]
-    ].isin(values)
-
-    pandas.testing.assert_frame_equal(bf_result, pd_result.astype("boolean"))
-
-
-def test_df_isin_list_wo_null(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    values = ["Hello, World!", 55555, 2.51, True]
-    bf_result = (
-        scalars_df[["int64_col", "float64_col", "string_col", "bool_col"]]
-        .isin(values)
-        .to_pandas()
-    )
-    pd_result = scalars_pandas_df[
-        ["int64_col", "float64_col", "string_col", "bool_col"]
-    ].isin(values)
-
-    pandas.testing.assert_frame_equal(bf_result, pd_result.astype("boolean"))
-
-
-def test_df_isin_dict(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    values = {
-        "string_col": ["Hello, World!", 55555, 2.51, pd.NA, True],
-        "int64_col": [5555, 2.51],
-        "bool_col": [pd.NA],
-    }
-    bf_result = (
-        scalars_df[["int64_col", "float64_col", "string_col", "bool_col"]]
-        .isin(values)
-        .to_pandas()
-    )
-    pd_result = scalars_pandas_df[
-        ["int64_col", "float64_col", "string_col", "bool_col"]
-    ].isin(values)
-
-    pandas.testing.assert_frame_equal(bf_result, pd_result.astype("boolean"))
-
-
-def test_df_cross_merge(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    left_columns = ["int64_col", "float64_col", "rowindex_2"]
-    right_columns = ["int64_col", "bool_col", "string_col", "rowindex_2"]
-
-    left = scalars_df[left_columns]
-    # Offset the rows somewhat so that outer join can have an effect.
-    right = scalars_df[right_columns].assign(rowindex_2=scalars_df["rowindex_2"] + 2)
-
-    bf_result = left.merge(right, "cross").to_pandas()
-
-    pd_result = scalars_pandas_df[left_columns].merge(
-        scalars_pandas_df[right_columns].assign(
-            rowindex_2=scalars_pandas_df["rowindex_2"] + 2
-        ),
-        "cross",
-    )
-    pd.testing.assert_frame_equal(bf_result, pd_result, check_index_type=False)
-
-
-@pytest.mark.parametrize(
-    ("merge_how",),
-    [
-        ("inner",),
-        ("outer",),
-        ("left",),
-        ("right",),
-    ],
-)
-def test_df_merge(scalars_dfs, merge_how):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    on = "rowindex_2"
-    left_columns = ["int64_col", "float64_col", "rowindex_2"]
-    right_columns = ["int64_col", "bool_col", "string_col", "rowindex_2"]
-
-    left = scalars_df[left_columns]
-    # Offset the rows somewhat so that outer join can have an effect.
-    right = scalars_df[right_columns].assign(rowindex_2=scalars_df["rowindex_2"] + 2)
-
-    df = left.merge(right, merge_how, on, sort=True)
-    bf_result = df.to_pandas()
-
-    pd_result = scalars_pandas_df[left_columns].merge(
-        scalars_pandas_df[right_columns].assign(
-            rowindex_2=scalars_pandas_df["rowindex_2"] + 2
-        ),
-        merge_how,
-        on,
-        sort=True,
-    )
-
-    assert_pandas_df_equal(
-        bf_result, pd_result, ignore_order=True, check_index_type=False
-    )
-
-
-@pytest.mark.parametrize(
-    ("left_on", "right_on"),
-    [
-        (["int64_col", "rowindex_2"], ["int64_col", "rowindex_2"]),
-        (["rowindex_2", "int64_col"], ["int64_col", "rowindex_2"]),
-        (["rowindex_2", "float64_col"], ["int64_col", "rowindex_2"]),
-    ],
-)
-def test_df_merge_multi_key(scalars_dfs, left_on, right_on):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    left_columns = ["int64_col", "float64_col", "rowindex_2"]
-    right_columns = ["int64_col", "bool_col", "string_col", "rowindex_2"]
-
-    left = scalars_df[left_columns]
-    # Offset the rows somewhat so that outer join can have an effect.
-    right = scalars_df[right_columns].assign(rowindex_2=scalars_df["rowindex_2"] + 2)
-
-    df = left.merge(right, "outer", left_on=left_on, right_on=right_on, sort=True)
-    bf_result = df.to_pandas()
-
-    pd_result = scalars_pandas_df[left_columns].merge(
-        scalars_pandas_df[right_columns].assign(
-            rowindex_2=scalars_pandas_df["rowindex_2"] + 2
-        ),
-        "outer",
-        left_on=left_on,
-        right_on=right_on,
-        sort=True,
-    )
-
-    assert_pandas_df_equal(
-        bf_result, pd_result, ignore_order=True, check_index_type=False
-    )
-
-
-@pytest.mark.parametrize(
-    ("merge_how",),
-    [
-        ("inner",),
-        ("outer",),
-        ("left",),
-        ("right",),
-    ],
-)
-def test_merge_custom_col_name(scalars_dfs, merge_how):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    left_columns = ["int64_col", "float64_col"]
-    right_columns = ["int64_col", "bool_col", "string_col"]
-    on = "int64_col"
-    rename_columns = {"float64_col": "f64_col"}
-
-    left = scalars_df[left_columns]
-    left = left.rename(columns=rename_columns)
-    right = scalars_df[right_columns]
-    df = left.merge(right, merge_how, on, sort=True)
-    bf_result = df.to_pandas()
-
-    pandas_left_df = scalars_pandas_df[left_columns]
-    pandas_left_df = pandas_left_df.rename(columns=rename_columns)
-    pandas_right_df = scalars_pandas_df[right_columns]
-    pd_result = pandas_left_df.merge(pandas_right_df, merge_how, on, sort=True)
-
-    assert_pandas_df_equal(
-        bf_result, pd_result, ignore_order=True, check_index_type=False
-    )
-
-
-@pytest.mark.parametrize(
-    ("merge_how",),
-    [
-        ("inner",),
-        ("outer",),
-        ("left",),
-        ("right",),
-    ],
-)
-def test_merge_left_on_right_on(scalars_dfs, merge_how):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    left_columns = ["int64_col", "float64_col", "int64_too"]
-    right_columns = ["int64_col", "bool_col", "string_col", "rowindex_2"]
-
-    left = scalars_df[left_columns]
-    right = scalars_df[right_columns]
-
-    df = left.merge(
-        right, merge_how, left_on="int64_too", right_on="rowindex_2", sort=True
-    )
-    bf_result = df.to_pandas()
-
-    pd_result = scalars_pandas_df[left_columns].merge(
-        scalars_pandas_df[right_columns],
-        merge_how,
-        left_on="int64_too",
-        right_on="rowindex_2",
-        sort=True,
-    )
-
-    assert_pandas_df_equal(
-        bf_result, pd_result, ignore_order=True, check_index_type=False
-    )
-
-
-def test_self_merge_self_w_on_args():
-    data = {
-        "A": pd.Series([1, 2, 3], dtype="Int64"),
-        "B": pd.Series([1, 2, 3], dtype="Int64"),
-        "C": pd.Series([100, 200, 300], dtype="Int64"),
-        "D": pd.Series(["alpha", "beta", "gamma"], dtype="string[pyarrow]"),
-    }
-    df = pd.DataFrame(data)
-
-    df1 = df[["A", "C"]]
-    df2 = df[["B", "C", "D"]]
-    pd_result = df1.merge(df2, left_on=["A", "C"], right_on=["B", "C"], how="inner")
-
-    bf_df = bpd.DataFrame(data)
-
-    bf_df1 = bf_df[["A", "C"]]
-    bf_df2 = bf_df[["B", "C", "D"]]
-    bf_result = bf_df1.merge(
-        bf_df2, left_on=["A", "C"], right_on=["B", "C"], how="inner"
-    ).to_pandas()
-    pd.testing.assert_frame_equal(bf_result, pd_result, check_index_type=False)
-
-
-@pytest.mark.parametrize(
-    ("decimals",),
-    [
-        (2,),
-        ({"float64_col": 0, "bool_col": 1, "int64_too": -3},),
-        ({},),
-    ],
-)
-def test_dataframe_round(scalars_dfs, decimals):
-    if pd.__version__.startswith("1."):
-        pytest.skip("Rounding doesn't work as expected in pandas 1.x")
-    scalars_df, scalars_pandas_df = scalars_dfs
-
-    bf_result = scalars_df.round(decimals).to_pandas()
-    pd_result = scalars_pandas_df.round(decimals)
-
-    assert_pandas_df_equal(bf_result, pd_result)
-
-
-def test_get_dtypes(scalars_df_default_index):
-    dtypes = scalars_df_default_index.dtypes
-    dtypes_dict: Dict[str, bigframes.dtypes.Dtype] = {
-        "bool_col": pd.BooleanDtype(),
-        "bytes_col": pd.ArrowDtype(pa.binary()),
-        "date_col": pd.ArrowDtype(pa.date32()),
-        "datetime_col": pd.ArrowDtype(pa.timestamp("us")),
-        "geography_col": gpd.array.GeometryDtype(),
-        "int64_col": pd.Int64Dtype(),
-        "int64_too": pd.Int64Dtype(),
-        "numeric_col": pd.ArrowDtype(pa.decimal128(38, 9)),
-        "float64_col": pd.Float64Dtype(),
-        "rowindex": pd.Int64Dtype(),
-        "rowindex_2": pd.Int64Dtype(),
-        "string_col": pd.StringDtype(storage="pyarrow"),
-        "time_col": pd.ArrowDtype(pa.time64("us")),
-        "timestamp_col": pd.ArrowDtype(pa.timestamp("us", tz="UTC")),
-        "duration_col": pd.ArrowDtype(pa.duration("us")),
-    }
-    pd.testing.assert_series_equal(
-        dtypes,
-        pd.Series(dtypes_dict),
-    )
-
-
-def test_get_dtypes_array_struct_query(session):
-    df = session.read_gbq(
-        """SELECT
-        [1, 3, 2] AS array_column,
-        STRUCT(
-            "a" AS string_field,
-            1.2 AS float_field) AS struct_column"""
-    )
-
-    dtypes = df.dtypes
-    pd.testing.assert_series_equal(
-        dtypes,
-        pd.Series(
-            {
-                "array_column": pd.ArrowDtype(pa.list_(pa.int64())),
-                "struct_column": pd.ArrowDtype(
-                    pa.struct(
-                        [
-                            ("string_field", pa.string()),
-                            ("float_field", pa.float64()),
-                        ]
-                    )
-                ),
-            }
-        ),
-    )
-
-
-def test_get_dtypes_array_struct_table(nested_df):
-    dtypes = nested_df.dtypes
-    pd.testing.assert_series_equal(
-        dtypes,
-        pd.Series(
-            {
-                "customer_id": pd.StringDtype(storage="pyarrow"),
-                "day": pd.ArrowDtype(pa.date32()),
-                "flag": pd.Int64Dtype(),
-                "label": pd.ArrowDtype(
-                    pa.struct(
-                        [
-                            ("key", pa.string()),
-                            ("value", pa.string()),
-                        ]
-                    ),
-                ),
-                "event_sequence": pd.ArrowDtype(
-                    pa.list_(
-                        pa.struct(
-                            [
-                                pa.field(
-                                    "data",
-                                    pa.list_(
-                                        pa.struct(
-                                            [
-                                                ("value", pa.float64()),
-                                                ("key", pa.string()),
-                                            ],
-                                        ),
-                                    ),
-                                    nullable=False,
-                                ),
-                                ("timestamp", pa.timestamp("us", "UTC")),
-                                ("category", pa.string()),
-                            ]
-                        ),
-                    ),
-                ),
-                "address": pd.ArrowDtype(
-                    pa.struct(
-                        [
-                            ("street", pa.string()),
-                            ("city", pa.string()),
-                        ]
-                    ),
-                ),
-            }
-        ),
-    )
-
-
-def test_shape(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    bf_result = scalars_df.shape
-    pd_result = scalars_pandas_df.shape
-
-    assert bf_result == pd_result
-
-
-@pytest.mark.parametrize(
-    "reference_table, test_table",
-    [
-        (
-            "bigframes-dev.bigframes_tests_sys.base_table",
-            "bigframes-dev.bigframes_tests_sys.base_table_mat_view",
-        ),
-        (
-            "bigframes-dev.bigframes_tests_sys.base_table",
-            "bigframes-dev.bigframes_tests_sys.base_table_view",
-        ),
-        (
-            "bigframes-dev.bigframes_tests_sys.csv_native_table",
-            "bigframes-dev.bigframes_tests_sys.csv_external_table",
-        ),
-    ],
-)
-def test_view_and_external_table_shape(session, reference_table, test_table):
-    reference_df = session.read_gbq(reference_table)
-    test_df = session.read_gbq(test_table)
-
-    assert test_df.shape == reference_df.shape
-
-
-def test_len(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    bf_result = len(scalars_df)
-    pd_result = len(scalars_pandas_df)
-
-    assert bf_result == pd_result
-
-
-@pytest.mark.parametrize(
-    ("n_rows",),
-    [
-        (50,),
-        (10000,),
-    ],
-)
-@pytest.mark.parametrize(
-    "write_engine",
-    ["bigquery_load", "bigquery_streaming", "bigquery_write"],
-)
-def test_df_len_local(session, n_rows, write_engine):
-    assert (
-        len(
-            session.read_pandas(
-                pd.DataFrame(np.random.randint(1, 7, n_rows), columns=["one"]),
-                write_engine=write_engine,
-            )
-        )
-        == n_rows
-    )
-
-
-def test_size(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    bf_result = scalars_df.size
-    pd_result = scalars_pandas_df.size
-
-    assert bf_result == pd_result
-
-
-def test_ndim(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    bf_result = scalars_df.ndim
-    pd_result = scalars_pandas_df.ndim
-
-    assert bf_result == pd_result
-
-
-def test_empty_false(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-
-    bf_result = scalars_df.empty
-    pd_result = scalars_pandas_df.empty
-
-    assert bf_result == pd_result
-
-
-def test_empty_true_column_filter(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-
-    bf_result = scalars_df[[]].empty
-    pd_result = scalars_pandas_df[[]].empty
-
-    assert bf_result == pd_result
-
-
-def test_empty_true_row_filter(scalars_dfs: Tuple[dataframe.DataFrame, pd.DataFrame]):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    bf_bool: series.Series = typing.cast(series.Series, scalars_df["bool_col"])
-    pd_bool: pd.Series = scalars_pandas_df["bool_col"]
-    bf_false = bf_bool.notna() & (bf_bool != bf_bool)
-    pd_false = pd_bool.notna() & (pd_bool != pd_bool)
-
-    bf_result = scalars_df[bf_false].empty
-    pd_result = scalars_pandas_df[pd_false].empty
-
-    assert pd_result
-    assert bf_result == pd_result
-
-
-def test_empty_true_memtable(session: bigframes.Session):
-    bf_df = dataframe.DataFrame(session=session)
-    pd_df = pd.DataFrame()
-
-    bf_result = bf_df.empty
-    pd_result = pd_df.empty
-
-    assert pd_result
-    assert bf_result == pd_result
-
-
-@pytest.mark.parametrize(
-    ("drop",),
-    ((True,), (False,)),
-)
-def test_reset_index(scalars_df_index, scalars_pandas_df_index, drop):
-    df = scalars_df_index.reset_index(drop=drop)
-    assert df.index.name is None
-
-    bf_result = df.to_pandas()
-    pd_result = scalars_pandas_df_index.reset_index(drop=drop)
-
-    # Pandas uses int64 instead of Int64 (nullable) dtype.
-    pd_result.index = pd_result.index.astype(pd.Int64Dtype())
-
-    # reset_index should maintain the original ordering.
-    pandas.testing.assert_frame_equal(bf_result, pd_result)
-
-
-def test_reset_index_allow_duplicates(scalars_df_index, scalars_pandas_df_index):
-    scalars_df_index = scalars_df_index.copy()
-    scalars_df_index.index.name = "int64_col"
-    df = scalars_df_index.reset_index(allow_duplicates=True, drop=False)
-    assert df.index.name is None
-
-    bf_result = df.to_pandas()
-
-    scalars_pandas_df_index = scalars_pandas_df_index.copy()
-    scalars_pandas_df_index.index.name = "int64_col"
-    pd_result = scalars_pandas_df_index.reset_index(allow_duplicates=True, drop=False)
-
-    # Pandas uses int64 instead of Int64 (nullable) dtype.
-    pd_result.index = pd_result.index.astype(pd.Int64Dtype())
-
-    # reset_index should maintain the original ordering.
-    pandas.testing.assert_frame_equal(bf_result, pd_result)
-
-
-def test_reset_index_duplicates_error(scalars_df_index):
-    scalars_df_index = scalars_df_index.copy()
-    scalars_df_index.index.name = "int64_col"
-    with pytest.raises(ValueError):
-        scalars_df_index.reset_index(allow_duplicates=False, drop=False)
-
-
-@pytest.mark.parametrize(
-    ("drop",),
-    ((True,), (False,)),
-)
-def test_reset_index_inplace(scalars_df_index, scalars_pandas_df_index, drop):
-    df = scalars_df_index.copy()
-    df.reset_index(drop=drop, inplace=True)
-    assert df.index.name is None
-
-    bf_result = df.to_pandas()
-    pd_result = scalars_pandas_df_index.copy()
-    pd_result.reset_index(drop=drop, inplace=True)
-
-    # Pandas uses int64 instead of Int64 (nullable) dtype.
-    pd_result.index = pd_result.index.astype(pd.Int64Dtype())
-
-    # reset_index should maintain the original ordering.
-    pandas.testing.assert_frame_equal(bf_result, pd_result)
-
-
-def test_reset_index_then_filter(
-    scalars_df_index,
-    scalars_pandas_df_index,
-):
-    bf_filter = scalars_df_index["bool_col"].fillna(True)
-    bf_df = scalars_df_index.reset_index()[bf_filter]
-    bf_result = bf_df.to_pandas()
-    pd_filter = scalars_pandas_df_index["bool_col"].fillna(True)
-    pd_result = scalars_pandas_df_index.reset_index()[pd_filter]
-
-    # Pandas uses int64 instead of Int64 (nullable) dtype.
-    pd_result.index = pd_result.index.astype(pd.Int64Dtype())
-
-    # reset_index should maintain the original ordering and index keys
-    # post-filter will have gaps.
-    pandas.testing.assert_frame_equal(bf_result, pd_result)
-
-
-def test_reset_index_with_unnamed_index(
-    scalars_df_index,
-    scalars_pandas_df_index,
-):
-    scalars_df_index = scalars_df_index.copy()
-    scalars_pandas_df_index = scalars_pandas_df_index.copy()
-
-    scalars_df_index.index.name = None
-    scalars_pandas_df_index.index.name = None
-    df = scalars_df_index.reset_index(drop=False)
-    assert df.index.name is None
-
-    # reset_index(drop=False) creates a new column "index".
-    assert df.columns[0] == "index"
-
-    bf_result = df.to_pandas()
-    pd_result = scalars_pandas_df_index.reset_index(drop=False)
-
-    # Pandas uses int64 instead of Int64 (nullable) dtype.
-    pd_result.index = pd_result.index.astype(pd.Int64Dtype())
-
-    # reset_index should maintain the original ordering.
-    pandas.testing.assert_frame_equal(bf_result, pd_result)
-
-
-def test_reset_index_with_unnamed_multiindex(
-    scalars_df_index,
-    scalars_pandas_df_index,
-):
-    bf_df = dataframe.DataFrame(
-        ([1, 2, 3], [2, 5, 7]),
-        index=pd.MultiIndex.from_tuples([("a", "aa"), ("a", "aa")]),
-    )
-    pd_df = pd.DataFrame(
-        ([1, 2, 3], [2, 5, 7]),
-        index=pd.MultiIndex.from_tuples([("a", "aa"), ("a", "aa")]),
-    )
-
-    bf_df = bf_df.reset_index()
-    pd_df = pd_df.reset_index()
-
-    assert pd_df.columns[0] == "level_0"
-    assert bf_df.columns[0] == "level_0"
-    assert pd_df.columns[1] == "level_1"
-    assert bf_df.columns[1] == "level_1"
-
-
-def test_reset_index_with_unnamed_index_and_index_column(
-    scalars_df_index,
-    scalars_pandas_df_index,
-):
-    scalars_df_index = scalars_df_index.copy()
-    scalars_pandas_df_index = scalars_pandas_df_index.copy()
-
-    scalars_df_index.index.name = None
-    scalars_pandas_df_index.index.name = None
-    df = scalars_df_index.assign(index=scalars_df_index["int64_col"]).reset_index(
-        drop=False
-    )
-    assert df.index.name is None
-
-    # reset_index(drop=False) creates a new column "level_0" if the "index" column already exists.
-    assert df.columns[0] == "level_0"
-
-    bf_result = df.to_pandas()
-    pd_result = scalars_pandas_df_index.assign(
-        index=scalars_pandas_df_index["int64_col"]
-    ).reset_index(drop=False)
-
-    # Pandas uses int64 instead of Int64 (nullable) dtype.
-    pd_result.index = pd_result.index.astype(pd.Int64Dtype())
-
-    # reset_index should maintain the original ordering.
-    pandas.testing.assert_frame_equal(bf_result, pd_result)
-
-
-@pytest.mark.parametrize(
-    ("drop",),
-    (
-        (True,),
-        (False,),
-    ),
-)
-@pytest.mark.parametrize(
-    ("append",),
-    (
-        (True,),
-        (False,),
-    ),
-)
-@pytest.mark.parametrize(
-    ("index_column",),
-    (("int64_too",), ("string_col",), ("timestamp_col",)),
-)
-def test_set_index(scalars_dfs, index_column, drop, append):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    df = scalars_df.set_index(index_column, append=append, drop=drop)
-    bf_result = df.to_pandas()
-    pd_result = scalars_pandas_df.set_index(index_column, append=append, drop=drop)
-
-    # Sort to disambiguate when there are duplicate index labels.
-    # Note: Doesn't use assert_pandas_df_equal_ignore_ordering because we get
-    # "ValueError: 'timestamp_col' is both an index level and a column label,
-    # which is ambiguous" when trying to sort by a column with the same name as
-    # the index.
-    bf_result = bf_result.sort_values("rowindex_2")
-    pd_result = pd_result.sort_values("rowindex_2")
-
-    pandas.testing.assert_frame_equal(bf_result, pd_result)
-
-
-def test_set_index_key_error(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    with pytest.raises(KeyError):
-        scalars_pandas_df.set_index(["not_a_col"])
-    with pytest.raises(KeyError):
-        scalars_df.set_index(["not_a_col"])
-
-
-@pytest.mark.parametrize(
-    ("ascending",),
-    ((True,), (False,)),
-)
-@pytest.mark.parametrize(
-    ("na_position",),
-    (("first",), ("last",)),
-)
-@pytest.mark.parametrize(
-    ("axis",),
-    ((0,), ("columns",)),
-)
-def test_sort_index(scalars_dfs, ascending, na_position, axis):
-    index_column = "int64_col"
-    scalars_df, scalars_pandas_df = scalars_dfs
-    df = scalars_df.set_index(index_column)
-    bf_result = df.sort_index(
-        ascending=ascending, na_position=na_position, axis=axis
-    ).to_pandas()
-    pd_result = scalars_pandas_df.set_index(index_column).sort_index(
-        ascending=ascending, na_position=na_position, axis=axis
-    )
-    pandas.testing.assert_frame_equal(bf_result, pd_result)
-
-
-def test_dataframe_sort_index_inplace(scalars_dfs):
-    index_column = "int64_col"
-    scalars_df, scalars_pandas_df = scalars_dfs
-    df = scalars_df.copy().set_index(index_column)
-    df.sort_index(ascending=False, inplace=True)
-    bf_result = df.to_pandas()
-
-    pd_result = scalars_pandas_df.set_index(index_column).sort_index(ascending=False)
-    pandas.testing.assert_frame_equal(bf_result, pd_result)
-
-
-def test_df_abs(scalars_dfs_maybe_ordered):
-    scalars_df, scalars_pandas_df = scalars_dfs_maybe_ordered
-    columns = ["int64_col", "int64_too", "float64_col"]
-
-    bf_result = scalars_df[columns].abs()
-    pd_result = scalars_pandas_df[columns].abs()
-
-    assert_dfs_equivalent(pd_result, bf_result)
-
-
-def test_df_pos(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    bf_result = (+scalars_df[["int64_col", "numeric_col"]]).to_pandas()
-    pd_result = +scalars_pandas_df[["int64_col", "numeric_col"]]
-
-    assert_pandas_df_equal(pd_result, bf_result)
-
-
-def test_df_neg(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    bf_result = (-scalars_df[["int64_col", "numeric_col"]]).to_pandas()
-    pd_result = -scalars_pandas_df[["int64_col", "numeric_col"]]
-
-    assert_pandas_df_equal(pd_result, bf_result)
-
-
-def test_df__abs__(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    bf_result = (
-        abs(scalars_df[["int64_col", "numeric_col", "float64_col"]])
-    ).to_pandas()
-    pd_result = abs(scalars_pandas_df[["int64_col", "numeric_col", "float64_col"]])
-
-    assert_pandas_df_equal(pd_result, bf_result)
-
-
-def test_df_invert(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    columns = ["int64_col", "bool_col"]
-
-    bf_result = (~scalars_df[columns]).to_pandas()
-    pd_result = ~scalars_pandas_df[columns]
-
-    assert_pandas_df_equal(bf_result, pd_result)
-
-
-def test_df_isnull(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-
-    columns = ["int64_col", "int64_too", "string_col", "bool_col"]
-    bf_result = scalars_df[columns].isnull().to_pandas()
-    pd_result = scalars_pandas_df[columns].isnull()
-
-    # One of dtype mismatches to be documented. Here, the `bf_result.dtype` is
-    # `BooleanDtype` but the `pd_result.dtype` is `bool`.
-    pd_result["int64_col"] = pd_result["int64_col"].astype(pd.BooleanDtype())
-    pd_result["int64_too"] = pd_result["int64_too"].astype(pd.BooleanDtype())
-    pd_result["string_col"] = pd_result["string_col"].astype(pd.BooleanDtype())
-    pd_result["bool_col"] = pd_result["bool_col"].astype(pd.BooleanDtype())
-
-    assert_pandas_df_equal(bf_result, pd_result)
-
-
-def test_df_notnull(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-
-    columns = ["int64_col", "int64_too", "string_col", "bool_col"]
-    bf_result = scalars_df[columns].notnull().to_pandas()
-    pd_result = scalars_pandas_df[columns].notnull()
-
-    # One of dtype mismatches to be documented. Here, the `bf_result.dtype` is
-    # `BooleanDtype` but the `pd_result.dtype` is `bool`.
-    pd_result["int64_col"] = pd_result["int64_col"].astype(pd.BooleanDtype())
-    pd_result["int64_too"] = pd_result["int64_too"].astype(pd.BooleanDtype())
-    pd_result["string_col"] = pd_result["string_col"].astype(pd.BooleanDtype())
-    pd_result["bool_col"] = pd_result["bool_col"].astype(pd.BooleanDtype())
-
-    assert_pandas_df_equal(bf_result, pd_result)
-
-
-@pytest.mark.parametrize(
-    ("left_labels", "right_labels", "overwrite", "fill_value"),
-    [
-        (["a", "b", "c"], ["c", "a", "b"], True, None),
-        (["a", "b", "c"], ["c", "a", "b"], False, None),
-        (["a", "b", "c"], ["a", "b", "c"], False, 2),
-    ],
-    ids=[
-        "one_one_match_overwrite",
-        "one_one_match_no_overwrite",
-        "exact_match",
-    ],
-)
-def test_combine(
-    scalars_df_index,
-    scalars_df_2_index,
-    scalars_pandas_df_index,
-    left_labels,
-    right_labels,
-    overwrite,
-    fill_value,
-):
-    if pd.__version__.startswith("1."):
-        pytest.skip("pd.NA vs NaN not handled well in pandas 1.x.")
-    columns = ["int64_too", "int64_col", "float64_col"]
-
-    bf_df_a = scalars_df_index[columns]
-    bf_df_a.columns = left_labels
-    bf_df_b = scalars_df_2_index[columns]
-    bf_df_b.columns = right_labels
-    bf_result = bf_df_a.combine(
-        bf_df_b,
-        lambda x, y: x**2 + 2 * x * y + y**2,
-        overwrite=overwrite,
-        fill_value=fill_value,
-    ).to_pandas()
-
-    pd_df_a = scalars_pandas_df_index[columns]
-    pd_df_a.columns = left_labels
-    pd_df_b = scalars_pandas_df_index[columns]
-    pd_df_b.columns = right_labels
-    pd_result = pd_df_a.combine(
-        pd_df_b,
-        lambda x, y: x**2 + 2 * x * y + y**2,
-        overwrite=overwrite,
-        fill_value=fill_value,
-    )
-
-    # Some dtype inconsistency for all-NULL columns
-    pd.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False)
-
-
-@pytest.mark.parametrize(
-    ("overwrite", "filter_func"),
-    [
-        (True, None),
-        (False, None),
-        (True, lambda x: x.isna() | (x % 2 == 0)),
-    ],
-    ids=[
-        "default",
-        "overwritefalse",
-        "customfilter",
-    ],
-)
-def test_df_update(overwrite, filter_func):
-    if pd.__version__.startswith("1."):
-        pytest.skip("dtype handled differently in pandas 1.x.")
-
-    index1: pandas.Index = pandas.Index([1, 2, 3, 4], dtype="Int64")
-
-    index2: pandas.Index = pandas.Index([1, 2, 4, 5], dtype="Int64")
-    pd_df1 = pandas.DataFrame(
-        {"a": [1, None, 3, 4], "b": [5, 6, None, 8]}, dtype="Int64", index=index1
-    )
-    pd_df2 = pandas.DataFrame(
-        {"a": [None, 20, 30, 40], "c": [90, None, 110, 120]},
-        dtype="Int64",
-        index=index2,
-    )
-
-    bf_df1 = dataframe.DataFrame(pd_df1)
-    bf_df2 = dataframe.DataFrame(pd_df2)
-
-    bf_df1.update(bf_df2, overwrite=overwrite, filter_func=filter_func)
-    pd_df1.update(pd_df2, overwrite=overwrite, filter_func=filter_func)
-
-    pd.testing.assert_frame_equal(bf_df1.to_pandas(), pd_df1)
-
-
-def test_df_idxmin():
-    pd_df = pd.DataFrame(
-        {"a": [1, 2, 3], "b": [7, None, 3], "c": [4, 4, 4]}, index=["x", "y", "z"]
-    )
-    bf_df = dataframe.DataFrame(pd_df)
-
-    bf_result = bf_df.idxmin().to_pandas()
-    pd_result = pd_df.idxmin()
-
-    pd.testing.assert_series_equal(
-        bf_result, pd_result, check_index_type=False, check_dtype=False
-    )
-
-
-def test_df_idxmax():
-    pd_df = pd.DataFrame(
-        {"a": [1, 2, 3], "b": [7, None, 3], "c": [4, 4, 4]}, index=["x", "y", "z"]
-    )
-    bf_df = dataframe.DataFrame(pd_df)
-
-    bf_result = bf_df.idxmax().to_pandas()
-    pd_result = pd_df.idxmax()
-
-    pd.testing.assert_series_equal(
-        bf_result, pd_result, check_index_type=False, check_dtype=False
-    )
-
-
-@pytest.mark.parametrize(
-    ("join", "axis"),
-    [
-        ("outer", None),
-        ("outer", 0),
-        ("outer", 1),
-        ("left", 0),
-        ("right", 1),
-        ("inner", None),
-        ("inner", 1),
-    ],
-)
-def test_df_align(join, axis):
-
-    index1: pandas.Index = pandas.Index([1, 2, 3, 4], dtype="Int64")
-
-    index2: pandas.Index = pandas.Index([1, 2, 4, 5], dtype="Int64")
-    pd_df1 = pandas.DataFrame(
-        {"a": [1, None, 3, 4], "b": [5, 6, None, 8]}, dtype="Int64", index=index1
-    )
-    pd_df2 = pandas.DataFrame(
-        {"a": [None, 20, 30, 40], "c": [90, None, 110, 120]},
-        dtype="Int64",
-        index=index2,
-    )
-
-    bf_df1 = dataframe.DataFrame(pd_df1)
-    bf_df2 = dataframe.DataFrame(pd_df2)
-
-    bf_result1, bf_result2 = bf_df1.align(bf_df2, join=join, axis=axis)
-    pd_result1, pd_result2 = pd_df1.align(pd_df2, join=join, axis=axis)
-
-    # Don't check dtype as pandas does unnecessary float conversion
-    assert isinstance(bf_result1, dataframe.DataFrame) and isinstance(
-        bf_result2, dataframe.DataFrame
-    )
-    pd.testing.assert_frame_equal(bf_result1.to_pandas(), pd_result1, check_dtype=False)
-    pd.testing.assert_frame_equal(bf_result2.to_pandas(), pd_result2, check_dtype=False)
-
-
-def test_combine_first(
-    scalars_df_index,
-    scalars_df_2_index,
-    scalars_pandas_df_index,
-):
-    if pd.__version__.startswith("1."):
-        pytest.skip("pd.NA vs NaN not handled well in pandas 1.x.")
-    columns = ["int64_too", "int64_col", "float64_col"]
-
-    bf_df_a = scalars_df_index[columns].iloc[0:6]
-    bf_df_a.columns = ["a", "b", "c"]
-    bf_df_b = scalars_df_2_index[columns].iloc[2:8]
-    bf_df_b.columns = ["b", "a", "d"]
-    bf_result = bf_df_a.combine_first(bf_df_b).to_pandas()
-
-    pd_df_a = scalars_pandas_df_index[columns].iloc[0:6]
-    pd_df_a.columns = ["a", "b", "c"]
-    pd_df_b = scalars_pandas_df_index[columns].iloc[2:8]
-    pd_df_b.columns = ["b", "a", "d"]
-    pd_result = pd_df_a.combine_first(pd_df_b)
-
-    # Some dtype inconsistency for all-NULL columns
-    pd.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False)
-
-
-@pytest.mark.parametrize(
-    ("columns", "numeric_only"),
-    [
-        (["bool_col", "int64_col", "float64_col"], True),
-        (["bool_col", "int64_col", "float64_col"], False),
-        (["bool_col", "int64_col", "float64_col", "string_col"], True),
-        pytest.param(
-            ["bool_col", "int64_col", "float64_col", "string_col"],
-            False,
-            marks=pytest.mark.xfail(
-                raises=NotImplementedError,
-            ),
-        ),
-    ],
-)
-def test_df_corr_w_numeric_only(scalars_dfs_maybe_ordered, columns, numeric_only):
-    scalars_df, scalars_pandas_df = scalars_dfs_maybe_ordered
-
-    bf_result = scalars_df[columns].corr(numeric_only=numeric_only).to_pandas()
-    pd_result = scalars_pandas_df[columns].corr(numeric_only=numeric_only)
-
-    # BigFrames and Pandas differ in their data type handling:
-    # - Column types: BigFrames uses Float64, Pandas uses float64.
-    # - Index types: BigFrames uses strign, Pandas uses object.
-    pd.testing.assert_index_equal(bf_result.columns, pd_result.columns)
-    # Only check row order in ordered mode.
-    pd.testing.assert_frame_equal(
-        bf_result,
-        pd_result,
-        check_dtype=False,
-        check_index_type=False,
-        check_like=~scalars_df._block.session._strictly_ordered,
-    )
-
-
-def test_df_corr_w_invalid_parameters(scalars_dfs):
-    columns = ["int64_too", "int64_col", "float64_col"]
-    scalars_df, _ = scalars_dfs
-
-    with pytest.raises(NotImplementedError):
-        scalars_df[columns].corr(method="kendall")
-
-    with pytest.raises(NotImplementedError):
-        scalars_df[columns].corr(min_periods=1)
-
-
-@pytest.mark.parametrize(
-    ("columns", "numeric_only"),
-    [
-        (["bool_col", "int64_col", "float64_col"], True),
-        (["bool_col", "int64_col", "float64_col"], False),
-        (["bool_col", "int64_col", "float64_col", "string_col"], True),
-        pytest.param(
-            ["bool_col", "int64_col", "float64_col", "string_col"],
-            False,
-            marks=pytest.mark.xfail(
-                raises=NotImplementedError,
-            ),
-        ),
-    ],
-)
-def test_cov_w_numeric_only(scalars_dfs_maybe_ordered, columns, numeric_only):
-    scalars_df, scalars_pandas_df = scalars_dfs_maybe_ordered
-    bf_result = scalars_df[columns].cov(numeric_only=numeric_only).to_pandas()
-    pd_result = scalars_pandas_df[columns].cov(numeric_only=numeric_only)
-    # BigFrames and Pandas differ in their data type handling:
-    # - Column types: BigFrames uses Float64, Pandas uses float64.
-    # - Index types: BigFrames uses strign, Pandas uses object.
-    pd.testing.assert_index_equal(bf_result.columns, pd_result.columns)
-    # Only check row order in ordered mode.
-    pd.testing.assert_frame_equal(
-        bf_result,
-        pd_result,
-        check_dtype=False,
-        check_index_type=False,
-        check_like=~scalars_df._block.session._strictly_ordered,
-    )
-
-
-def test_df_corrwith_df(scalars_dfs_maybe_ordered):
-    scalars_df, scalars_pandas_df = scalars_dfs_maybe_ordered
-
-    l_cols = ["int64_col", "float64_col", "int64_too"]
-    r_cols = ["int64_too", "float64_col"]
-
-    bf_result = scalars_df[l_cols].corrwith(scalars_df[r_cols]).to_pandas()
-    pd_result = scalars_pandas_df[l_cols].corrwith(scalars_pandas_df[r_cols])
-
-    # BigFrames and Pandas differ in their data type handling:
-    # - Column types: BigFrames uses Float64, Pandas uses float64.
-    # - Index types: BigFrames uses strign, Pandas uses object.
-    pd.testing.assert_series_equal(
-        bf_result, pd_result, check_dtype=False, check_index_type=False
-    )
-
-
-def test_df_corrwith_df_numeric_only(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-
-    l_cols = ["int64_col", "float64_col", "int64_too", "string_col"]
-    r_cols = ["int64_too", "float64_col", "bool_col"]
-
-    bf_result = (
-        scalars_df[l_cols].corrwith(scalars_df[r_cols], numeric_only=True).to_pandas()
-    )
-    pd_result = scalars_pandas_df[l_cols].corrwith(
-        scalars_pandas_df[r_cols], numeric_only=True
-    )
-
-    # BigFrames and Pandas differ in their data type handling:
-    # - Column types: BigFrames uses Float64, Pandas uses float64.
-    # - Index types: BigFrames uses strign, Pandas uses object.
-    pd.testing.assert_series_equal(
-        bf_result, pd_result, check_dtype=False, check_index_type=False
-    )
-
-
-def test_df_corrwith_df_non_numeric_error(scalars_dfs):
-    scalars_df, _ = scalars_dfs
-
-    l_cols = ["int64_col", "float64_col", "int64_too", "string_col"]
-    r_cols = ["int64_too", "float64_col", "bool_col"]
-
-    with pytest.raises(NotImplementedError):
-        scalars_df[l_cols].corrwith(scalars_df[r_cols], numeric_only=False)
-
-
-def test_df_corrwith_series(scalars_dfs_maybe_ordered):
-    # TODO: supply a reason why this isn't compatible with pandas 1.x
-    pytest.importorskip("pandas", minversion="2.0.0")
-    scalars_df, scalars_pandas_df = scalars_dfs_maybe_ordered
-
-    l_cols = ["int64_col", "float64_col", "int64_too"]
-    r_col = "float64_col"
-
-    bf_result = scalars_df[l_cols].corrwith(scalars_df[r_col]).to_pandas()
-    pd_result = scalars_pandas_df[l_cols].corrwith(scalars_pandas_df[r_col])
-
-    # BigFrames and Pandas differ in their data type handling:
-    # - Column types: BigFrames uses Float64, Pandas uses float64.
-    # - Index types: BigFrames uses strign, Pandas uses object.
-    pd.testing.assert_series_equal(
-        bf_result, pd_result, check_dtype=False, check_index_type=False
-    )
-
-
-@pytest.mark.parametrize(
-    ("op"),
-    [
-        operator.add,
-        operator.sub,
-        operator.mul,
-        operator.truediv,
-        operator.floordiv,
-        operator.eq,
-        operator.ne,
-        operator.gt,
-        operator.ge,
-        operator.lt,
-        operator.le,
-    ],
-    ids=[
-        "add",
-        "subtract",
-        "multiply",
-        "true_divide",
-        "floor_divide",
-        "eq",
-        "ne",
-        "gt",
-        "ge",
-        "lt",
-        "le",
-    ],
-)
-# TODO(garrettwu): deal with NA values
-@pytest.mark.parametrize(("other_scalar"), [1, 2.5, 0, 0.0])
-@pytest.mark.parametrize(("reverse_operands"), [True, False])
-def test_scalar_binop(scalars_dfs, op, other_scalar, reverse_operands):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    columns = ["int64_col", "float64_col"]
-
-    maybe_reversed_op = (lambda x, y: op(y, x)) if reverse_operands else op
-
-    bf_result = maybe_reversed_op(scalars_df[columns], other_scalar).to_pandas()
-    pd_result = maybe_reversed_op(scalars_pandas_df[columns], other_scalar)
-
-    assert_pandas_df_equal(bf_result, pd_result)
-
-
-def test_dataframe_string_radd_const(scalars_dfs):
-    pytest.importorskip(
-        "pandas",
-        minversion="2.0.0",
-        reason="PyArrow string addition requires pandas 2.0+",
-    )
-
-    scalars_df, scalars_pandas_df = scalars_dfs
-    columns = ["string_col", "string_col"]
-
-    bf_result = ("prefix" + scalars_df[columns]).to_pandas()
-    pd_result = "prefix" + scalars_pandas_df[columns]
-
-    assert_pandas_df_equal(bf_result, pd_result)
-
-
-@pytest.mark.parametrize(("other_scalar"), [1, -2])
-def test_mod(scalars_dfs, other_scalar):
-    # Zero case excluded as pandas produces 0 result for Int64 inputs rather than NA/NaN.
-    # This is likely a pandas bug as mod 0 is undefined in other dtypes, and most programming languages.
-    scalars_df, scalars_pandas_df = scalars_dfs
-
-    bf_result = (scalars_df[["int64_col", "int64_too"]] % other_scalar).to_pandas()
-    pd_result = scalars_pandas_df[["int64_col", "int64_too"]] % other_scalar
-
-    assert_pandas_df_equal(bf_result, pd_result)
-
-
-def test_scalar_binop_str_exception(scalars_dfs):
-    scalars_df, _ = scalars_dfs
-    columns = ["string_col"]
-    with pytest.raises(TypeError, match="Cannot add dtypes"):
-        (scalars_df[columns] + 1).to_pandas()
-
-
-@pytest.mark.parametrize(
-    ("op"),
-    [
-        (lambda x, y: x.add(y, axis="index")),
-        (lambda x, y: x.radd(y, axis="index")),
-        (lambda x, y: x.sub(y, axis="index")),
-        (lambda x, y: x.rsub(y, axis="index")),
-        (lambda x, y: x.mul(y, axis="index")),
-        (lambda x, y: x.rmul(y, axis="index")),
-        (lambda x, y: x.truediv(y, axis="index")),
-        (lambda x, y: x.rtruediv(y, axis="index")),
-        (lambda x, y: x.floordiv(y, axis="index")),
-        (lambda x, y: x.floordiv(y, axis="index")),
-        (lambda x, y: x.gt(y, axis="index")),
-        (lambda x, y: x.ge(y, axis="index")),
-        (lambda x, y: x.lt(y, axis="index")),
-        (lambda x, y: x.le(y, axis="index")),
-    ],
-    ids=[
-        "add",
-        "radd",
-        "sub",
-        "rsub",
-        "mul",
-        "rmul",
-        "truediv",
-        "rtruediv",
-        "floordiv",
-        "rfloordiv",
-        "gt",
-        "ge",
-        "lt",
-        "le",
-    ],
-)
-def test_series_binop_axis_index(
-    scalars_dfs,
-    op,
-):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    df_columns = ["int64_col", "float64_col"]
-    series_column = "int64_too"
-
-    bf_result = op(scalars_df[df_columns], scalars_df[series_column]).to_pandas()
-    pd_result = op(scalars_pandas_df[df_columns], scalars_pandas_df[series_column])
-
-    assert_pandas_df_equal(bf_result, pd_result)
-
-
-@pytest.mark.parametrize(
-    ("input"),
-    [
-        ((1000, 2000, 3000)),
-        (pd.Index([1000, 2000, 3000])),
-        (pd.Series((1000, 2000), index=["int64_too", "float64_col"])),
-    ],
-    ids=[
-        "tuple",
-        "pd_index",
-        "pd_series",
-    ],
-)
-def test_listlike_binop_axis_1_in_memory_data(scalars_dfs, input):
-    # TODO: supply a reason why this isn't compatible with pandas 1.x
-    pytest.importorskip("pandas", minversion="2.0.0")
-    scalars_df, scalars_pandas_df = scalars_dfs
-
-    df_columns = ["int64_col", "float64_col", "int64_too"]
-
-    bf_result = scalars_df[df_columns].add(input, axis=1).to_pandas()
-    if hasattr(input, "to_pandas"):
-        input = input.to_pandas()
-    pd_result = scalars_pandas_df[df_columns].add(input, axis=1)
-
-    assert_pandas_df_equal(bf_result, pd_result, check_dtype=False)
-
-
-def test_df_reverse_binop_pandas(scalars_dfs):
-    # TODO: supply a reason why this isn't compatible with pandas 1.x
-    pytest.importorskip("pandas", minversion="2.0.0")
-    scalars_df, scalars_pandas_df = scalars_dfs
-
-    pd_series = pd.Series([100, 200, 300])
-
-    df_columns = ["int64_col", "float64_col", "int64_too"]
-
-    bf_result = pd_series + scalars_df[df_columns].to_pandas()
-    pd_result = pd_series + scalars_pandas_df[df_columns]
-
-    assert_pandas_df_equal(bf_result, pd_result, check_dtype=False)
-
-
-def test_listlike_binop_axis_1_bf_index(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-
-    df_columns = ["int64_col", "float64_col", "int64_too"]
-
-    bf_result = (
-        scalars_df[df_columns]
-        .add(bf_indexes.Index([1000, 2000, 3000]), axis=1)
-        .to_pandas()
-    )
-    pd_result = scalars_pandas_df[df_columns].add(pd.Index([1000, 2000, 3000]), axis=1)
-
-    assert_pandas_df_equal(bf_result, pd_result, check_dtype=False)
-
-
-def test_binop_with_self_aggregate(scalars_dfs_maybe_ordered):
-    scalars_df, scalars_pandas_df = scalars_dfs_maybe_ordered
-
-    df_columns = ["int64_col", "float64_col", "int64_too"]
-
-    # Ensure that this takes the optimized single-query path by counting executions
-    execution_count_before = scalars_df._session._metrics.execution_count
-    bf_df = scalars_df[df_columns]
-    bf_result = (bf_df - bf_df.mean()).to_pandas()
-    execution_count_after = scalars_df._session._metrics.execution_count
-
-    pd_df = scalars_pandas_df[df_columns]
-    pd_result = pd_df - pd_df.mean()
-
-    executions = execution_count_after - execution_count_before
-
-    assert executions == 1
-    assert_pandas_df_equal(bf_result, pd_result, check_dtype=False)
-
-
-def test_binop_with_self_aggregate_w_index_reset(scalars_dfs_maybe_ordered):
-    scalars_df, scalars_pandas_df = scalars_dfs_maybe_ordered
-
-    df_columns = ["int64_col", "float64_col", "int64_too"]
-
-    # Ensure that this takes the optimized single-query path by counting executions
-    execution_count_before = scalars_df._session._metrics.execution_count
-    bf_df = scalars_df[df_columns].reset_index(drop=True)
-    bf_result = (bf_df - bf_df.mean()).to_pandas()
-    execution_count_after = scalars_df._session._metrics.execution_count
-
-    pd_df = scalars_pandas_df[df_columns].reset_index(drop=True)
-    pd_result = pd_df - pd_df.mean()
-
-    executions = execution_count_after - execution_count_before
-
-    assert executions == 1
-    pd_result.index = pd_result.index.astype("Int64")
-    assert_pandas_df_equal(
-        bf_result, pd_result, check_dtype=False, check_index_type=False
-    )
-
-
-@pytest.mark.parametrize(
-    ("left_labels", "right_labels"),
-    [
-        (["a", "a", "b"], ["c", "c", "d"]),
-        (["a", "b", "c"], ["c", "a", "b"]),
-        (["a", "c", "c"], ["c", "a", "c"]),
-        (["a", "b", "c"], ["a", "b", "c"]),
-    ],
-    ids=[
-        "no_overlap",
-        "one_one_match",
-        "multi_match",
-        "exact_match",
-    ],
-)
-def test_binop_df_df_binary_op(
-    scalars_df_index,
-    scalars_df_2_index,
-    scalars_pandas_df_index,
-    left_labels,
-    right_labels,
-):
-    if pd.__version__.startswith("1."):
-        pytest.skip("pd.NA vs NaN not handled well in pandas 1.x.")
-    columns = ["int64_too", "int64_col", "float64_col"]
-
-    bf_df_a = scalars_df_index[columns]
-    bf_df_a.columns = left_labels
-    bf_df_b = scalars_df_2_index[columns]
-    bf_df_b.columns = right_labels
-    bf_result = (bf_df_a - bf_df_b).to_pandas()
-
-    pd_df_a = scalars_pandas_df_index[columns]
-    pd_df_a.columns = left_labels
-    pd_df_b = scalars_pandas_df_index[columns]
-    pd_df_b.columns = right_labels
-    pd_result = pd_df_a - pd_df_b
-
-    # Some dtype inconsistency for all-NULL columns
-    pd.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False)
-
-
-# Differnt table will only work for explicit index, since default index orders are arbitrary.
-@pytest.mark.parametrize(
-    ("ordered"),
-    [
-        (True),
-        (False),
-    ],
-)
-def test_series_binop_add_different_table(
-    scalars_df_index, scalars_pandas_df_index, scalars_df_2_index, ordered
-):
-    df_columns = ["int64_col", "float64_col"]
-    series_column = "int64_too"
-
-    bf_result = (
-        scalars_df_index[df_columns]
-        .add(scalars_df_2_index[series_column], axis="index")
-        .to_pandas(ordered=ordered)
-    )
-    pd_result = scalars_pandas_df_index[df_columns].add(
-        scalars_pandas_df_index[series_column], axis="index"
-    )
-
-    assert_pandas_df_equal(bf_result, pd_result, ignore_order=not ordered)
-
-
-# TODO(garrettwu): Test series binop with different index
-
-all_joins = pytest.mark.parametrize(
-    ("how",),
-    (("outer",), ("left",), ("right",), ("inner",), ("cross",)),
-)
-
-
-@all_joins
-def test_join_same_table(scalars_dfs_maybe_ordered, how):
-    bf_df, pd_df = scalars_dfs_maybe_ordered
-
-    bf_df_a = bf_df.set_index("int64_too")[["string_col", "int64_col"]]
-    bf_df_a = bf_df_a.sort_index()
-
-    bf_df_b = bf_df.set_index("int64_too")[["float64_col"]]
-    bf_df_b = bf_df_b[bf_df_b.float64_col > 0]
-    bf_df_b = bf_df_b.sort_values("float64_col")
-
-    bf_result = bf_df_a.join(bf_df_b, how=how).to_pandas()
-
-    pd_df_a = pd_df.set_index("int64_too")[["string_col", "int64_col"]].sort_index()
-    pd_df_a = pd_df_a.sort_index()
-
-    pd_df_b = pd_df.set_index("int64_too")[["float64_col"]]
-    pd_df_b = pd_df_b[pd_df_b.float64_col > 0]
-    pd_df_b = pd_df_b.sort_values("float64_col")
-
-    pd_result = pd_df_a.join(pd_df_b, how=how)
-
-    assert_pandas_df_equal(bf_result, pd_result, ignore_order=True)
-
-
-def test_join_incompatible_key_type_error(scalars_dfs):
-    bf_df, _ = scalars_dfs
-
-    bf_df_a = bf_df.set_index("int64_too")[["string_col", "int64_col"]]
-    bf_df_a = bf_df_a.sort_index()
-
-    bf_df_b = bf_df.set_index("date_col")[["float64_col"]]
-    bf_df_b = bf_df_b[bf_df_b.float64_col > 0]
-    bf_df_b = bf_df_b.sort_values("float64_col")
-
-    with pytest.raises(TypeError):
-        # joining incompatible date, int columns
-        bf_df_a.join(bf_df_b, how="left")
-
-
-@all_joins
-def test_join_different_table(
-    scalars_df_index, scalars_df_2_index, scalars_pandas_df_index, how
-):
-    bf_df_a = scalars_df_index[["string_col", "int64_col"]]
-    bf_df_b = scalars_df_2_index.dropna()[["float64_col"]]
-    bf_result = bf_df_a.join(bf_df_b, how=how).to_pandas()
-    pd_df_a = scalars_pandas_df_index[["string_col", "int64_col"]]
-    pd_df_b = scalars_pandas_df_index.dropna()[["float64_col"]]
-    pd_result = pd_df_a.join(pd_df_b, how=how)
-    assert_pandas_df_equal(bf_result, pd_result, ignore_order=True)
-
-
-@all_joins
-def test_join_different_table_with_duplicate_column_name(
-    scalars_df_index, scalars_pandas_df_index, how
-):
-    bf_df_a = scalars_df_index[["string_col", "int64_col", "int64_too"]].rename(
-        columns={"int64_too": "int64_col"}
-    )
-    bf_df_b = scalars_df_index.dropna()[
-        ["string_col", "int64_col", "int64_too"]
-    ].rename(columns={"int64_too": "int64_col"})
-    bf_result = bf_df_a.join(bf_df_b, how=how, lsuffix="_l", rsuffix="_r").to_pandas()
-    pd_df_a = scalars_pandas_df_index[["string_col", "int64_col", "int64_too"]].rename(
-        columns={"int64_too": "int64_col"}
-    )
-    pd_df_b = scalars_pandas_df_index.dropna()[
-        ["string_col", "int64_col", "int64_too"]
-    ].rename(columns={"int64_too": "int64_col"})
-    pd_result = pd_df_a.join(pd_df_b, how=how, lsuffix="_l", rsuffix="_r")
-
-    # Ensure no inplace changes
-    pd.testing.assert_index_equal(bf_df_a.columns, pd_df_a.columns)
-    pd.testing.assert_index_equal(bf_df_b.index.to_pandas(), pd_df_b.index)
-    pd.testing.assert_frame_equal(bf_result, pd_result, check_index_type=False)
-
-
-@all_joins
-def test_join_param_on_with_duplicate_column_name_not_on_col(
-    scalars_df_index, scalars_pandas_df_index, how
-):
-    # This test is for duplicate column names, but the 'on' column is not duplicated.
-    if how == "cross":
-        return
-    bf_df_a = scalars_df_index[
-        ["string_col", "datetime_col", "timestamp_col", "int64_too"]
-    ].rename(columns={"timestamp_col": "datetime_col"})
-    bf_df_b = scalars_df_index.dropna()[
-        ["string_col", "datetime_col", "timestamp_col"]
-    ].rename(columns={"timestamp_col": "datetime_col"})
-    bf_result = bf_df_a.join(
-        bf_df_b, on="int64_too", how=how, lsuffix="_l", rsuffix="_r"
-    ).to_pandas()
-    pd_df_a = scalars_pandas_df_index[
-        ["string_col", "datetime_col", "timestamp_col", "int64_too"]
-    ].rename(columns={"timestamp_col": "datetime_col"})
-    pd_df_b = scalars_pandas_df_index.dropna()[
-        ["string_col", "datetime_col", "timestamp_col"]
-    ].rename(columns={"timestamp_col": "datetime_col"})
-    pd_result = pd_df_a.join(
-        pd_df_b, on="int64_too", how=how, lsuffix="_l", rsuffix="_r"
-    )
-    pd.testing.assert_frame_equal(
-        bf_result.sort_index(),
-        pd_result.sort_index(),
-        check_like=True,
-        check_index_type=False,
-        check_names=False,
-    )
-    pd.testing.assert_index_equal(bf_result.columns, pd_result.columns)
-
-
-@pytest.mark.skipif(
-    pandas.__version__.startswith("1."), reason="bad left join in pandas 1.x"
-)
-@all_joins
-def test_join_param_on_with_duplicate_column_name_on_col(
-    scalars_df_index, scalars_pandas_df_index, how
-):
-    # This test is for duplicate column names, and the 'on' column is duplicated.
-    if how == "cross":
-        return
-    bf_df_a = scalars_df_index[
-        ["string_col", "datetime_col", "timestamp_col", "int64_too"]
-    ].rename(columns={"timestamp_col": "datetime_col"})
-    bf_df_b = scalars_df_index.dropna()[
-        ["string_col", "datetime_col", "timestamp_col", "int64_too"]
-    ].rename(columns={"timestamp_col": "datetime_col"})
-    bf_result = bf_df_a.join(
-        bf_df_b, on="int64_too", how=how, lsuffix="_l", rsuffix="_r"
-    ).to_pandas()
-    pd_df_a = scalars_pandas_df_index[
-        ["string_col", "datetime_col", "timestamp_col", "int64_too"]
-    ].rename(columns={"timestamp_col": "datetime_col"})
-    pd_df_b = scalars_pandas_df_index.dropna()[
-        ["string_col", "datetime_col", "timestamp_col", "int64_too"]
-    ].rename(columns={"timestamp_col": "datetime_col"})
-    pd_result = pd_df_a.join(
-        pd_df_b, on="int64_too", how=how, lsuffix="_l", rsuffix="_r"
-    )
-    pd.testing.assert_frame_equal(
-        bf_result.sort_index(),
-        pd_result.sort_index(),
-        check_like=True,
-        check_index_type=False,
-        check_names=False,
-    )
-    pd.testing.assert_index_equal(bf_result.columns, pd_result.columns)
-
-
-@all_joins
-def test_join_param_on(scalars_dfs, how):
-    bf_df, pd_df = scalars_dfs
-
-    bf_df_a = bf_df[["string_col", "int64_col", "rowindex_2"]]
-    bf_df_a = bf_df_a.assign(rowindex_2=bf_df_a["rowindex_2"] + 2)
-    bf_df_b = bf_df[["float64_col"]]
-
-    if how == "cross":
-        with pytest.raises(ValueError):
-            bf_df_a.join(bf_df_b, on="rowindex_2", how=how)
-    else:
-        bf_result = bf_df_a.join(bf_df_b, on="rowindex_2", how=how).to_pandas()
-
-        pd_df_a = pd_df[["string_col", "int64_col", "rowindex_2"]]
-        pd_df_a = pd_df_a.assign(rowindex_2=pd_df_a["rowindex_2"] + 2)
-        pd_df_b = pd_df[["float64_col"]]
-        pd_result = pd_df_a.join(pd_df_b, on="rowindex_2", how=how)
-        assert_pandas_df_equal(bf_result, pd_result, ignore_order=True)
-
-
-@all_joins
-def test_df_join_series(scalars_dfs, how):
-    bf_df, pd_df = scalars_dfs
-
-    bf_df_a = bf_df[["string_col", "int64_col", "rowindex_2"]]
-    bf_df_a = bf_df_a.assign(rowindex_2=bf_df_a["rowindex_2"] + 2)
-    bf_series_b = bf_df["float64_col"]
-
-    if how == "cross":
-        with pytest.raises(ValueError):
-            bf_df_a.join(bf_series_b, on="rowindex_2", how=how)
-    else:
-        bf_result = bf_df_a.join(bf_series_b, on="rowindex_2", how=how).to_pandas()
-
-        pd_df_a = pd_df[["string_col", "int64_col", "rowindex_2"]]
-        pd_df_a = pd_df_a.assign(rowindex_2=pd_df_a["rowindex_2"] + 2)
-        pd_series_b = pd_df["float64_col"]
-        pd_result = pd_df_a.join(pd_series_b, on="rowindex_2", how=how)
-        assert_pandas_df_equal(bf_result, pd_result, ignore_order=True)
-
-
-@pytest.mark.parametrize(
-    ("by", "ascending", "na_position"),
-    [
-        ("int64_col", True, "first"),
-        (["bool_col", "int64_col"], True, "last"),
-        ("int64_col", False, "first"),
-        (["bool_col", "int64_col"], [False, True], "last"),
-        (["bool_col", "int64_col"], [True, False], "first"),
-    ],
-)
-def test_dataframe_sort_values(
-    scalars_df_index, scalars_pandas_df_index, by, ascending, na_position
-):
-    # Test needs values to be unique
-    bf_result = scalars_df_index.sort_values(
-        by, ascending=ascending, na_position=na_position
-    ).to_pandas()
-    pd_result = scalars_pandas_df_index.sort_values(
-        by, ascending=ascending, na_position=na_position
-    )
-
-    pandas.testing.assert_frame_equal(
-        bf_result,
-        pd_result,
-    )
-
-
-@pytest.mark.parametrize(
-    ("by", "ascending", "na_position"),
-    [
-        ("int64_col", True, "first"),
-        (["bool_col", "int64_col"], True, "last"),
-    ],
-)
-def test_dataframe_sort_values_inplace(
-    scalars_df_index, scalars_pandas_df_index, by, ascending, na_position
-):
-    # Test needs values to be unique
-    bf_sorted = scalars_df_index.copy()
-    bf_sorted.sort_values(
-        by, ascending=ascending, na_position=na_position, inplace=True
-    )
-    bf_result = bf_sorted.to_pandas()
-    pd_result = scalars_pandas_df_index.sort_values(
-        by, ascending=ascending, na_position=na_position
-    )
-
-    pandas.testing.assert_frame_equal(
-        bf_result,
-        pd_result,
-    )
-
-
-def test_dataframe_sort_values_invalid_input(scalars_df_index):
-    with pytest.raises(KeyError):
-        scalars_df_index.sort_values(by=scalars_df_index["int64_col"])
-
-
-def test_dataframe_sort_values_stable(scalars_df_index, scalars_pandas_df_index):
-    bf_result = (
-        scalars_df_index.sort_values("int64_col", kind="stable")
-        .sort_values("bool_col", kind="stable")
-        .to_pandas()
-    )
-    pd_result = scalars_pandas_df_index.sort_values(
-        "int64_col", kind="stable"
-    ).sort_values("bool_col", kind="stable")
-
-    pandas.testing.assert_frame_equal(
-        bf_result,
-        pd_result,
-    )
-
-
-@pytest.mark.parametrize(
-    ("operator", "columns"),
-    [
-        pytest.param(lambda x: x.cumsum(), ["float64_col", "int64_too"]),
-        pytest.param(lambda x: x.cumprod(), ["float64_col", "int64_too"]),
-        pytest.param(
-            lambda x: x.cumprod(),
-            ["string_col"],
-            marks=pytest.mark.xfail(
-                raises=ValueError,
-            ),
-        ),
-    ],
-    ids=[
-        "cumsum",
-        "cumprod",
-        "non-numeric",
-    ],
-)
-def test_dataframe_numeric_analytic_op(
-    scalars_df_index, scalars_pandas_df_index, operator, columns
-):
-    # TODO: Add nullable ints (pandas 1.x has poor behavior on these)
-    bf_series = operator(scalars_df_index[columns])
-    pd_series = operator(scalars_pandas_df_index[columns])
-    bf_result = bf_series.to_pandas()
-    pd.testing.assert_frame_equal(pd_series, bf_result, check_dtype=False)
-
-
-@pytest.mark.parametrize(
-    ("operator"),
-    [
-        (lambda x: x.cummin()),
-        (lambda x: x.cummax()),
-        (lambda x: x.shift(2)),
-        (lambda x: x.shift(-2)),
-    ],
-    ids=[
-        "cummin",
-        "cummax",
-        "shiftpostive",
-        "shiftnegative",
-    ],
-)
-def test_dataframe_general_analytic_op(
-    scalars_df_index, scalars_pandas_df_index, operator
-):
-    col_names = ["int64_too", "float64_col", "int64_col", "bool_col"]
-    bf_series = operator(scalars_df_index[col_names])
-    pd_series = operator(scalars_pandas_df_index[col_names])
-    bf_result = bf_series.to_pandas()
-    pd.testing.assert_frame_equal(
-        pd_series,
-        bf_result,
-    )
-
-
-@pytest.mark.parametrize(
-    ("periods",),
-    [
-        (1,),
-        (2,),
-        (-1,),
-    ],
-)
-def test_dataframe_diff(scalars_df_index, scalars_pandas_df_index, periods):
-    col_names = ["int64_too", "float64_col", "int64_col"]
-    bf_result = scalars_df_index[col_names].diff(periods=periods).to_pandas()
-    pd_result = scalars_pandas_df_index[col_names].diff(periods=periods)
-    pd.testing.assert_frame_equal(
-        pd_result,
-        bf_result,
-    )
-
-
-@pytest.mark.parametrize(
-    ("periods",),
-    [
-        (1,),
-        (2,),
-        (-1,),
-    ],
-)
-def test_dataframe_pct_change(scalars_df_index, scalars_pandas_df_index, periods):
-    col_names = ["int64_too", "float64_col", "int64_col"]
-    bf_result = scalars_df_index[col_names].pct_change(periods=periods).to_pandas()
-    pd_result = scalars_pandas_df_index[col_names].pct_change(periods=periods)
-    pd.testing.assert_frame_equal(
-        pd_result,
-        bf_result,
-    )
-
-
-def test_dataframe_agg_single_string(scalars_dfs):
-    numeric_cols = ["int64_col", "int64_too", "float64_col"]
-    scalars_df, scalars_pandas_df = scalars_dfs
-
-    bf_result = scalars_df[numeric_cols].agg("sum").to_pandas()
-    pd_result = scalars_pandas_df[numeric_cols].agg("sum")
-
-    assert bf_result.dtype == "Float64"
-    pd.testing.assert_series_equal(
-        pd_result, bf_result, check_dtype=False, check_index_type=False
-    )
-
-
-@pytest.mark.parametrize(
-    ("agg",),
-    (
-        ("sum",),
-        ("size",),
-    ),
-)
-def test_dataframe_agg_int_single_string(scalars_dfs, agg):
-    numeric_cols = ["int64_col", "int64_too", "bool_col"]
-    scalars_df, scalars_pandas_df = scalars_dfs
-
-    bf_result = scalars_df[numeric_cols].agg(agg).to_pandas()
-    pd_result = scalars_pandas_df[numeric_cols].agg(agg)
-
-    assert bf_result.dtype == "Int64"
-    pd.testing.assert_series_equal(
-        pd_result, bf_result, check_dtype=False, check_index_type=False
-    )
-
-
-def test_dataframe_agg_multi_string(scalars_dfs_maybe_ordered):
-    numeric_cols = ["int64_col", "int64_too", "float64_col"]
-    aggregations = [
-        "sum",
-        "mean",
-        "median",
-        "std",
-        "var",
-        "min",
-        "max",
-        "nunique",
-        "count",
-    ]
-    scalars_df, scalars_pandas_df = scalars_dfs_maybe_ordered
-    bf_result = scalars_df[numeric_cols].agg(aggregations)
-    pd_result = scalars_pandas_df[numeric_cols].agg(aggregations)
-
-    # Pandas may produce narrower numeric types, but bigframes always produces Float64
-    pd_result = pd_result.astype("Float64")
-
-    # Drop median, as it's an approximation.
-    bf_median = bf_result.loc["median", :]
-    bf_result = bf_result.drop(labels=["median"])
-    pd_result = pd_result.drop(labels=["median"])
-
-    assert_dfs_equivalent(pd_result, bf_result, check_index_type=False)
-
-    # Double-check that median is at least plausible.
-    assert (
-        (bf_result.loc["min", :] <= bf_median) & (bf_median <= bf_result.loc["max", :])
-    ).all()
-
-
-def test_dataframe_agg_int_multi_string(scalars_dfs):
-    numeric_cols = ["int64_col", "int64_too", "bool_col"]
-    aggregations = [
-        "sum",
-        "nunique",
-        "count",
-        "size",
-    ]
-    scalars_df, scalars_pandas_df = scalars_dfs
-    bf_result = scalars_df[numeric_cols].agg(aggregations).to_pandas()
-    pd_result = scalars_pandas_df[numeric_cols].agg(aggregations)
-
-    for dtype in bf_result.dtypes:
-        assert dtype == "Int64"
-
-    # Pandas may produce narrower numeric types
-    # Pandas has object index type
-    pd.testing.assert_frame_equal(
-        pd_result, bf_result, check_dtype=False, check_index_type=False
-    )
-
-
-def test_df_transpose():
-    # Include some floats to ensure type coercion
-    values = [[0, 3.5, True], [1, 4.5, False], [2, 6.5, None]]
-    # Test complex case of both axes being multi-indices with non-unique elements
-
-    columns: pandas.Index = pd.Index(
-        ["A", "B", "A"], dtype=pd.StringDtype(storage="pyarrow")
-    )
-    columns_multi = pd.MultiIndex.from_arrays([columns, columns], names=["c1", "c2"])
-
-    index: pandas.Index = pd.Index(
-        ["b", "a", "a"], dtype=pd.StringDtype(storage="pyarrow")
-    )
-    rows_multi = pd.MultiIndex.from_arrays([index, index], names=["r1", "r2"])
-
-    pd_df = pandas.DataFrame(values, index=rows_multi, columns=columns_multi)
-    bf_df = dataframe.DataFrame(values, index=rows_multi, columns=columns_multi)
-
-    pd_result = pd_df.T
-    bf_result = bf_df.T.to_pandas()
-
-    pd.testing.assert_frame_equal(pd_result, bf_result, check_dtype=False)
-
-
-def test_df_transpose_error():
-    with pytest.raises(TypeError, match="Cannot coerce.*to a common type."):
-        dataframe.DataFrame([[1, "hello"], [2, "world"]]).transpose()
-
-
-def test_df_transpose_repeated_uses_cache():
-    bf_df = dataframe.DataFrame([[1, 2.5], [2, 3.5]])
-    pd_df = pandas.DataFrame([[1, 2.5], [2, 3.5]])
-    # Transposing many times so that operation will fail from complexity if not using cache
-    for i in range(10):
-        # Cache still works even with simple scalar binop
-        bf_df = bf_df.transpose() + i
-        pd_df = pd_df.transpose() + i
-
-    pd.testing.assert_frame_equal(
-        pd_df, bf_df.to_pandas(), check_dtype=False, check_index_type=False
-    )
-
-
-@pytest.mark.parametrize(
-    ("ordered"),
-    [
-        (True),
-        (False),
-    ],
-)
-def test_df_stack(scalars_dfs, ordered):
-    if pandas.__version__.startswith("1.") or pandas.__version__.startswith("2.0"):
-        pytest.skip("pandas <2.1 uses different stack implementation")
-    scalars_df, scalars_pandas_df = scalars_dfs
-    # To match bigquery dataframes
-    scalars_pandas_df = scalars_pandas_df.copy()
-    scalars_pandas_df.columns = scalars_pandas_df.columns.astype("string[pyarrow]")
-    # Can only stack identically-typed columns
-    columns = ["int64_col", "int64_too", "rowindex_2"]
-
-    bf_result = scalars_df[columns].stack().to_pandas(ordered=ordered)
-    pd_result = scalars_pandas_df[columns].stack(future_stack=True)
-
-    # Pandas produces NaN, where bq dataframes produces pd.NA
-    assert_series_equal(
-        bf_result, pd_result, check_dtype=False, ignore_order=not ordered
-    )
-
-
-def test_df_melt_default(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    # To match bigquery dataframes
-    scalars_pandas_df = scalars_pandas_df.copy()
-    scalars_pandas_df.columns = scalars_pandas_df.columns.astype("string[pyarrow]")
-    # Can only stack identically-typed columns
-    columns = ["int64_col", "int64_too", "rowindex_2"]
-
-    bf_result = scalars_df[columns].melt().to_pandas()
-    pd_result = scalars_pandas_df[columns].melt()
-
-    # Pandas produces int64 index, Bigframes produces Int64 (nullable)
-    pd.testing.assert_frame_equal(
-        bf_result,
-        pd_result,
-        check_index_type=False,
-        check_dtype=False,
-    )
-
-
-def test_df_melt_parameterized(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    # To match bigquery dataframes
-    scalars_pandas_df = scalars_pandas_df.copy()
-    scalars_pandas_df.columns = scalars_pandas_df.columns.astype("string[pyarrow]")
-    # Can only stack identically-typed columns
-
-    bf_result = scalars_df.melt(
-        var_name="alice",
-        value_name="bob",
-        id_vars=["string_col"],
-        value_vars=["int64_col", "int64_too"],
-    ).to_pandas()
-    pd_result = scalars_pandas_df.melt(
-        var_name="alice",
-        value_name="bob",
-        id_vars=["string_col"],
-        value_vars=["int64_col", "int64_too"],
-    )
-
-    # Pandas produces int64 index, Bigframes produces Int64 (nullable)
-    pd.testing.assert_frame_equal(
-        bf_result, pd_result, check_index_type=False, check_dtype=False
-    )
-
-
-@pytest.mark.parametrize(
-    ("ordered"),
-    [
-        (True),
-        (False),
-    ],
-)
-def test_df_unstack(scalars_dfs, ordered):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    # To match bigquery dataframes
-    scalars_pandas_df = scalars_pandas_df.copy()
-    scalars_pandas_df.columns = scalars_pandas_df.columns.astype("string[pyarrow]")
-    # Can only stack identically-typed columns
-    columns = [
-        "rowindex_2",
-        "int64_col",
-        "int64_too",
-    ]
-
-    # unstack on mono-index produces series
-    bf_result = scalars_df[columns].unstack().to_pandas(ordered=ordered)
-    pd_result = scalars_pandas_df[columns].unstack()
-
-    # Pandas produces NaN, where bq dataframes produces pd.NA
-    assert_series_equal(
-        bf_result, pd_result, check_dtype=False, ignore_order=not ordered
-    )
-
-
-@pytest.mark.parametrize(
-    ("values", "index", "columns"),
-    [
-        ("int64_col", "int64_too", ["string_col"]),
-        (["int64_col"], "int64_too", ["string_col"]),
-        (["int64_col", "float64_col"], "int64_too", ["string_col"]),
-    ],
-)
-def test_df_pivot(scalars_dfs, values, index, columns):
-    scalars_df, scalars_pandas_df = scalars_dfs
-
-    bf_result = scalars_df.pivot(
-        values=values, index=index, columns=columns
-    ).to_pandas()
-    pd_result = scalars_pandas_df.pivot(values=values, index=index, columns=columns)
-
-    # Pandas produces NaN, where bq dataframes produces pd.NA
-    bf_result = bf_result.fillna(float("nan"))
-    pd_result = pd_result.fillna(float("nan"))
-    pd.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False)
-
-
-@pytest.mark.parametrize(
-    ("values", "index", "columns"),
-    [
-        (["goals", "assists"], ["team_name", "season"], ["position"]),
-        (["goals", "assists"], ["season"], ["team_name", "position"]),
-    ],
-)
-def test_df_pivot_hockey(hockey_df, hockey_pandas_df, values, index, columns):
-    bf_result = (
-        hockey_df.reset_index()
-        .pivot(values=values, index=index, columns=columns)
-        .to_pandas()
-    )
-    pd_result = hockey_pandas_df.reset_index().pivot(
-        values=values, index=index, columns=columns
-    )
-
-    # Pandas produces NaN, where bq dataframes produces pd.NA
-    pd.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False)
-
-
-@pytest.mark.parametrize(
-    ("values", "index", "columns", "aggfunc"),
-    [
-        (("culmen_length_mm", "body_mass_g"), "species", "sex", "std"),
-        (["body_mass_g", "culmen_length_mm"], ("species", "island"), "sex", "sum"),
-        ("body_mass_g", "sex", ["island", "species"], "mean"),
-        ("culmen_depth_mm", "island", "species", "max"),
-    ],
-)
-def test_df_pivot_table(
-    penguins_df_default_index,
-    penguins_pandas_df_default_index,
-    values,
-    index,
-    columns,
-    aggfunc,
-):
-    bf_result = penguins_df_default_index.pivot_table(
-        values=values, index=index, columns=columns, aggfunc=aggfunc
-    ).to_pandas()
-    pd_result = penguins_pandas_df_default_index.pivot_table(
-        values=values, index=index, columns=columns, aggfunc=aggfunc
-    )
-    pd.testing.assert_frame_equal(
-        bf_result, pd_result, check_dtype=False, check_column_type=False
-    )
-
-
-def test_ipython_key_completions_with_drop(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    col_names = "string_col"
-    bf_dataframe = scalars_df.drop(columns=col_names)
-    pd_dataframe = scalars_pandas_df.drop(columns=col_names)
-    expected = pd_dataframe.columns.tolist()
-
-    results = bf_dataframe._ipython_key_completions_()
-
-    assert col_names not in results
-    assert results == expected
-    # _ipython_key_completions_ is called with square brackets
-    # so only column names are relevant with tab completion
-    assert "to_gbq" not in results
-    assert "merge" not in results
-    assert "drop" not in results
-
-
-def test_ipython_key_completions_with_rename(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    col_name_dict = {"string_col": "a_renamed_column"}
-    bf_dataframe = scalars_df.rename(columns=col_name_dict)
-    pd_dataframe = scalars_pandas_df.rename(columns=col_name_dict)
-    expected = pd_dataframe.columns.tolist()
-
-    results = bf_dataframe._ipython_key_completions_()
-
-    assert "string_col" not in results
-    assert "a_renamed_column" in results
-    assert results == expected
-    # _ipython_key_completions_ is called with square brackets
-    # so only column names are relevant with tab completion
-    assert "to_gbq" not in results
-    assert "merge" not in results
-    assert "drop" not in results
-
-
-def test__dir__with_drop(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    col_names = "string_col"
-    bf_dataframe = scalars_df.drop(columns=col_names)
-    pd_dataframe = scalars_pandas_df.drop(columns=col_names)
-    expected = pd_dataframe.columns.tolist()
-
-    results = dir(bf_dataframe)
-
-    assert col_names not in results
-    assert frozenset(expected) <= frozenset(results)
-    # __dir__ is called with a '.' and displays all methods, columns names, etc.
-    assert "to_gbq" in results
-    assert "merge" in results
-    assert "drop" in results
-
-
-def test__dir__with_rename(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    col_name_dict = {"string_col": "a_renamed_column"}
-    bf_dataframe = scalars_df.rename(columns=col_name_dict)
-    pd_dataframe = scalars_pandas_df.rename(columns=col_name_dict)
-    expected = pd_dataframe.columns.tolist()
-
-    results = dir(bf_dataframe)
-
-    assert "string_col" not in results
-    assert "a_renamed_column" in results
-    assert frozenset(expected) <= frozenset(results)
-    # __dir__ is called with a '.' and displays all methods, columns names, etc.
-    assert "to_gbq" in results
-    assert "merge" in results
-    assert "drop" in results
-
-
-def test_loc_select_columns_w_repeats(scalars_df_index, scalars_pandas_df_index):
-    bf_result = scalars_df_index[["int64_col", "int64_col", "int64_too"]].to_pandas()
-    pd_result = scalars_pandas_df_index[["int64_col", "int64_col", "int64_too"]]
-    pd.testing.assert_frame_equal(
-        bf_result,
-        pd_result,
-    )
-
-
-@pytest.mark.parametrize(
-    ("start", "stop", "step"),
-    [
-        (0, 0, None),
-        (None, None, None),
-        (1, None, None),
-        (None, 4, None),
-        (None, None, 2),
-        (None, 50000000000, 1),
-        (5, 4, None),
-        (3, None, 2),
-        (1, 7, 2),
-        (1, 7, 50000000000),
-    ],
-)
-def test_iloc_slice(scalars_df_index, scalars_pandas_df_index, start, stop, step):
-    bf_result = scalars_df_index.iloc[start:stop:step].to_pandas()
-    pd_result = scalars_pandas_df_index.iloc[start:stop:step]
-    pd.testing.assert_frame_equal(
-        bf_result,
-        pd_result,
-    )
-
-
-@pytest.mark.parametrize(
-    ("start", "stop", "step"),
-    [
-        (0, 0, None),
-    ],
-)
-def test_iloc_slice_after_cache(
-    scalars_df_index, scalars_pandas_df_index, start, stop, step
-):
-    scalars_df_index.cache()
-    bf_result = scalars_df_index.iloc[start:stop:step].to_pandas()
-    pd_result = scalars_pandas_df_index.iloc[start:stop:step]
-    pd.testing.assert_frame_equal(
-        bf_result,
-        pd_result,
-    )
-
-
-def test_iloc_slice_zero_step(scalars_df_index):
-    with pytest.raises(ValueError):
-        scalars_df_index.iloc[0:0:0]
-
-
-@pytest.mark.parametrize(
-    ("ordered"),
-    [
-        (True),
-        (False),
-    ],
-)
-def test_iloc_slice_nested(scalars_df_index, scalars_pandas_df_index, ordered):
-    bf_result = scalars_df_index.iloc[1:].iloc[1:].to_pandas(ordered=ordered)
-    pd_result = scalars_pandas_df_index.iloc[1:].iloc[1:]
-
-    assert_pandas_df_equal(bf_result, pd_result, ignore_order=not ordered)
-
-
-@pytest.mark.parametrize(
-    "index",
-    [0, 5, -2, (2,)],
-)
-def test_iloc_single_integer(scalars_df_index, scalars_pandas_df_index, index):
-    bf_result = scalars_df_index.iloc[index]
-    pd_result = scalars_pandas_df_index.iloc[index]
-
-    pd.testing.assert_series_equal(
-        bf_result,
-        pd_result,
-    )
-
-
-@pytest.mark.parametrize(
-    "index",
-    [(2, 5), (5, 0), (0, 0)],
-)
-def test_iloc_tuple(scalars_df_index, scalars_pandas_df_index, index):
-    bf_result = scalars_df_index.iloc[index]
-    pd_result = scalars_pandas_df_index.iloc[index]
-
-    assert bf_result == pd_result
-
-
-@pytest.mark.parametrize(
-    "index",
-    [(slice(None), [1, 2, 3]), (slice(1, 7, 2), [2, 5, 3])],
-)
-def test_iloc_tuple_multi_columns(scalars_df_index, scalars_pandas_df_index, index):
-    bf_result = scalars_df_index.iloc[index].to_pandas()
-    pd_result = scalars_pandas_df_index.iloc[index]
-
-    pd.testing.assert_frame_equal(bf_result, pd_result)
-
-
-def test_iloc_tuple_multi_columns_single_row(scalars_df_index, scalars_pandas_df_index):
-    index = (2, [2, 1, 3, -4])
-    bf_result = scalars_df_index.iloc[index]
-    pd_result = scalars_pandas_df_index.iloc[index]
-    pd.testing.assert_series_equal(bf_result, pd_result)
-
-
-@pytest.mark.parametrize(
-    ("index", "error"),
-    [
-        ((1, 1, 1), pd.errors.IndexingError),
-        (("asd", "asd", "asd"), pd.errors.IndexingError),
-        (("asd"), TypeError),
-    ],
-)
-def test_iloc_tuple_errors(scalars_df_index, scalars_pandas_df_index, index, error):
-    with pytest.raises(error):
-        scalars_df_index.iloc[index]
-    with pytest.raises(error):
-        scalars_pandas_df_index.iloc[index]
-
-
-@pytest.mark.parametrize(
-    "index",
-    [(2, 5), (5, 0), (0, 0)],
-)
-def test_iat(scalars_df_index, scalars_pandas_df_index, index):
-    bf_result = scalars_df_index.iat[index]
-    pd_result = scalars_pandas_df_index.iat[index]
-
-    assert bf_result == pd_result
-
-
-@pytest.mark.parametrize(
-    ("index", "error"),
-    [
-        (0, TypeError),
-        ("asd", ValueError),
-        ((1, 2, 3), TypeError),
-        (("asd", "asd"), ValueError),
-    ],
-)
-def test_iat_errors(scalars_df_index, scalars_pandas_df_index, index, error):
-    with pytest.raises(error):
-        scalars_pandas_df_index.iat[index]
-    with pytest.raises(error):
-        scalars_df_index.iat[index]
-
-
-def test_iloc_single_integer_out_of_bound_error(scalars_df_index):
-    with pytest.raises(IndexError, match="single positional indexer is out-of-bounds"):
-        scalars_df_index.iloc[99]
-
-
-def test_loc_bool_series(scalars_df_index, scalars_pandas_df_index):
-    bf_result = scalars_df_index.loc[scalars_df_index.bool_col].to_pandas()
-    pd_result = scalars_pandas_df_index.loc[scalars_pandas_df_index.bool_col]
-
-    pd.testing.assert_frame_equal(
-        bf_result,
-        pd_result,
-    )
-
-
-def test_loc_list_select_rows_and_columns(scalars_df_index, scalars_pandas_df_index):
-    idx_list = [0, 3, 5]
-    bf_result = scalars_df_index.loc[idx_list, ["bool_col", "int64_col"]].to_pandas()
-    pd_result = scalars_pandas_df_index.loc[idx_list, ["bool_col", "int64_col"]]
-
-    pd.testing.assert_frame_equal(
-        bf_result,
-        pd_result,
-    )
-
-
-def test_loc_select_column(scalars_df_index, scalars_pandas_df_index):
-    bf_result = scalars_df_index.loc[:, "int64_col"].to_pandas()
-    pd_result = scalars_pandas_df_index.loc[:, "int64_col"]
-    pd.testing.assert_series_equal(
-        bf_result,
-        pd_result,
-    )
-
-
-def test_loc_select_with_column_condition(scalars_df_index, scalars_pandas_df_index):
-    bf_result = scalars_df_index.loc[:, scalars_df_index.dtypes == "Int64"].to_pandas()
-    pd_result = scalars_pandas_df_index.loc[
-        :, scalars_pandas_df_index.dtypes == "Int64"
-    ]
-    pd.testing.assert_frame_equal(
-        bf_result,
-        pd_result,
-    )
-
-
-def test_loc_select_with_column_condition_bf_series(
-    scalars_df_index, scalars_pandas_df_index
-):
-    # (b/347072677) GEOGRAPH type doesn't support DISTINCT op
-    columns = [
-        item for item in scalars_pandas_df_index.columns if item != "geography_col"
-    ]
-    scalars_df_index = scalars_df_index[columns]
-    scalars_pandas_df_index = scalars_pandas_df_index[columns]
-
-    size_half = len(scalars_pandas_df_index) / 2
-    bf_result = scalars_df_index.loc[
-        :, scalars_df_index.nunique() > size_half
-    ].to_pandas()
-    pd_result = scalars_pandas_df_index.loc[
-        :, scalars_pandas_df_index.nunique() > size_half
-    ]
-    pd.testing.assert_frame_equal(
-        bf_result,
-        pd_result,
-    )
-
-
-def test_loc_single_index_with_duplicate(scalars_df_index, scalars_pandas_df_index):
-    scalars_df_index = scalars_df_index.set_index("string_col", drop=False)
-    scalars_pandas_df_index = scalars_pandas_df_index.set_index(
-        "string_col", drop=False
-    )
-    index = "Hello, World!"
-    bf_result = scalars_df_index.loc[index]
-    pd_result = scalars_pandas_df_index.loc[index]
-    pd.testing.assert_frame_equal(
-        bf_result.to_pandas(),
-        pd_result,
-    )
-
-
-def test_loc_single_index_no_duplicate(scalars_df_index, scalars_pandas_df_index):
-    scalars_df_index = scalars_df_index.set_index("int64_too", drop=False)
-    scalars_pandas_df_index = scalars_pandas_df_index.set_index("int64_too", drop=False)
-    index = -2345
-    bf_result = scalars_df_index.loc[index]
-    pd_result = scalars_pandas_df_index.loc[index]
-    pd.testing.assert_series_equal(
-        bf_result,
-        pd_result,
-    )
-
-
-def test_at_with_duplicate(scalars_df_index, scalars_pandas_df_index):
-    scalars_df_index = scalars_df_index.set_index("string_col", drop=False)
-    scalars_pandas_df_index = scalars_pandas_df_index.set_index(
-        "string_col", drop=False
-    )
-    index = "Hello, World!"
-    bf_result = scalars_df_index.at[index, "int64_too"]
-    pd_result = scalars_pandas_df_index.at[index, "int64_too"]
-    pd.testing.assert_series_equal(
-        bf_result.to_pandas(),
-        pd_result,
-    )
-
-
-def test_at_no_duplicate(scalars_df_index, scalars_pandas_df_index):
-    scalars_df_index = scalars_df_index.set_index("int64_too", drop=False)
-    scalars_pandas_df_index = scalars_pandas_df_index.set_index("int64_too", drop=False)
-    index = -2345
-    bf_result = scalars_df_index.at[index, "string_col"]
-    pd_result = scalars_pandas_df_index.at[index, "string_col"]
-    assert bf_result == pd_result
-
-
-def test_loc_setitem_bool_series_scalar_new_col(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    bf_df = scalars_df.copy()
-    pd_df = scalars_pandas_df.copy()
-    bf_df.loc[bf_df["int64_too"] == 0, "new_col"] = 99
-    pd_df.loc[pd_df["int64_too"] == 0, "new_col"] = 99
-
-    # pandas uses float64 instead
-    pd_df["new_col"] = pd_df["new_col"].astype("Float64")
-
-    pd.testing.assert_frame_equal(
-        bf_df.to_pandas(),
-        pd_df,
-    )
-
-
-@pytest.mark.parametrize(
-    ("col", "value"),
-    [
-        ("string_col", "hello"),
-        ("int64_col", 3),
-        ("float64_col", 3.5),
-    ],
-)
-def test_loc_setitem_bool_series_scalar_existing_col(scalars_dfs, col, value):
-    if pd.__version__.startswith("1."):
-        pytest.skip("this loc overload not supported in pandas 1.x.")
-
-    scalars_df, scalars_pandas_df = scalars_dfs
-    bf_df = scalars_df.copy()
-    pd_df = scalars_pandas_df.copy()
-    bf_df.loc[bf_df["int64_too"] == 1, col] = value
-    pd_df.loc[pd_df["int64_too"] == 1, col] = value
-
-    pd.testing.assert_frame_equal(
-        bf_df.to_pandas(),
-        pd_df,
-    )
-
-
-def test_loc_setitem_bool_series_scalar_error(scalars_dfs):
-    if pd.__version__.startswith("1."):
-        pytest.skip("this loc overload not supported in pandas 1.x.")
-
-    scalars_df, scalars_pandas_df = scalars_dfs
-    bf_df = scalars_df.copy()
-    pd_df = scalars_pandas_df.copy()
-
-    with pytest.raises(Exception):
-        bf_df.loc[bf_df["int64_too"] == 1, "string_col"] = 99
-    with pytest.raises(Exception):
-        pd_df.loc[pd_df["int64_too"] == 1, "string_col"] = 99
-
-
-@pytest.mark.parametrize(
-    ("col", "op"),
-    [
-        # Int aggregates
-        pytest.param("int64_col", lambda x: x.sum(), id="int-sum"),
-        pytest.param("int64_col", lambda x: x.min(), id="int-min"),
-        pytest.param("int64_col", lambda x: x.max(), id="int-max"),
-        pytest.param("int64_col", lambda x: x.count(), id="int-count"),
-        pytest.param("int64_col", lambda x: x.nunique(), id="int-nunique"),
-        # Float aggregates
-        pytest.param("float64_col", lambda x: x.count(), id="float-count"),
-        pytest.param("float64_col", lambda x: x.nunique(), id="float-nunique"),
-        # Bool aggregates
-        pytest.param("bool_col", lambda x: x.sum(), id="bool-sum"),
-        pytest.param("bool_col", lambda x: x.count(), id="bool-count"),
-        pytest.param("bool_col", lambda x: x.nunique(), id="bool-nunique"),
-        # String aggregates
-        pytest.param("string_col", lambda x: x.count(), id="string-count"),
-        pytest.param("string_col", lambda x: x.nunique(), id="string-nunique"),
-    ],
-)
-def test_dataframe_aggregate_int(scalars_df_index, scalars_pandas_df_index, col, op):
-    bf_result = op(scalars_df_index[[col]]).to_pandas()
-    pd_result = op(scalars_pandas_df_index[[col]])
-
-    # Check dtype separately
-    assert bf_result.dtype == "Int64"
-    # Is otherwise "object" dtype
-    pd_result.index = pd_result.index.astype("string[pyarrow]")
-    # Pandas may produce narrower numeric types
-    assert_series_equal(pd_result, bf_result, check_dtype=False, check_index_type=False)
-
-
-@pytest.mark.parametrize(
-    ("col", "op"),
-    [
-        pytest.param("bool_col", lambda x: x.min(), id="bool-min"),
-        pytest.param("bool_col", lambda x: x.max(), id="bool-max"),
-    ],
-)
-def test_dataframe_aggregate_bool(scalars_df_index, scalars_pandas_df_index, col, op):
-    bf_result = op(scalars_df_index[[col]]).to_pandas()
-    pd_result = op(scalars_pandas_df_index[[col]])
-
-    # Check dtype separately
-    assert bf_result.dtype == "boolean"
-
-    # Pandas may produce narrower numeric types
-    # Pandas has object index type
-    pd_result.index = pd_result.index.astype("string[pyarrow]")
-    assert_series_equal(pd_result, bf_result, check_dtype=False, check_index_type=False)
-
-
-@pytest.mark.parametrize(
-    ("op", "bf_dtype"),
-    [
-        (lambda x: x.sum(numeric_only=True), "Float64"),
-        (lambda x: x.mean(numeric_only=True), "Float64"),
-        (lambda x: x.min(numeric_only=True), "Float64"),
-        (lambda x: x.max(numeric_only=True), "Float64"),
-        (lambda x: x.std(numeric_only=True), "Float64"),
-        (lambda x: x.var(numeric_only=True), "Float64"),
-        (lambda x: x.count(numeric_only=False), "Int64"),
-        (lambda x: x.nunique(), "Int64"),
-    ],
-    ids=["sum", "mean", "min", "max", "std", "var", "count", "nunique"],
-)
-def test_dataframe_aggregates(scalars_dfs_maybe_ordered, op, bf_dtype):
-    scalars_df_index, scalars_pandas_df_index = scalars_dfs_maybe_ordered
-    col_names = ["int64_too", "float64_col", "string_col", "int64_col", "bool_col"]
-    bf_series = op(scalars_df_index[col_names])
-    bf_result = bf_series
-    pd_result = op(scalars_pandas_df_index[col_names])
-
-    # Check dtype separately
-    assert bf_result.dtype == bf_dtype
-
-    # Pandas may produce narrower numeric types, but bigframes always produces Float64
-    # Pandas has object index type
-    pd_result.index = pd_result.index.astype("string[pyarrow]")
-    assert_series_equivalent(
-        pd_result,
-        bf_result,
-        check_dtype=False,
-        check_index_type=False,
-    )
-
-
-@pytest.mark.parametrize(
-    ("op"),
-    [
-        (lambda x: x.sum(axis=1, numeric_only=True)),
-        (lambda x: x.mean(axis=1, numeric_only=True)),
-        (lambda x: x.min(axis=1, numeric_only=True)),
-        (lambda x: x.max(axis=1, numeric_only=True)),
-        (lambda x: x.std(axis=1, numeric_only=True)),
-        (lambda x: x.var(axis=1, numeric_only=True)),
-    ],
-    ids=["sum", "mean", "min", "max", "std", "var"],
-)
-def test_dataframe_aggregates_axis_1(scalars_df_index, scalars_pandas_df_index, op):
-    col_names = ["int64_too", "int64_col", "float64_col", "bool_col", "string_col"]
-    bf_result = op(scalars_df_index[col_names]).to_pandas()
-    pd_result = op(scalars_pandas_df_index[col_names])
-
-    # Pandas may produce narrower numeric types, but bigframes always produces Float64
-    pd_result = pd_result.astype("Float64")
-    # Pandas has object index type
-    pd.testing.assert_series_equal(pd_result, bf_result, check_index_type=False)
-
-
-def test_dataframe_aggregates_median(scalars_df_index, scalars_pandas_df_index):
-    col_names = ["int64_too", "float64_col", "int64_col", "bool_col"]
-    bf_result = scalars_df_index[col_names].median(numeric_only=True).to_pandas()
-    pd_result = scalars_pandas_df_index[col_names].agg(["min", "max"])
-
-    # Pandas may produce narrower numeric types, but bigframes always produces Float64
-    pd_result = pd_result.astype("Float64")
-
-    # Median is an approximation, but double-check that median is plausible.
-    for col in col_names:
-        assert (pd_result.loc["min", col] <= bf_result[col]) and (
-            bf_result[col] <= pd_result.loc["max", col]
-        )
-
-
-def test_dataframe_aggregates_quantile_mono(scalars_df_index, scalars_pandas_df_index):
-    q = 0.45
-    col_names = ["int64_too", "int64_col", "float64_col"]
-    bf_result = scalars_df_index[col_names].quantile(q=q).to_pandas()
-    pd_result = scalars_pandas_df_index[col_names].quantile(q=q)
-
-    # Pandas may produce narrower numeric types, but bigframes always produces Float64
-    pd_result = pd_result.astype("Float64")
-
-    pd.testing.assert_series_equal(bf_result, pd_result, check_index_type=False)
-
-
-def test_dataframe_aggregates_quantile_multi(scalars_df_index, scalars_pandas_df_index):
-    q = [0, 0.33, 0.67, 1.0]
-    col_names = ["int64_too", "int64_col", "float64_col"]
-    bf_result = scalars_df_index[col_names].quantile(q=q).to_pandas()
-    pd_result = scalars_pandas_df_index[col_names].quantile(q=q)
-
-    # Pandas may produce narrower numeric types, but bigframes always produces Float64
-    pd_result = pd_result.astype("Float64")
-    pd_result.index = pd_result.index.astype("Float64")
-
-    pd.testing.assert_frame_equal(bf_result, pd_result)
-
-
-@pytest.mark.parametrize(
-    ("op"),
-    [
-        (lambda x: x.all(bool_only=True)),
-        (lambda x: x.any(bool_only=True)),
-        (lambda x: x.all(axis=1, bool_only=True)),
-        (lambda x: x.any(axis=1, bool_only=True)),
-    ],
-    ids=["all_axis0", "any_axis0", "all_axis1", "any_axis1"],
-)
-def test_dataframe_bool_aggregates(scalars_df_index, scalars_pandas_df_index, op):
-    # Pandas will drop nullable 'boolean' dtype so we convert first to bool, then cast back later
-    scalars_df_index = scalars_df_index.assign(
-        bool_col=scalars_df_index.bool_col.fillna(False)
-    )
-    scalars_pandas_df_index = scalars_pandas_df_index.assign(
-        bool_col=scalars_pandas_df_index.bool_col.fillna(False).astype("bool")
-    )
-    bf_series = op(scalars_df_index)
-    pd_series = op(scalars_pandas_df_index).astype("boolean")
-    bf_result = bf_series.to_pandas()
-
-    pd_series.index = pd_series.index.astype(bf_result.index.dtype)
-    pd.testing.assert_series_equal(pd_series, bf_result, check_index_type=False)
-
-
-def test_dataframe_prod(scalars_df_index, scalars_pandas_df_index):
-    col_names = ["int64_too", "float64_col"]
-    bf_series = scalars_df_index[col_names].prod()
-    pd_series = scalars_pandas_df_index[col_names].prod()
-    bf_result = bf_series.to_pandas()
-
-    # Pandas may produce narrower numeric types, but bigframes always produces Float64
-    pd_series = pd_series.astype("Float64")
-    # Pandas has object index type
-    pd.testing.assert_series_equal(pd_series, bf_result, check_index_type=False)
-
-
-def test_df_skew_too_few_values(scalars_dfs):
-    columns = ["float64_col", "int64_col"]
-    scalars_df, scalars_pandas_df = scalars_dfs
-    bf_result = scalars_df[columns].head(2).skew().to_pandas()
-    pd_result = scalars_pandas_df[columns].head(2).skew()
-
-    # Pandas may produce narrower numeric types, but bigframes always produces Float64
-    pd_result = pd_result.astype("Float64")
-
-    pd.testing.assert_series_equal(pd_result, bf_result, check_index_type=False)
-
-
-@pytest.mark.parametrize(
-    ("ordered"),
-    [
-        (True),
-        (False),
-    ],
-)
-def test_df_skew(scalars_dfs, ordered):
-    columns = ["float64_col", "int64_col"]
-    scalars_df, scalars_pandas_df = scalars_dfs
-    bf_result = scalars_df[columns].skew().to_pandas(ordered=ordered)
-    pd_result = scalars_pandas_df[columns].skew()
-
-    # Pandas may produce narrower numeric types, but bigframes always produces Float64
-    pd_result = pd_result.astype("Float64")
-
-    assert_series_equal(
-        pd_result, bf_result, check_index_type=False, ignore_order=not ordered
-    )
-
-
-def test_df_kurt_too_few_values(scalars_dfs):
-    columns = ["float64_col", "int64_col"]
-    scalars_df, scalars_pandas_df = scalars_dfs
-    bf_result = scalars_df[columns].head(2).kurt().to_pandas()
-    pd_result = scalars_pandas_df[columns].head(2).kurt()
-
-    # Pandas may produce narrower numeric types, but bigframes always produces Float64
-    pd_result = pd_result.astype("Float64")
-
-    pd.testing.assert_series_equal(pd_result, bf_result, check_index_type=False)
-
-
-def test_df_kurt(scalars_dfs):
-    columns = ["float64_col", "int64_col"]
-    scalars_df, scalars_pandas_df = scalars_dfs
-    bf_result = scalars_df[columns].kurt().to_pandas()
-    pd_result = scalars_pandas_df[columns].kurt()
-
-    # Pandas may produce narrower numeric types, but bigframes always produces Float64
-    pd_result = pd_result.astype("Float64")
-
-    pd.testing.assert_series_equal(pd_result, bf_result, check_index_type=False)
-
-
-@pytest.mark.parametrize(
-    ("frac", "n", "random_state"),
-    [
-        (None, 4, None),
-        (0.5, None, None),
-        (None, 4, 10),
-        (0.5, None, 10),
-        (None, None, None),
-    ],
-    ids=[
-        "n_wo_random_state",
-        "frac_wo_random_state",
-        "n_w_random_state",
-        "frac_w_random_state",
-        "n_default",
-    ],
-)
-def test_sample(scalars_dfs, frac, n, random_state):
-    scalars_df, _ = scalars_dfs
-    df = scalars_df.sample(frac=frac, n=n, random_state=random_state)
-    bf_result = df.to_pandas()
-
-    n = 1 if n is None else n
-    expected_sample_size = round(frac * scalars_df.shape[0]) if frac is not None else n
-    assert bf_result.shape[0] == expected_sample_size
-    assert bf_result.shape[1] == scalars_df.shape[1]
-
-
-def test_sample_determinism(penguins_df_default_index):
-    df = penguins_df_default_index.sample(n=100, random_state=12345).head(15)
-    bf_result = df.to_pandas()
-    bf_result2 = df.to_pandas()
-
-    pandas.testing.assert_frame_equal(bf_result, bf_result2)
-
-
-def test_sample_raises_value_error(scalars_dfs):
-    scalars_df, _ = scalars_dfs
-    with pytest.raises(
-        ValueError, match="Only one of 'n' or 'frac' parameter can be specified."
-    ):
-        scalars_df.sample(frac=0.5, n=4)
-
-
-def test_sample_args_sort(scalars_dfs):
-    scalars_df, _ = scalars_dfs
-    index = [4, 3, 2, 5, 1, 0]
-    scalars_df = scalars_df.iloc[index]
-
-    kwargs = {"frac": 1.0, "random_state": 333}
-
-    df = scalars_df.sample(**kwargs).to_pandas()
-    assert df.index.values != index
-    assert df.index.values != sorted(index)
-
-    df = scalars_df.sample(sort="random", **kwargs).to_pandas()
-    assert df.index.values != index
-    assert df.index.values != sorted(index)
-
-    df = scalars_df.sample(sort=True, **kwargs).to_pandas()
-    assert df.index.values == sorted(index)
-
-    df = scalars_df.sample(sort=False, **kwargs).to_pandas()
-    assert df.index.values == index
-
-
-@pytest.mark.parametrize(
-    ("axis",),
-    [
-        (None,),
-        (0,),
-        (1,),
-    ],
-)
-def test_df_add_prefix(scalars_df_index, scalars_pandas_df_index, axis):
-    if pd.__version__.startswith("1."):
-        pytest.skip("add_prefix axis parameter not supported in pandas 1.x.")
-    bf_result = scalars_df_index.add_prefix("prefix_", axis).to_pandas()
-
-    pd_result = scalars_pandas_df_index.add_prefix("prefix_", axis)
-
-    pd.testing.assert_frame_equal(
-        bf_result,
-        pd_result,
-        check_index_type=False,
-    )
-
-
-@pytest.mark.parametrize(
-    ("axis",),
-    [
-        (0,),
-        (1,),
-    ],
-)
-def test_df_add_suffix(scalars_df_index, scalars_pandas_df_index, axis):
-    if pd.__version__.startswith("1."):
-        pytest.skip("add_prefix axis parameter not supported in pandas 1.x.")
-    bf_result = scalars_df_index.add_suffix("_suffix", axis).to_pandas()
-
-    pd_result = scalars_pandas_df_index.add_suffix("_suffix", axis)
-
-    pd.testing.assert_frame_equal(
-        bf_result,
-        pd_result,
-        check_index_type=False,
-    )
-
-
-def test_df_astype_error_error(session):
-    input = pd.DataFrame(["hello", "world", "3.11", "4000"])
-    with pytest.raises(ValueError):
-        session.read_pandas(input).astype("Float64", errors="bad_value")
-
-
-def test_df_columns_filter_items(scalars_df_index, scalars_pandas_df_index):
-    if pd.__version__.startswith("2.0") or pd.__version__.startswith("1."):
-        pytest.skip("pandas filter items behavior different pre-2.1")
-    bf_result = scalars_df_index.filter(items=["string_col", "int64_col"]).to_pandas()
-
-    pd_result = scalars_pandas_df_index.filter(items=["string_col", "int64_col"])
-    # Ignore column ordering as pandas order differently depending on version
-    pd.testing.assert_frame_equal(
-        bf_result.sort_index(axis=1),
-        pd_result.sort_index(axis=1),
-    )
-
-
-def test_df_columns_filter_like(scalars_df_index, scalars_pandas_df_index):
-    bf_result = scalars_df_index.filter(like="64_col").to_pandas()
-
-    pd_result = scalars_pandas_df_index.filter(like="64_col")
-
-    pd.testing.assert_frame_equal(
-        bf_result,
-        pd_result,
-    )
-
-
-def test_df_columns_filter_regex(scalars_df_index, scalars_pandas_df_index):
-    bf_result = scalars_df_index.filter(regex="^[^_]+$").to_pandas()
-
-    pd_result = scalars_pandas_df_index.filter(regex="^[^_]+$")
-
-    pd.testing.assert_frame_equal(
-        bf_result,
-        pd_result,
-    )
-
-
-def test_df_rows_filter_items(scalars_df_index, scalars_pandas_df_index):
-    if pd.__version__.startswith("2.0") or pd.__version__.startswith("1."):
-        pytest.skip("pandas filter items behavior different pre-2.1")
-    bf_result = scalars_df_index.filter(items=[5, 1, 3], axis=0).to_pandas()
-
-    pd_result = scalars_pandas_df_index.filter(items=[5, 1, 3], axis=0)
-
-    # Pandas uses int64 instead of Int64 (nullable) dtype.
-    pd_result.index = pd_result.index.astype(pd.Int64Dtype())
-    # Ignore ordering as pandas order differently depending on version
-    assert_pandas_df_equal(
-        bf_result,
-        pd_result,
-        ignore_order=True,
-        check_names=False,
-    )
-
-
-def test_df_rows_filter_like(scalars_df_index, scalars_pandas_df_index):
-    scalars_df_index = scalars_df_index.copy().set_index("string_col")
-    scalars_pandas_df_index = scalars_pandas_df_index.copy().set_index("string_col")
-
-    bf_result = scalars_df_index.filter(like="ello", axis=0).to_pandas()
-
-    pd_result = scalars_pandas_df_index.filter(like="ello", axis=0)
-
-    pd.testing.assert_frame_equal(
-        bf_result,
-        pd_result,
-    )
-
-
-def test_df_rows_filter_regex(scalars_df_index, scalars_pandas_df_index):
-    scalars_df_index = scalars_df_index.copy().set_index("string_col")
-    scalars_pandas_df_index = scalars_pandas_df_index.copy().set_index("string_col")
-
-    bf_result = scalars_df_index.filter(regex="^[GH].*", axis=0).to_pandas()
-
-    pd_result = scalars_pandas_df_index.filter(regex="^[GH].*", axis=0)
-
-    pd.testing.assert_frame_equal(
-        bf_result,
-        pd_result,
-    )
-
-
-def test_df_reindex_rows_list(scalars_dfs_maybe_ordered):
-    scalars_df_index, scalars_pandas_df_index = scalars_dfs_maybe_ordered
-    bf_result = scalars_df_index.reindex(index=[5, 1, 3, 99, 1])
-
-    pd_result = scalars_pandas_df_index.reindex(index=[5, 1, 3, 99, 1])
-
-    # Pandas uses int64 instead of Int64 (nullable) dtype.
-    pd_result.index = pd_result.index.astype(pd.Int64Dtype())
-    assert_dfs_equivalent(
-        pd_result,
-        bf_result,
-    )
-
-
-def test_df_reindex_rows_index(scalars_df_index, scalars_pandas_df_index):
-    bf_result = scalars_df_index.reindex(
-        index=pd.Index([5, 1, 3, 99, 1], name="newname")
-    ).to_pandas()
-
-    pd_result = scalars_pandas_df_index.reindex(
-        index=pd.Index([5, 1, 3, 99, 1], name="newname")
-    )
-
-    # Pandas uses int64 instead of Int64 (nullable) dtype.
-    pd_result.index = pd_result.index.astype(pd.Int64Dtype())
-    pd.testing.assert_frame_equal(
-        bf_result,
-        pd_result,
-    )
-
-
-def test_df_reindex_nonunique(scalars_df_index):
-    with pytest.raises(ValueError):
-        # int64_too is non-unique
-        scalars_df_index.set_index("int64_too").reindex(
-            index=[5, 1, 3, 99, 1], validate=True
-        )
-
-
-def test_df_reindex_columns(scalars_df_index, scalars_pandas_df_index):
-    bf_result = scalars_df_index.reindex(
-        columns=["not_a_col", "int64_col", "int64_too"]
-    ).to_pandas()
-
-    pd_result = scalars_pandas_df_index.reindex(
-        columns=["not_a_col", "int64_col", "int64_too"]
-    )
-
-    # Pandas uses float64 as default for newly created empty column, bf uses Float64
-    pd_result.not_a_col = pd_result.not_a_col.astype(pandas.Float64Dtype())
-    pd.testing.assert_frame_equal(
-        bf_result,
-        pd_result,
-    )
-
-
-def test_df_reindex_columns_with_same_order(scalars_df_index, scalars_pandas_df_index):
-    # First, make sure the two dataframes have the same columns in order.
-    columns = ["int64_col", "int64_too"]
-    bf = scalars_df_index[columns]
-    pd_df = scalars_pandas_df_index[columns]
-
-    bf_result = bf.reindex(columns=columns).to_pandas()
-    pd_result = pd_df.reindex(columns=columns)
-
-    pd.testing.assert_frame_equal(
-        bf_result,
-        pd_result,
-    )
-
-
-def test_df_equals_identical(scalars_df_index, scalars_pandas_df_index):
-    unsupported = [
-        "geography_col",
-    ]
-    scalars_df_index = scalars_df_index.drop(columns=unsupported)
-    scalars_pandas_df_index = scalars_pandas_df_index.drop(columns=unsupported)
-
-    bf_result = scalars_df_index.equals(scalars_df_index)
-    pd_result = scalars_pandas_df_index.equals(scalars_pandas_df_index)
-
-    assert pd_result == bf_result
-
-
-def test_df_equals_series(scalars_df_index, scalars_pandas_df_index):
-    bf_result = scalars_df_index[["int64_col"]].equals(scalars_df_index["int64_col"])
-    pd_result = scalars_pandas_df_index[["int64_col"]].equals(
-        scalars_pandas_df_index["int64_col"]
-    )
-
-    assert pd_result == bf_result
-
-
-def test_df_equals_different_dtype(scalars_df_index, scalars_pandas_df_index):
-    columns = ["int64_col", "int64_too"]
-    scalars_df_index = scalars_df_index[columns]
-    scalars_pandas_df_index = scalars_pandas_df_index[columns]
-
-    bf_modified = scalars_df_index.copy()
-    bf_modified = bf_modified.astype("Float64")
-
-    pd_modified = scalars_pandas_df_index.copy()
-    pd_modified = pd_modified.astype("Float64")
-
-    bf_result = scalars_df_index.equals(bf_modified)
-    pd_result = scalars_pandas_df_index.equals(pd_modified)
-
-    assert pd_result == bf_result
-
-
-def test_df_equals_different_values(scalars_df_index, scalars_pandas_df_index):
-    columns = ["int64_col", "int64_too"]
-    scalars_df_index = scalars_df_index[columns]
-    scalars_pandas_df_index = scalars_pandas_df_index[columns]
-
-    bf_modified = scalars_df_index.copy()
-    bf_modified["int64_col"] = bf_modified.int64_col + 1
-
-    pd_modified = scalars_pandas_df_index.copy()
-    pd_modified["int64_col"] = pd_modified.int64_col + 1
-
-    bf_result = scalars_df_index.equals(bf_modified)
-    pd_result = scalars_pandas_df_index.equals(pd_modified)
-
-    assert pd_result == bf_result
-
-
-def test_df_equals_extra_column(scalars_df_index, scalars_pandas_df_index):
-    columns = ["int64_col", "int64_too"]
-    more_columns = ["int64_col", "int64_too", "float64_col"]
-
-    bf_result = scalars_df_index[columns].equals(scalars_df_index[more_columns])
-    pd_result = scalars_pandas_df_index[columns].equals(
-        scalars_pandas_df_index[more_columns]
-    )
-
-    assert pd_result == bf_result
-
-
-def test_df_reindex_like(scalars_df_index, scalars_pandas_df_index):
-    reindex_target_bf = scalars_df_index.reindex(
-        columns=["not_a_col", "int64_col", "int64_too"], index=[5, 1, 3, 99, 1]
-    )
-    bf_result = scalars_df_index.reindex_like(reindex_target_bf).to_pandas()
-
-    reindex_target_pd = scalars_pandas_df_index.reindex(
-        columns=["not_a_col", "int64_col", "int64_too"], index=[5, 1, 3, 99, 1]
-    )
-    pd_result = scalars_pandas_df_index.reindex_like(reindex_target_pd)
-
-    # Pandas uses float64 as default for newly created empty column, bf uses Float64
-    # Pandas uses int64 instead of Int64 (nullable) dtype.
-    pd_result.index = pd_result.index.astype(pd.Int64Dtype())
-    # Pandas uses float64 as default for newly created empty column, bf uses Float64
-    pd_result.not_a_col = pd_result.not_a_col.astype(pandas.Float64Dtype())
-    pd.testing.assert_frame_equal(
-        bf_result,
-        pd_result,
-    )
-
-
-def test_df_values(scalars_df_index, scalars_pandas_df_index):
-    bf_result = scalars_df_index.values
-
-    pd_result = scalars_pandas_df_index.values
-    # Numpy isn't equipped to compare non-numeric objects, so convert back to dataframe
-    pd.testing.assert_frame_equal(
-        pd.DataFrame(bf_result), pd.DataFrame(pd_result), check_dtype=False
-    )
-
-
-def test_df_to_numpy(scalars_df_index, scalars_pandas_df_index):
-    bf_result = scalars_df_index.to_numpy()
-
-    pd_result = scalars_pandas_df_index.to_numpy()
-    # Numpy isn't equipped to compare non-numeric objects, so convert back to dataframe
-    pd.testing.assert_frame_equal(
-        pd.DataFrame(bf_result), pd.DataFrame(pd_result), check_dtype=False
-    )
-
-
-def test_df___array__(scalars_df_index, scalars_pandas_df_index):
-    bf_result = scalars_df_index.__array__()
-
-    pd_result = scalars_pandas_df_index.__array__()
-    # Numpy isn't equipped to compare non-numeric objects, so convert back to dataframe
-    pd.testing.assert_frame_equal(
-        pd.DataFrame(bf_result), pd.DataFrame(pd_result), check_dtype=False
-    )
-
-
-@pytest.mark.parametrize(
-    ("key",),
-    [
-        ("hello",),
-        (2,),
-        ("int64_col",),
-        (None,),
-    ],
-)
-def test_df_contains(scalars_df_index, scalars_pandas_df_index, key):
-    bf_result = key in scalars_df_index
-    pd_result = key in scalars_pandas_df_index
-
-    assert bf_result == pd_result
-
-
-def test_df_getattr_attribute_error_when_pandas_has(scalars_df_index):
-    # swapaxes is implemented in pandas but not in bigframes
-    with pytest.raises(AttributeError):
-        scalars_df_index.swapaxes()
-
-
-def test_df_getattr_attribute_error(scalars_df_index):
-    with pytest.raises(AttributeError):
-        scalars_df_index.not_a_method()
-
-
-def test_df_getattr_axes():
-    df = dataframe.DataFrame(
-        [[1, 1, 1], [1, 1, 1]], columns=["index", "columns", "my_column"]
-    )
-    assert isinstance(df.index, bigframes.core.indexes.Index)
-    assert isinstance(df.columns, pandas.Index)
-    assert isinstance(df.my_column, series.Series)
-
-
-def test_df_setattr_index():
-    pd_df = pandas.DataFrame(
-        [[1, 1, 1], [1, 1, 1]], columns=["index", "columns", "my_column"]
-    )
-    bf_df = dataframe.DataFrame(pd_df)
-
-    pd_df.index = pandas.Index([4, 5])
-    bf_df.index = [4, 5]
-
-    assert_pandas_df_equal(
-        pd_df, bf_df.to_pandas(), check_index_type=False, check_dtype=False
-    )
-
-
-def test_df_setattr_columns():
-    pd_df = pandas.DataFrame(
-        [[1, 1, 1], [1, 1, 1]], columns=["index", "columns", "my_column"]
-    )
-    bf_df = dataframe.DataFrame(pd_df)
-
-    pd_df.columns = typing.cast(pandas.Index, pandas.Index([4, 5, 6]))
-
-    bf_df.columns = pandas.Index([4, 5, 6])
-
-    assert_pandas_df_equal(
-        pd_df, bf_df.to_pandas(), check_index_type=False, check_dtype=False
-    )
-
-
-def test_df_setattr_modify_column():
-    pd_df = pandas.DataFrame(
-        [[1, 1, 1], [1, 1, 1]], columns=["index", "columns", "my_column"]
-    )
-    bf_df = dataframe.DataFrame(pd_df)
-    pd_df.my_column = [4, 5]
-    bf_df.my_column = [4, 5]
-
-    assert_pandas_df_equal(
-        pd_df, bf_df.to_pandas(), check_index_type=False, check_dtype=False
-    )
-
-
-def test_loc_list_string_index(scalars_df_index, scalars_pandas_df_index):
-    index_list = scalars_pandas_df_index.string_col.iloc[[0, 1, 1, 5]].values
-
-    scalars_df_index = scalars_df_index.set_index("string_col")
-    scalars_pandas_df_index = scalars_pandas_df_index.set_index("string_col")
-
-    bf_result = scalars_df_index.loc[index_list].to_pandas()
-    pd_result = scalars_pandas_df_index.loc[index_list]
-
-    pd.testing.assert_frame_equal(
-        bf_result,
-        pd_result,
-    )
-
-
-def test_loc_list_integer_index(scalars_df_index, scalars_pandas_df_index):
-    index_list = [3, 2, 1, 3, 2, 1]
-
-    bf_result = scalars_df_index.loc[index_list]
-    pd_result = scalars_pandas_df_index.loc[index_list]
-
-    pd.testing.assert_frame_equal(
-        bf_result.to_pandas(),
-        pd_result,
-    )
-
-
-def test_loc_list_multiindex(scalars_dfs_maybe_ordered):
-    scalars_df_index, scalars_pandas_df_index = scalars_dfs_maybe_ordered
-    scalars_df_multiindex = scalars_df_index.set_index(["string_col", "int64_col"])
-    scalars_pandas_df_multiindex = scalars_pandas_df_index.set_index(
-        ["string_col", "int64_col"]
-    )
-    index_list = [("Hello, World!", -234892), ("Hello, World!", 123456789)]
-
-    bf_result = scalars_df_multiindex.loc[index_list]
-    pd_result = scalars_pandas_df_multiindex.loc[index_list]
-
-    assert_dfs_equivalent(
-        pd_result,
-        bf_result,
-    )
-
-
-@pytest.mark.parametrize(
-    "index_list",
-    [
-        [0, 1, 2, 3, 4, 4],
-        [0, 0, 0, 5, 4, 7, -2, -5, 3],
-        [-1, -2, -3, -4, -5, -5],
-    ],
-)
-def test_iloc_list(scalars_df_index, scalars_pandas_df_index, index_list):
-    bf_result = scalars_df_index.iloc[index_list]
-    pd_result = scalars_pandas_df_index.iloc[index_list]
-
-    pd.testing.assert_frame_equal(
-        bf_result.to_pandas(),
-        pd_result,
-    )
-
-
-@pytest.mark.parametrize(
-    "index_list",
-    [
-        [0, 1, 2, 3, 4, 4],
-        [0, 0, 0, 5, 4, 7, -2, -5, 3],
-        [-1, -2, -3, -4, -5, -5],
-    ],
-)
-def test_iloc_list_partial_ordering(
-    scalars_df_partial_ordering, scalars_pandas_df_index, index_list
-):
-    bf_result = scalars_df_partial_ordering.iloc[index_list]
-    pd_result = scalars_pandas_df_index.iloc[index_list]
-
-    pd.testing.assert_frame_equal(
-        bf_result.to_pandas(),
-        pd_result,
-    )
-
-
-def test_iloc_list_multiindex(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
-    scalars_df = scalars_df.copy()
-    scalars_pandas_df = scalars_pandas_df.copy()
-    scalars_df = scalars_df.set_index(["bytes_col", "numeric_col"])
-    scalars_pandas_df = scalars_pandas_df.set_index(["bytes_col", "numeric_col"])
-
-    index_list = [0, 0, 0, 5, 4, 7]
-
-    bf_result = scalars_df.iloc[index_list]
-    pd_result = scalars_pandas_df.iloc[index_list]
-
-    pd.testing.assert_frame_equal(
-        bf_result.to_pandas(),
-        pd_result,
-    )
-
-
-def test_iloc_empty_list(scalars_df_index, scalars_pandas_df_index):
-
-    index_list: List[int] = []
-
-    bf_result = scalars_df_index.iloc[index_list]
-    pd_result = scalars_pandas_df_index.iloc[index_list]
-
-    bf_result = bf_result.to_pandas()
-    assert bf_result.shape == pd_result.shape  # types are known to be different
-
-
-def test_rename_axis(scalars_df_index, scalars_pandas_df_index):
-    bf_result = scalars_df_index.rename_axis("newindexname")
-    pd_result = scalars_pandas_df_index.rename_axis("newindexname")
-
-    pd.testing.assert_frame_equal(
-        bf_result.to_pandas(),
-        pd_result,
-    )
-
-
-def test_rename_axis_nonstring(scalars_df_index, scalars_pandas_df_index):
-    bf_result = scalars_df_index.rename_axis((4,))
-    pd_result = scalars_pandas_df_index.rename_axis((4,))
-
-    pd.testing.assert_frame_equal(
-        bf_result.to_pandas(),
-        pd_result,
-    )
-
-
-def test_loc_bf_series_string_index(scalars_df_index, scalars_pandas_df_index):
-    pd_string_series = scalars_pandas_df_index.string_col.iloc[[0, 5, 1, 1, 5]]
-    bf_string_series = scalars_df_index.string_col.iloc[[0, 5, 1, 1, 5]]
-
-    scalars_df_index = scalars_df_index.set_index("string_col")
-    scalars_pandas_df_index = scalars_pandas_df_index.set_index("string_col")
-
-    bf_result = scalars_df_index.loc[bf_string_series]
-    pd_result = scalars_pandas_df_index.loc[pd_string_series]
-
-    pd.testing.assert_frame_equal(
-        bf_result.to_pandas(),
-        pd_result,
-    )
-
-
-def test_loc_bf_series_multiindex(scalars_df_index, scalars_pandas_df_index):
-    pd_string_series = scalars_pandas_df_index.string_col.iloc[[0, 5, 1, 1, 5]]
-    bf_string_series = scalars_df_index.string_col.iloc[[0, 5, 1, 1, 5]]
-
-    scalars_df_multiindex = scalars_df_index.set_index(["string_col", "int64_col"])
-    scalars_pandas_df_multiindex = scalars_pandas_df_index.set_index(
-        ["string_col", "int64_col"]
-    )
-
-    bf_result = scalars_df_multiindex.loc[bf_string_series]
-    pd_result = scalars_pandas_df_multiindex.loc[pd_string_series]
-
-    pd.testing.assert_frame_equal(
-        bf_result.to_pandas(),
-        pd_result,
-    )
-
-
-def test_loc_bf_index_integer_index(scalars_df_index, scalars_pandas_df_index):
-    pd_index = scalars_pandas_df_index.iloc[[0, 5, 1, 1, 5]].index
-    bf_index = scalars_df_index.iloc[[0, 5, 1, 1, 5]].index
-
-    bf_result = scalars_df_index.loc[bf_index]
-    pd_result = scalars_pandas_df_index.loc[pd_index]
-
-    pd.testing.assert_frame_equal(
-        bf_result.to_pandas(),
-        pd_result,
-    )
-
-
-def test_loc_bf_index_integer_index_renamed_col(
-    scalars_df_index, scalars_pandas_df_index
-):
-    scalars_df_index = scalars_df_index.rename(columns={"int64_col": "rename"})
-    scalars_pandas_df_index = scalars_pandas_df_index.rename(
-        columns={"int64_col": "rename"}
-    )
-
-    pd_index = scalars_pandas_df_index.iloc[[0, 5, 1, 1, 5]].index
-    bf_index = scalars_df_index.iloc[[0, 5, 1, 1, 5]].index
-
-    bf_result = scalars_df_index.loc[bf_index]
-    pd_result = scalars_pandas_df_index.loc[pd_index]
-
-    pd.testing.assert_frame_equal(
-        bf_result.to_pandas(),
-        pd_result,
-    )
-
-
-@pytest.mark.parametrize(
-    ("subset"),
-    [
-        None,
-        "bool_col",
-        ["bool_col", "int64_too"],
-    ],
-)
-@pytest.mark.parametrize(
-    ("keep",),
-    [
-        ("first",),
-        ("last",),
-        (False,),
-    ],
-)
-def test_df_drop_duplicates(scalars_df_index, scalars_pandas_df_index, keep, subset):
-    columns = ["bool_col", "int64_too", "int64_col"]
-    bf_df = scalars_df_index[columns].drop_duplicates(subset, keep=keep).to_pandas()
-    pd_df = scalars_pandas_df_index[columns].drop_duplicates(subset, keep=keep)
-    pd.testing.assert_frame_equal(
-        pd_df,
-        bf_df,
-    )
-
-
-@pytest.mark.parametrize(
-    ("keep",),
-    [
-        ("first",),
-        ("last",),
-        (False,),
-    ],
-)
-def test_df_drop_duplicates_w_json(json_df, keep):
-    bf_df = json_df.drop_duplicates(keep=keep).to_pandas()
-
-    # drop_duplicates relies on pa.compute.dictionary_encode, which is incompatible
-    # with Arrow string extension types. Temporary conversion to standard Pandas
-    # strings is required.
-    json_pandas_df = json_df.to_pandas()
-    json_pandas_df["json_col"] = json_pandas_df["json_col"].astype(
-        pd.StringDtype(storage="pyarrow")
-    )
-
-    pd_df = json_pandas_df.drop_duplicates(keep=keep)
-    pd_df["json_col"] = pd_df["json_col"].astype(dtypes.JSON_DTYPE)
-    pd.testing.assert_frame_equal(
-        pd_df,
-        bf_df,
-    )
-
-
-@pytest.mark.parametrize(
-    ("subset"),
-    [
-        None,
-        ["bool_col"],
-    ],
-)
-@pytest.mark.parametrize(
-    ("keep",),
-    [
-        ("first",),
-        ("last",),
-        (False,),
-    ],
-)
-def test_df_duplicated(scalars_df_index, scalars_pandas_df_index, keep, subset):
-    columns = ["bool_col", "int64_too", "int64_col"]
-    bf_series = scalars_df_index[columns].duplicated(subset, keep=keep).to_pandas()
-    pd_series = scalars_pandas_df_index[columns].duplicated(subset, keep=keep)
-    pd.testing.assert_series_equal(pd_series, bf_series, check_dtype=False)
-
-
-def test_df_from_dict_columns_orient():
-    data = {"a": [1, 2], "b": [3.3, 2.4]}
-    bf_result = dataframe.DataFrame.from_dict(data, orient="columns").to_pandas()
-    pd_result = pd.DataFrame.from_dict(data, orient="columns")
-    assert_pandas_df_equal(
-        pd_result, bf_result, check_dtype=False, check_index_type=False
-    )
-
-
-def test_df_from_dict_index_orient():
-    data = {"a": [1, 2], "b": [3.3, 2.4]}
-    bf_result = dataframe.DataFrame.from_dict(
-        data, orient="index", columns=["col1", "col2"]
-    ).to_pandas()
-    pd_result = pd.DataFrame.from_dict(data, orient="index", columns=["col1", "col2"])
-    assert_pandas_df_equal(
-        pd_result, bf_result, check_dtype=False, check_index_type=False
-    )
-
-
-def test_df_from_dict_tight_orient():
-    data = {
-        "index": [("i1", "i2"), ("i3", "i4")],
-        "columns": ["col1", "col2"],
-        "data": [[1, 2.6], [3, 4.5]],
-        "index_names": ["in1", "in2"],
-        "column_names": ["column_axis"],
-    }
-
-    bf_result = dataframe.DataFrame.from_dict(data, orient="tight").to_pandas()
-    pd_result = pd.DataFrame.from_dict(data, orient="tight")
-    assert_pandas_df_equal(
-        pd_result, bf_result, check_dtype=False, check_index_type=False
-    )
-
-
-def test_df_from_records():
-    records = ((1, "a"), (2.5, "b"), (3.3, "c"), (4.9, "d"))
-
-    bf_result = dataframe.DataFrame.from_records(
-        records, columns=["c1", "c2"]
-    ).to_pandas()
-    pd_result = pd.DataFrame.from_records(records, columns=["c1", "c2"])
-    assert_pandas_df_equal(
-        pd_result, bf_result, check_dtype=False, check_index_type=False
-    )
-
-
-def test_df_to_dict(scalars_df_index, scalars_pandas_df_index):
-    unsupported = ["numeric_col"]  # formatted differently
-    bf_result = scalars_df_index.drop(columns=unsupported).to_dict()
-    pd_result = scalars_pandas_df_index.drop(columns=unsupported).to_dict()
-
-    assert bf_result == pd_result
-
-
-def test_df_to_excel(scalars_df_index, scalars_pandas_df_index):
-    unsupported = ["timestamp_col"]
-    with tempfile.TemporaryFile() as bf_result_file, tempfile.TemporaryFile() as pd_result_file:
-        scalars_df_index.drop(columns=unsupported).to_excel(bf_result_file)
-        scalars_pandas_df_index.drop(columns=unsupported).to_excel(pd_result_file)
-        bf_result = bf_result_file.read()
-        pd_result = bf_result_file.read()
-
-    assert bf_result == pd_result
-
-
-def test_df_to_latex(scalars_df_index, scalars_pandas_df_index):
-    unsupported = ["numeric_col"]  # formatted differently
-    bf_result = scalars_df_index.drop(columns=unsupported).to_latex()
-    pd_result = scalars_pandas_df_index.drop(columns=unsupported).to_latex()
-
-    assert bf_result == pd_result
-
-
-def test_df_to_json_local_str(scalars_df_index, scalars_pandas_df_index):
-    bf_result = scalars_df_index.to_json()
-    # default_handler for arrow types that have no default conversion
-    pd_result = scalars_pandas_df_index.to_json(default_handler=str)
-
-    assert bf_result == pd_result
-
-
-def test_df_to_json_local_file(scalars_df_index, scalars_pandas_df_index):
-    # TODO: supply a reason why this isn't compatible with pandas 1.x
-    pytest.importorskip("pandas", minversion="2.0.0")
-    # duration not fully supported at pandas level
-    scalars_df_index = scalars_df_index.drop(columns="duration_col")
-    scalars_pandas_df_index = scalars_pandas_df_index.drop(columns="duration_col")
-    with tempfile.TemporaryFile() as bf_result_file, tempfile.TemporaryFile() as pd_result_file:
-        scalars_df_index.to_json(bf_result_file, orient="table")
-        # default_handler for arrow types that have no default conversion
-        scalars_pandas_df_index.to_json(
-            pd_result_file, orient="table", default_handler=str
-        )
-
-        bf_result = bf_result_file.read()
-        pd_result = pd_result_file.read()
-
-    assert bf_result == pd_result
-
-
-def test_df_to_csv_local_str(scalars_df_index, scalars_pandas_df_index):
-    bf_result = scalars_df_index.to_csv()
-    # default_handler for arrow types that have no default conversion
-    pd_result = scalars_pandas_df_index.to_csv()
-
-    assert bf_result == pd_result
-
-
-def test_df_to_csv_local_file(scalars_df_index, scalars_pandas_df_index):
-    with tempfile.TemporaryFile() as bf_result_file, tempfile.TemporaryFile() as pd_result_file:
-        scalars_df_index.to_csv(bf_result_file)
-        scalars_pandas_df_index.to_csv(pd_result_file)
-
-        bf_result = bf_result_file.read()
-        pd_result = pd_result_file.read()
-
-    assert bf_result == pd_result
-
-
-def test_df_to_parquet_local_bytes(scalars_df_index, scalars_pandas_df_index):
-    # GEOGRAPHY not supported in parquet export.
-    unsupported = ["geography_col"]
-
-    bf_result = scalars_df_index.drop(columns=unsupported).to_parquet()
-    # default_handler for arrow types that have no default conversion
-    pd_result = scalars_pandas_df_index.drop(columns=unsupported).to_parquet()
-
-    assert bf_result == pd_result
-
-
-def test_df_to_parquet_local_file(scalars_df_index, scalars_pandas_df_index):
-    # GEOGRAPHY not supported in parquet export.
-    unsupported = ["geography_col"]
-    with tempfile.TemporaryFile() as bf_result_file, tempfile.TemporaryFile() as pd_result_file:
-        scalars_df_index.drop(columns=unsupported).to_parquet(bf_result_file)
-        scalars_pandas_df_index.drop(columns=unsupported).to_parquet(pd_result_file)
-
-        bf_result = bf_result_file.read()
-        pd_result = pd_result_file.read()
-
-    assert bf_result == pd_result
-
-
-def test_df_to_records(scalars_df_index, scalars_pandas_df_index):
-    unsupported = ["numeric_col"]
-    bf_result = scalars_df_index.drop(columns=unsupported).to_records()
-    pd_result = scalars_pandas_df_index.drop(columns=unsupported).to_records()
-
-    for bfi, pdi in zip(bf_result, pd_result):
-        for bfj, pdj in zip(bfi, pdi):
-            assert pd.isna(bfj) and pd.isna(pdj) or bfj == pdj
-
-
-def test_df_to_string(scalars_df_index, scalars_pandas_df_index):
-    unsupported = ["numeric_col"]  # formatted differently
-
-    bf_result = scalars_df_index.drop(columns=unsupported).to_string()
-    pd_result = scalars_pandas_df_index.drop(columns=unsupported).to_string()
-
-    assert bf_result == pd_result
-
-
-def test_df_to_html(scalars_df_index, scalars_pandas_df_index):
-    unsupported = ["numeric_col"]  # formatted differently
-
-    bf_result = scalars_df_index.drop(columns=unsupported).to_html()
-    pd_result = scalars_pandas_df_index.drop(columns=unsupported).to_html()
-
-    assert bf_result == pd_result
-
-
-def test_df_to_markdown(scalars_df_index, scalars_pandas_df_index):
-    # Nulls have bug from tabulate https://github.com/astanin/python-tabulate/issues/231
-    bf_result = scalars_df_index.dropna().to_markdown()
-    pd_result = scalars_pandas_df_index.dropna().to_markdown()
-
-    assert bf_result == pd_result
-
-
-def test_df_to_pickle(scalars_df_index, scalars_pandas_df_index):
-    with tempfile.TemporaryFile() as bf_result_file, tempfile.TemporaryFile() as pd_result_file:
-        scalars_df_index.to_pickle(bf_result_file)
-        scalars_pandas_df_index.to_pickle(pd_result_file)
-        bf_result = bf_result_file.read()
-        pd_result = pd_result_file.read()
-
-    assert bf_result == pd_result
-
-
-def test_df_to_orc(scalars_df_index, scalars_pandas_df_index):
-    unsupported = [
-        "numeric_col",
-        "bytes_col",
-        "date_col",
-        "datetime_col",
-        "time_col",
-        "timestamp_col",
-        "geography_col",
-        "duration_col",
-    ]
-
-    bf_result_file = tempfile.TemporaryFile()
-    pd_result_file = tempfile.TemporaryFile()
-    scalars_df_index.drop(columns=unsupported).to_orc(bf_result_file)
-    scalars_pandas_df_index.drop(columns=unsupported).reset_index().to_orc(
-        pd_result_file
-    )
-    bf_result = bf_result_file.read()
-    pd_result = bf_result_file.read()
-
-    assert bf_result == pd_result
-
-
-@pytest.mark.parametrize(
-    ("expr",),
-    [
-        ("new_col = int64_col + int64_too",),
-        ("new_col = (rowindex > 3) | bool_col",),
-        ("int64_too = bool_col\nnew_col2 = rowindex",),
-    ],
-)
-def test_df_eval(scalars_dfs, expr):
-    # TODO: supply a reason why this isn't compatible with pandas 1.x
-    pytest.importorskip("pandas", minversion="2.0.0")
-    scalars_df, scalars_pandas_df = scalars_dfs
-
-    bf_result = scalars_df.eval(expr).to_pandas()
-    pd_result = scalars_pandas_df.eval(expr)
-
-    pd.testing.assert_frame_equal(bf_result, pd_result)
-
-
-@pytest.mark.parametrize(
-    ("expr",),
-    [
-        ("int64_col > int64_too",),
-        ("bool_col",),
-        ("((int64_col - int64_too) % @local_var) == 0",),
-    ],
-)
-def test_df_query(scalars_dfs, expr):
-    # TODO: supply a reason why this isn't compatible with pandas 1.x
-    pytest.importorskip("pandas", minversion="2.0.0")
-    # local_var is referenced in expressions
-    local_var = 3  # NOQA
-    scalars_df, scalars_pandas_df = scalars_dfs
-
-    bf_result = scalars_df.query(expr).to_pandas()
-    pd_result = scalars_pandas_df.query(expr)
-
-    pd.testing.assert_frame_equal(bf_result, pd_result)
-
-
-@pytest.mark.parametrize(
-    ("subset", "normalize", "ascending", "dropna"),
-    [
-        (None, False, False, False),
-        (None, True, True, True),
-        ("bool_col", True, False, True),
-    ],
-)
-def test_df_value_counts(scalars_dfs, subset, normalize, ascending, dropna):
-    if pd.__version__.startswith("1."):
-        pytest.skip("pandas 1.x produces different column labels.")
-    scalars_df, scalars_pandas_df = scalars_dfs
-
-    bf_result = (
-        scalars_df[["string_col", "bool_col"]]
-        .value_counts(subset, normalize=normalize, ascending=ascending, dropna=dropna)
-        .to_pandas()
-    )
-    pd_result = scalars_pandas_df[["string_col", "bool_col"]].value_counts(
-        subset, normalize=normalize, ascending=ascending, dropna=dropna
-    )
-
-    pd.testing.assert_series_equal(
-        bf_result, pd_result, check_dtype=False, check_index_type=False
-    )
-
-
-@pytest.mark.parametrize(
-    ("na_option", "method", "ascending", "numeric_only", "pct"),
-    [
-        ("keep", "average", True, True, True),
-        ("top", "min", False, False, False),
-        ("bottom", "max", False, False, True),
-        ("top", "first", False, False, False),
-        ("bottom", "dense", False, False, True),
-    ],
-)
-def test_df_rank_with_nulls(
-    scalars_df_index,
-    scalars_pandas_df_index,
-    na_option,
-    method,
-    ascending,
-    numeric_only,
-    pct,
-):
-    unsupported_columns = ["geography_col"]
-    bf_result = (
-        scalars_df_index.drop(columns=unsupported_columns)
-        .rank(
-            na_option=na_option,
-            method=method,
-            ascending=ascending,
-            numeric_only=numeric_only,
-            pct=pct,
-        )
-        .to_pandas()
-    )
-    pd_result = (
-        scalars_pandas_df_index.drop(columns=unsupported_columns)
-        .rank(
-            na_option=na_option,
-            method=method,
-            ascending=ascending,
-            numeric_only=numeric_only,
-            pct=pct,
-        )
-        .astype(pd.Float64Dtype())
-    )
-
-    pd.testing.assert_frame_equal(
-        bf_result,
-        pd_result,
-    )
-
-
-def test_df_bool_interpretation_error(scalars_df_index):
-    with pytest.raises(ValueError):
-        True if scalars_df_index else False
-
-
-def test_query_job_setters(scalars_df_default_index: dataframe.DataFrame):
-    # if allow_large_results=False, might not create query job
-    with bigframes.option_context("compute.allow_large_results", True):
-        job_ids = set()
-        repr(scalars_df_default_index)
-        assert scalars_df_default_index.query_job is not None
-        job_ids.add(scalars_df_default_index.query_job.job_id)
-        scalars_df_default_index.to_pandas(allow_large_results=True)
-        job_ids.add(scalars_df_default_index.query_job.job_id)
-
-        assert len(job_ids) == 2
-
-
-def test_df_cached(scalars_df_index):
-    df = scalars_df_index.set_index(["int64_too", "int64_col"]).sort_values(
-        "string_col"
-    )
-    df = df[df["rowindex_2"] % 2 == 0]
-
-    df_cached_copy = df.cache()
-    pandas.testing.assert_frame_equal(df.to_pandas(), df_cached_copy.to_pandas())
-
-
-def test_df_cached_many_index_cols(scalars_df_index):
-    index_cols = [
-        "int64_too",
-        "time_col",
-        "int64_col",
-        "bool_col",
-        "date_col",
-        "timestamp_col",
-        "string_col",
-    ]
-    df = scalars_df_index.set_index(index_cols)
-    df = df[df["rowindex_2"] % 2 == 0]
-
-    df_cached_copy = df.cache()
-    pandas.testing.assert_frame_equal(df.to_pandas(), df_cached_copy.to_pandas())
-
-
-def test_assign_after_binop_row_joins():
-    pd_df = pd.DataFrame(
-        {
-            "idx1": [1, 1, 1, 1, 2, 2, 2, 2],
-            "idx2": [10, 10, 20, 20, 10, 10, 20, 20],
-            "metric1": [10, 14, 2, 13, 6, 2, 9, 5],
-            "metric2": [25, -3, 8, 2, -1, 0, 0, -4],
-        },
-        dtype=pd.Int64Dtype(),
-    ).set_index(["idx1", "idx2"])
-    bf_df = dataframe.DataFrame(pd_df)
-
-    # Expect implicit joiner to be used, preserving input cardinality rather than getting relational join
-    bf_df["metric_diff"] = bf_df.metric1 - bf_df.metric2
-    pd_df["metric_diff"] = pd_df.metric1 - pd_df.metric2
-
-    assert_pandas_df_equal(bf_df.to_pandas(), pd_df)
-
-
-def test_df_cache_with_implicit_join(scalars_df_index):
-    """expectation is that cache will be used, but no explicit join will be performed"""
-    df = scalars_df_index[["int64_col", "int64_too"]].sort_index().reset_index() + 3
-    df.cache()
-    bf_result = df + (df * 2)
-    sql = bf_result.sql
-
-    # Very crude asserts, want sql to not use join and not use base table, only reference cached table
-    assert "JOIN" not in sql
-    assert "bigframes_testing" not in sql
-
-
-def test_df_dot_inline(session):
-    df1 = pd.DataFrame([[1, 2, 3], [2, 5, 7]])
-    df2 = pd.DataFrame([[2, 4, 8], [1, 5, 10], [3, 6, 9]])
-
-    bf1 = session.read_pandas(df1)
-    bf2 = session.read_pandas(df2)
-    bf_result = bf1.dot(bf2).to_pandas()
-    pd_result = df1.dot(df2)
-
-    # Patch pandas dtypes for testing parity
-    # Pandas uses int64 instead of Int64 (nullable) dtype.
-    for name in pd_result.columns:
-        pd_result[name] = pd_result[name].astype(pd.Int64Dtype())
-    pd_result.index = pd_result.index.astype(pd.Int64Dtype())
-
-    pd.testing.assert_frame_equal(
-        bf_result,
-        pd_result,
-    )
-
-
-def test_df_dot(
-    matrix_2by3_df, matrix_2by3_pandas_df, matrix_3by4_df, matrix_3by4_pandas_df
-):
-    bf_result = matrix_2by3_df.dot(matrix_3by4_df).to_pandas()
-    pd_result = matrix_2by3_pandas_df.dot(matrix_3by4_pandas_df)
-
-    # Patch pandas dtypes for testing parity
-    # Pandas result is object instead of Int64 (nullable) dtype.
-    for name in pd_result.columns:
-        pd_result[name] = pd_result[name].astype(pd.Int64Dtype())
-
-    pd.testing.assert_frame_equal(
-        bf_result,
-        pd_result,
-    )
-
-
-def test_df_dot_operator(
-    matrix_2by3_df, matrix_2by3_pandas_df, matrix_3by4_df, matrix_3by4_pandas_df
-):
-    bf_result = (matrix_2by3_df @ matrix_3by4_df).to_pandas()
-    pd_result = matrix_2by3_pandas_df @ matrix_3by4_pandas_df
-
-    # Patch pandas dtypes for testing parity
-    # Pandas result is object instead of Int64 (nullable) dtype.
-    for name in pd_result.columns:
-        pd_result[name] = pd_result[name].astype(pd.Int64Dtype())
-
-    pd.testing.assert_frame_equal(
-        bf_result,
-        pd_result,
-    )
-
-
-def test_df_dot_series_inline():
-    left = [[1, 2, 3], [2, 5, 7]]
-    right = [2, 1, 3]
-
-    bf1 = dataframe.DataFrame(left)
-    bf2 = series.Series(right)
-    bf_result = bf1.dot(bf2).to_pandas()
-
-    df1 = pd.DataFrame(left)
-    df2 = pd.Series(right)
-    pd_result = df1.dot(df2)
-
-    # Patch pandas dtypes for testing parity
-    # Pandas result is int64 instead of Int64 (nullable) dtype.
-    pd_result = pd_result.astype(pd.Int64Dtype())
-    pd_result.index = pd_result.index.astype(pd.Int64Dtype())
-
-    pd.testing.assert_series_equal(
-        bf_result,
-        pd_result,
-    )
-
-
-def test_df_dot_series(
-    matrix_2by3_df, matrix_2by3_pandas_df, matrix_3by4_df, matrix_3by4_pandas_df
-):
-    bf_result = matrix_2by3_df.dot(matrix_3by4_df["x"]).to_pandas()
-    pd_result = matrix_2by3_pandas_df.dot(matrix_3by4_pandas_df["x"])
-
-    # Patch pandas dtypes for testing parity
-    # Pandas result is object instead of Int64 (nullable) dtype.
-    pd_result = pd_result.astype(pd.Int64Dtype())
-
-    pd.testing.assert_series_equal(
-        bf_result,
-        pd_result,
-    )
-
-
-def test_df_dot_operator_series(
-    matrix_2by3_df, matrix_2by3_pandas_df, matrix_3by4_df, matrix_3by4_pandas_df
-):
-    bf_result = (matrix_2by3_df @ matrix_3by4_df["x"]).to_pandas()
-    pd_result = matrix_2by3_pandas_df @ matrix_3by4_pandas_df["x"]
-
-    # Patch pandas dtypes for testing parity
-    # Pandas result is object instead of Int64 (nullable) dtype.
-    pd_result = pd_result.astype(pd.Int64Dtype())
-
-    pd.testing.assert_series_equal(
-        bf_result,
-        pd_result,
-    )
-
-
-# TODO(tswast): We may be able to re-enable this test after we break large
-# queries up in https://github.com/googleapis/python-bigquery-dataframes/pull/427
-@pytest.mark.skipif(
-    sys.version_info >= (3, 12),
-    # See: https://github.com/python/cpython/issues/112282
-    reason="setrecursionlimit has no effect on the Python C stack since Python 3.12.",
-)
-def test_recursion_limit(scalars_df_index):
-    scalars_df_index = scalars_df_index[["int64_too", "int64_col", "float64_col"]]
-    for i in range(400):
-        scalars_df_index = scalars_df_index + 4
-    scalars_df_index.to_pandas()
-
-
-@pytest.mark.skipif(
-    reason="b/366477265: Skip until query complexity error can be reliably triggered."
-)
-def test_query_complexity_error(scalars_df_index):
-    # This test requires automatic caching/query decomposition to be turned off
-    bf_df = scalars_df_index
-    for _ in range(8):
-        bf_df = bf_df.merge(bf_df, on="int64_col").head(30)
-        bf_df = bf_df[bf_df.columns[:20]]
-
-    with pytest.raises(
-        bigframes.exceptions.QueryComplexityError, match=r"Try using DataFrame\.cache"
-    ):
-        bf_df.to_pandas()
-
-
-def test_query_complexity_repeated_joins(
-    scalars_df_index, scalars_pandas_df_index, with_multiquery_execution
-):
-    pd_df = scalars_pandas_df_index
-    bf_df = scalars_df_index
-    for _ in range(8):
-        # recursively join, resuling in 2^8 - 1 = 255 joins
-        pd_df = pd_df.merge(pd_df, on="int64_col").head(30)
-        pd_df = pd_df[pd_df.columns[:20]]
-        bf_df = bf_df.merge(bf_df, on="int64_col").head(30)
-        bf_df = bf_df[bf_df.columns[:20]]
-
-    bf_result = bf_df.to_pandas()
-    pd_result = pd_df
-    assert_pandas_df_equal(bf_result, pd_result, check_index_type=False)
-
-
-def test_query_complexity_repeated_subtrees(
-    scalars_df_index, scalars_pandas_df_index, with_multiquery_execution
-):
-    # Recursively union the data, if fully inlined has 10^5 identical root tables.
-    pd_df = scalars_pandas_df_index
-    bf_df = scalars_df_index
-    for _ in range(5):
-        pd_df = pd.concat(10 * [pd_df]).head(5)
-        bf_df = bpd.concat(10 * [bf_df]).head(5)
-    bf_result = bf_df.to_pandas()
-    pd_result = pd_df
-    assert_pandas_df_equal(bf_result, pd_result)
-
-
-@pytest.mark.skipif(
-    sys.version_info >= (3, 12),
-    # See: https://github.com/python/cpython/issues/112282
-    reason="setrecursionlimit has no effect on the Python C stack since Python 3.12.",
-)
-def test_query_complexity_repeated_analytic(scalars_df_index, scalars_pandas_df_index):
-    bf_df = scalars_df_index[["int64_col", "int64_too"]]
-    pd_df = scalars_pandas_df_index[["int64_col", "int64_too"]]
-    # Uses LAG analytic operator, each in a new SELECT
-    for _ in range(50):
-        bf_df = bf_df.diff()
-        pd_df = pd_df.diff()
-    bf_result = bf_df.to_pandas()
-    pd_result = pd_df
-    assert_pandas_df_equal(bf_result, pd_result)
-
-
-def test_to_gbq_and_create_dataset(session, scalars_df_index, dataset_id_not_created):
-    dataset_id = dataset_id_not_created
-    destination_table = f"{dataset_id}.scalars_df"
-
-    result_table = scalars_df_index.to_gbq(destination_table)
-    assert (
-        result_table == destination_table
-        if destination_table
-        else result_table is not None
-    )
-
-    loaded_scalars_df_index = session.read_gbq(result_table)
-    assert not loaded_scalars_df_index.empty
-
-
-def test_read_gbq_to_pandas_no_exec(unordered_session: bigframes.Session):
-    metrics = unordered_session._metrics
-    execs_pre = metrics.execution_count
-    df = unordered_session.read_gbq("bigquery-public-data.ml_datasets.penguins")
-    df.to_pandas()
-    execs_post = metrics.execution_count
-    assert df.shape == (344, 7)
-    assert execs_pre == execs_post
-
-
-def test_to_gbq_table_labels(scalars_df_index):
-    destination_table = "bigframes-dev.bigframes_tests_sys.table_labels"
-    result_table = scalars_df_index.to_gbq(
-        destination_table, labels={"test": "labels"}, if_exists="replace"
-    )
-    client = scalars_df_index._session.bqclient
-    table = client.get_table(result_table)
-    assert table.labels
-    assert table.labels["test"] == "labels"
-
-
-@pytest.mark.parametrize(
-    ("col_names", "ignore_index"),
-    [
-        pytest.param(["A"], False, id="one_array_false"),
-        pytest.param(["A"], True, id="one_array_true"),
-        pytest.param(["B"], False, id="one_float_false"),
-        pytest.param(["B"], True, id="one_float_true"),
-        pytest.param(["A", "C"], False, id="two_arrays_false"),
-        pytest.param(["A", "C"], True, id="two_arrays_true"),
-    ],
-)
-def test_dataframe_explode(col_names, ignore_index, session):
-    data = {
-        "A": [[0, 1, 2], [], [3, 4]],
-        "B": 3,
-        "C": [["a", "b", "c"], np.nan, ["d", "e"]],
-    }
-
-    metrics = session._metrics
-    df = bpd.DataFrame(data, session=session)
-    pd_df = df.to_pandas()
-    pd_result = pd_df.explode(col_names, ignore_index=ignore_index)
-    bf_result = df.explode(col_names, ignore_index=ignore_index)
-
-    # Check that to_pandas() results in at most a single query execution
-    execs_pre = metrics.execution_count
-    bf_materialized = bf_result.to_pandas()
-    execs_post = metrics.execution_count
-
-    pd.testing.assert_frame_equal(
-        bf_materialized,
-        pd_result,
-        check_index_type=False,
-        check_dtype=False,
-    )
-    # we test this property on this method in particular as compilation
-    # is non-deterministic and won't use the query cache as implemented
-    assert execs_post - execs_pre <= 1
-
-
-@pytest.mark.parametrize(
-    ("ignore_index", "ordered"),
-    [
-        pytest.param(True, True, id="include_index_ordered"),
-        pytest.param(True, False, id="include_index_unordered"),
-        pytest.param(False, True, id="ignore_index_ordered"),
-    ],
-)
-def test_dataframe_explode_reserve_order(ignore_index, ordered):
-    data = {
-        "a": [np.random.randint(0, 10, 10) for _ in range(10)],
-        "b": [np.random.randint(0, 10, 10) for _ in range(10)],
-    }
-    df = bpd.DataFrame(data)
-    pd_df = pd.DataFrame(data)
-
-    res = df.explode(["a", "b"], ignore_index=ignore_index).to_pandas(ordered=ordered)
-    pd_res = pd_df.explode(["a", "b"], ignore_index=ignore_index).astype(
-        pd.Int64Dtype()
-    )
-    pd.testing.assert_frame_equal(
-        res if ordered else res.sort_index(),
-        pd_res,
-        check_index_type=False,
-    )
-
-
-@pytest.mark.parametrize(
-    ("col_names"),
-    [
-        pytest.param([], id="empty", marks=pytest.mark.xfail(raises=ValueError)),
-        pytest.param(
-            ["A", "A"], id="duplicate", marks=pytest.mark.xfail(raises=ValueError)
-        ),
-        pytest.param("unknown", id="unknown", marks=pytest.mark.xfail(raises=KeyError)),
-    ],
-)
-def test_dataframe_explode_xfail(col_names):
-    df = bpd.DataFrame({"A": [[0, 1, 2], [], [3, 4]]})
-    df.explode(col_names)
-
-
-@pytest.mark.parametrize(
-    ("on", "rule", "origin"),
-    [
-        pytest.param("datetime_col", "100D", "start"),
-        pytest.param("datetime_col", "30W", "start"),
-        pytest.param("datetime_col", "5M", "epoch"),
-        pytest.param("datetime_col", "3Q", "start_day"),
-        pytest.param("datetime_col", "3YE", "start"),
-        pytest.param(
-            "int64_col", "100D", "start", marks=pytest.mark.xfail(raises=TypeError)
-        ),
-        pytest.param(
-            "datetime_col", "100D", "end", marks=pytest.mark.xfail(raises=ValueError)
-        ),
-    ],
-)
-def test__resample_with_column(
-    scalars_df_index, scalars_pandas_df_index, on, rule, origin
-):
-    # TODO: supply a reason why this isn't compatible with pandas 1.x
-    pytest.importorskip("pandas", minversion="2.0.0")
-    bf_result = (
-        scalars_df_index._resample(rule=rule, on=on, origin=origin)[
-            ["int64_col", "int64_too"]
-        ]
-        .max()
-        .to_pandas()
-    )
-    pd_result = scalars_pandas_df_index.resample(rule=rule, on=on, origin=origin)[
-        ["int64_col", "int64_too"]
-    ].max()
-    pd.testing.assert_frame_equal(
-        bf_result, pd_result, check_dtype=False, check_index_type=False
-    )
-
-
-@pytest.mark.parametrize(
-    ("append", "level", "col", "rule"),
-    [
-        pytest.param(False, None, "timestamp_col", "100d"),
-        pytest.param(True, 1, "timestamp_col", "1200h"),
-        pytest.param(False, None, "datetime_col", "100d"),
-    ],
-)
-def test__resample_with_index(
-    scalars_df_index, scalars_pandas_df_index, append, level, col, rule
-):
-    # TODO: supply a reason why this isn't compatible with pandas 1.x
-    pytest.importorskip("pandas", minversion="2.0.0")
-    scalars_df_index = scalars_df_index.set_index(col, append=append)
-    scalars_pandas_df_index = scalars_pandas_df_index.set_index(col, append=append)
-    bf_result = (
-        scalars_df_index[["int64_col", "int64_too"]]
-        ._resample(rule=rule, level=level)
-        .min()
-        .to_pandas()
-    )
-    pd_result = (
-        scalars_pandas_df_index[["int64_col", "int64_too"]]
-        .resample(rule=rule, level=level)
-        .min()
-    )
-    assert_pandas_df_equal(bf_result, pd_result)
-
-
-@pytest.mark.parametrize(
-    ("rule", "origin", "data"),
-    [
-        (
-            "5h",
-            "epoch",
-            {
-                "timestamp_col": pd.date_range(
-                    start="2021-01-01 13:00:00", periods=30, freq="1h"
-                ),
-                "int64_col": range(30),
-                "int64_too": range(10, 40),
-            },
-        ),
-        (
-            "75min",
-            "start_day",
-            {
-                "timestamp_col": pd.date_range(
-                    start="2021-01-01 13:00:00", periods=30, freq="10min"
-                ),
-                "int64_col": range(30),
-                "int64_too": range(10, 40),
-            },
-        ),
-        (
-            "7s",
-            "epoch",
-            {
-                "timestamp_col": pd.date_range(
-                    start="2021-01-01 13:00:00", periods=30, freq="1s"
-                ),
-                "int64_col": range(30),
-                "int64_too": range(10, 40),
-            },
-        ),
-    ],
-)
-def test__resample_start_time(rule, origin, data):
-    # TODO: supply a reason why this isn't compatible with pandas 1.x
-    pytest.importorskip("pandas", minversion="2.0.0")
-    col = "timestamp_col"
-    scalars_df_index = bpd.DataFrame(data).set_index(col)
-    scalars_pandas_df_index = pd.DataFrame(data).set_index(col)
-    scalars_pandas_df_index.index.name = None
-
-    bf_result = scalars_df_index._resample(rule=rule, origin=origin).min().to_pandas()
-
-    pd_result = scalars_pandas_df_index.resample(rule=rule, origin=origin).min()
-
-    pd.testing.assert_frame_equal(
-        bf_result, pd_result, check_dtype=False, check_index_type=False
-    )
-
-
-@pytest.mark.parametrize(
-    "dtype",
-    [
-        pytest.param("string[pyarrow]", id="type-string"),
-        pytest.param(pd.StringDtype(storage="pyarrow"), id="type-literal"),
-        pytest.param(
-            {"bool_col": "string[pyarrow]", "int64_col": pd.Float64Dtype()},
-            id="multiple-types",
-        ),
-    ],
-)
-def test_df_astype(scalars_dfs, dtype):
-    bf_df, pd_df = scalars_dfs
-    target_cols = ["bool_col", "int64_col"]
-    bf_df = bf_df[target_cols]
-    pd_df = pd_df[target_cols]
-
-    bf_result = bf_df.astype(dtype).to_pandas()
-    pd_result = pd_df.astype(dtype)
-
-    pd.testing.assert_frame_equal(bf_result, pd_result, check_index_type=False)
-
-
-def test_df_astype_python_types(scalars_dfs):
-    bf_df, pd_df = scalars_dfs
-    target_cols = ["bool_col", "int64_col"]
-    bf_df = bf_df[target_cols]
-    pd_df = pd_df[target_cols]
-
-    bf_result = bf_df.astype({"bool_col": str, "int64_col": float}).to_pandas()
-    pd_result = pd_df.astype(
-        {"bool_col": "string[pyarrow]", "int64_col": pd.Float64Dtype()}
-    )
-
-    pd.testing.assert_frame_equal(bf_result, pd_result, check_index_type=False)
-
-
-def test_astype_invalid_type_fail(scalars_dfs):
-    bf_df, _ = scalars_dfs
-
-    with pytest.raises(TypeError, match=r".*Share your use case with.*"):
-        bf_df.astype(123)
-
-
-def test_agg_with_dict_lists_strings(scalars_dfs):
-    bf_df, pd_df = scalars_dfs
-    agg_funcs = {
-        "int64_too": ["min", "max"],
-        "int64_col": ["min", "count"],
-    }
-
-    bf_result = bf_df.agg(agg_funcs).to_pandas()
-    pd_result = pd_df.agg(agg_funcs)
-
-    pd.testing.assert_frame_equal(
-        bf_result, pd_result, check_dtype=False, check_index_type=False
-    )
-
-
-def test_agg_with_dict_lists_callables(scalars_dfs):
-    bf_df, pd_df = scalars_dfs
-    agg_funcs = {
-        "int64_too": [np.min, np.max],
-        "int64_col": [np.min, np.var],
-    }
-
-    bf_result = bf_df.agg(agg_funcs).to_pandas()
-    pd_result = pd_df.agg(agg_funcs)
-
-    pd.testing.assert_frame_equal(
-        bf_result, pd_result, check_dtype=False, check_index_type=False
-    )
-
-
-def test_agg_with_dict_list_and_str(scalars_dfs):
-    bf_df, pd_df = scalars_dfs
-    agg_funcs = {
-        "int64_too": ["min", "max"],
-        "int64_col": "sum",
-    }
-
-    bf_result = bf_df.agg(agg_funcs).to_pandas()
-    pd_result = pd_df.agg(agg_funcs)
-
-    pd.testing.assert_frame_equal(
-        bf_result, pd_result, check_dtype=False, check_index_type=False
-    )
-
-
-def test_agg_with_dict_strs(scalars_dfs):
-    bf_df, pd_df = scalars_dfs
-    agg_funcs = {
-        "int64_too": "min",
-        "int64_col": "sum",
-        "float64_col": "max",
-    }
-
-    bf_result = bf_df.agg(agg_funcs).to_pandas()
-    pd_result = pd_df.agg(agg_funcs)
-    pd_result.index = pd_result.index.astype("string[pyarrow]")
-
-    pd.testing.assert_series_equal(
-        bf_result, pd_result, check_dtype=False, check_index_type=False
-    )
-
-
-def test_agg_with_dict_containing_non_existing_col_raise_key_error(scalars_dfs):
-    bf_df, _ = scalars_dfs
-    agg_funcs = {
-        "int64_too": ["min", "max"],
-        "nonexisting_col": ["count"],
-    }
-
-    with pytest.raises(KeyError):
-        bf_df.agg(agg_funcs)
+    # Verify the result is a string representation
+    assert isinstance(result["json_col"].iloc[0], str)

From 30a9ef621e903109e0dcc213940a097e9d415afc Mon Sep 17 00:00:00 2001
From: Shuowei Li <shuowei@google.com>
Date: Mon, 27 Oct 2025 22:29:28 +0000
Subject: [PATCH 29/37] Revert scalar_op_registry.py chnage

---
 bigframes/core/compile/ibis_compiler/scalar_op_registry.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bigframes/core/compile/ibis_compiler/scalar_op_registry.py b/bigframes/core/compile/ibis_compiler/scalar_op_registry.py
index 7b17aac61a..e983fc7e21 100644
--- a/bigframes/core/compile/ibis_compiler/scalar_op_registry.py
+++ b/bigframes/core/compile/ibis_compiler/scalar_op_registry.py
@@ -1036,7 +1036,7 @@ def astype_op_impl(x: ibis_types.Value, op: ops.AsTypeOp):
         if to_type == ibis_dtypes.bool:
             return cast_json_to_bool_in_safe(x) if op.safe else cast_json_to_bool(x)
         if to_type == ibis_dtypes.string:
-            return to_json_string(x)
+            return cast_json_to_string_in_safe(x) if op.safe else cast_json_to_string(x)
 
     # TODO: either inline this function, or push rest of this op into the function
     return bigframes.core.compile.ibis_types.cast_ibis_value(x, to_type, safe=op.safe)

From 6895def33e6a43577f1908c7b2c171d7b94e87ca Mon Sep 17 00:00:00 2001
From: Shuowei Li <shuowei@google.com>
Date: Mon, 27 Oct 2025 22:31:53 +0000
Subject: [PATCH 30/37] remove unnecessary import

---
 bigframes/dataframe.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index 788a47f38b..f3b78e8218 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -852,9 +852,7 @@ def _repr_html_(self) -> str:
 
         if opts.repr_mode == "anywidget":
             try:
-                import anywidget  # noqa: F401
                 from IPython.display import display as ipython_display
-                import traitlets  # noqa: F401
 
                 from bigframes import display
 

From 46444c11ec6148f0ec595a44f0fefcc91ad802d0 Mon Sep 17 00:00:00 2001
From: Shuowei Li <shuowei@google.com>
Date: Mon, 27 Oct 2025 22:47:44 +0000
Subject: [PATCH 31/37] Remove duplicate conversation

---
 bigframes/display/anywidget.py | 16 +---------------
 1 file changed, 1 insertion(+), 15 deletions(-)

diff --git a/bigframes/display/anywidget.py b/bigframes/display/anywidget.py
index ff5a51f312..cf5d4e6310 100644
--- a/bigframes/display/anywidget.py
+++ b/bigframes/display/anywidget.py
@@ -74,21 +74,7 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):
                 "Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use TableWidget."
             )
 
-        super().__init__()
-        # Workaround for Arrow bug https://github.com/apache/arrow/issues/45262
-        # JSON columns are not supported in `to_pandas_batches` and will be converted to string.
-        json_cols = [
-            col
-            for col, dtype in dataframe.dtypes.items()
-            if dtype == bigframes.dtypes.JSON_DTYPE
-        ]
-        if json_cols:
-            df_copy = dataframe.copy()
-            for col in json_cols:
-                df_copy[str(col)] = df_copy[str(col)].astype("string")
-            self._dataframe = df_copy
-        else:
-            self._dataframe = dataframe
+        self._dataframe = dataframe
 
         super().__init__()
 

From 3b8367b3fc74abaf72d5b246b7038ecc6d9a763e Mon Sep 17 00:00:00 2001
From: Shuowei Li <shuowei@google.com>
Date: Mon, 27 Oct 2025 22:52:55 +0000
Subject: [PATCH 32/37] revert changes to test_dataframe.py

---
 tests/system/small/test_dataframe.py | 6151 +++++++++++++++++++++++++-
 1 file changed, 6142 insertions(+), 9 deletions(-)

diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
index ffd9bc512b..79f8efd00f 100644
--- a/tests/system/small/test_dataframe.py
+++ b/tests/system/small/test_dataframe.py
@@ -1,11 +1,6144 @@
-def test_to_pandas_batches_with_json_columns(session):
-    """Test that JSON columns are properly handled in to_pandas_batches."""
-    # Create a DataFrame with JSON column
-    df = session.read_gbq('SELECT JSON \'{"key": "value"}\' as json_col')
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 
-    # This should not raise an error
-    batches = df._to_pandas_batches(page_size=10)
-    result = next(batches)
+import io
+import operator
+import sys
+import tempfile
+import typing
+from typing import Dict, List, Tuple
 
-    # Verify the result is a string representation
-    assert isinstance(result["json_col"].iloc[0], str)
+import geopandas as gpd  # type: ignore
+import numpy as np
+import pandas as pd
+import pandas.testing
+import pyarrow as pa  # type: ignore
+import pytest
+
+import bigframes
+import bigframes._config.display_options as display_options
+import bigframes.core.indexes as bf_indexes
+import bigframes.dataframe as dataframe
+import bigframes.dtypes as dtypes
+import bigframes.pandas as bpd
+import bigframes.series as series
+from bigframes.testing.utils import (
+    assert_dfs_equivalent,
+    assert_pandas_df_equal,
+    assert_series_equal,
+    assert_series_equivalent,
+)
+
+
+def test_df_construct_copy(scalars_dfs):
+    columns = ["int64_col", "string_col", "float64_col"]
+    scalars_df, scalars_pandas_df = scalars_dfs
+    # Make the mapping from label to col_id non-trivial
+    bf_df = scalars_df.copy()
+    bf_df["int64_col"] = bf_df["int64_col"] / 2
+    pd_df = scalars_pandas_df.copy()
+    pd_df["int64_col"] = pd_df["int64_col"] / 2
+
+    bf_result = dataframe.DataFrame(bf_df, columns=columns).to_pandas()
+
+    pd_result = pd.DataFrame(pd_df, columns=columns)
+    pandas.testing.assert_frame_equal(bf_result, pd_result)
+
+
+def test_df_construct_pandas_default(scalars_dfs):
+    # This should trigger the inlined codepath
+    columns = [
+        "int64_too",
+        "int64_col",
+        "float64_col",
+        "bool_col",
+        "string_col",
+        "date_col",
+        "datetime_col",
+        "numeric_col",
+        "float64_col",
+        "time_col",
+        "timestamp_col",
+    ]
+    _, scalars_pandas_df = scalars_dfs
+    bf_result = dataframe.DataFrame(scalars_pandas_df, columns=columns).to_pandas()
+    pd_result = pd.DataFrame(scalars_pandas_df, columns=columns)
+    pandas.testing.assert_frame_equal(bf_result, pd_result)
+
+
+@pytest.mark.parametrize(
+    ("write_engine"),
+    [
+        ("bigquery_inline"),
+        ("bigquery_load"),
+        ("bigquery_streaming"),
+        ("bigquery_write"),
+    ],
+)
+def test_read_pandas_all_nice_types(
+    session: bigframes.Session, scalars_pandas_df_index: pd.DataFrame, write_engine
+):
+    bf_result = session.read_pandas(
+        scalars_pandas_df_index, write_engine=write_engine
+    ).to_pandas()
+    pandas.testing.assert_frame_equal(bf_result, scalars_pandas_df_index)
+
+
+def test_df_construct_large_strings():
+    data = [["hello", "w" + "o" * 50000 + "rld"]]
+    bf_result = dataframe.DataFrame(data).to_pandas()
+    pd_result = pd.DataFrame(data, dtype=pd.StringDtype(storage="pyarrow"))
+    pandas.testing.assert_frame_equal(bf_result, pd_result, check_index_type=False)
+
+
+def test_df_construct_pandas_load_job(scalars_dfs_maybe_ordered):
+    # This should trigger the inlined codepath
+    columns = [
+        "int64_too",
+        "int64_col",
+        "float64_col",
+        "bool_col",
+        "string_col",
+        "date_col",
+        "datetime_col",
+        "numeric_col",
+        "float64_col",
+        "time_col",
+        "timestamp_col",
+        "geography_col",
+    ]
+    _, scalars_pandas_df = scalars_dfs_maybe_ordered
+    bf_result = dataframe.DataFrame(scalars_pandas_df, columns=columns)
+    pd_result = pd.DataFrame(scalars_pandas_df, columns=columns)
+    assert_dfs_equivalent(pd_result, bf_result)
+
+
+def test_df_construct_structs(session):
+    pd_frame = pd.Series(
+        [
+            {"version": 1, "project": "pandas"},
+            {"version": 2, "project": "pandas"},
+            {"version": 1, "project": "numpy"},
+        ]
+    ).to_frame()
+    bf_series = session.read_pandas(pd_frame)
+    pd.testing.assert_frame_equal(
+        bf_series.to_pandas(), pd_frame, check_index_type=False, check_dtype=False
+    )
+
+
+def test_df_construct_local_concat_pd(scalars_pandas_df_index, session):
+    pd_df = pd.concat([scalars_pandas_df_index, scalars_pandas_df_index])
+
+    bf_df = session.read_pandas(pd_df)
+
+    pd.testing.assert_frame_equal(
+        bf_df.to_pandas(), pd_df, check_index_type=False, check_dtype=False
+    )
+
+
+def test_df_construct_pandas_set_dtype(scalars_dfs):
+    columns = [
+        "int64_too",
+        "int64_col",
+        "float64_col",
+        "bool_col",
+    ]
+    _, scalars_pandas_df = scalars_dfs
+    bf_result = dataframe.DataFrame(
+        scalars_pandas_df, columns=columns, dtype="Float64"
+    ).to_pandas()
+    pd_result = pd.DataFrame(scalars_pandas_df, columns=columns, dtype="Float64")
+    pandas.testing.assert_frame_equal(bf_result, pd_result)
+
+
+def test_df_construct_from_series(scalars_dfs_maybe_ordered):
+    scalars_df, scalars_pandas_df = scalars_dfs_maybe_ordered
+    bf_result = dataframe.DataFrame(
+        {"a": scalars_df["int64_col"], "b": scalars_df["string_col"]},
+        dtype="string[pyarrow]",
+    )
+    pd_result = pd.DataFrame(
+        {"a": scalars_pandas_df["int64_col"], "b": scalars_pandas_df["string_col"]},
+        dtype="string[pyarrow]",
+    )
+    assert_dfs_equivalent(pd_result, bf_result)
+
+
+def test_df_construct_from_dict():
+    input_dict = {
+        "Animal": ["Falcon", "Falcon", "Parrot", "Parrot"],
+        # With a space in column name. We use standardized SQL schema ids to solve the problem that BQ schema doesn't support column names with spaces. b/296751058
+        "Max Speed": [380.0, 370.0, 24.0, 26.0],
+    }
+    bf_result = dataframe.DataFrame(input_dict).to_pandas()
+    pd_result = pd.DataFrame(input_dict)
+
+    pandas.testing.assert_frame_equal(
+        bf_result, pd_result, check_dtype=False, check_index_type=False
+    )
+
+
+@pytest.mark.parametrize(
+    ("json_type"),
+    [
+        pytest.param(dtypes.JSON_DTYPE),
+        pytest.param("json"),
+    ],
+)
+def test_df_construct_w_json_dtype(json_type):
+    data = [
+        "1",
+        "false",
+        '["a", {"b": 1}, null]',
+        None,
+    ]
+    df = dataframe.DataFrame({"json_col": data}, dtype=json_type)
+
+    assert df["json_col"].dtype == dtypes.JSON_DTYPE
+    assert df["json_col"][1] == "false"
+
+
+def test_df_construct_inline_respects_location(reset_default_session_and_location):
+    # Note: This starts a thread-local session.
+    with bpd.option_context("bigquery.location", "europe-west1"):
+        df = bpd.DataFrame([[1, 2, 3], [4, 5, 6]])
+        df.to_gbq()
+        assert df.query_job is not None
+        table = bpd.get_global_session().bqclient.get_table(df.query_job.destination)
+
+        assert table.location == "europe-west1"
+
+
+def test_df_construct_dtype():
+    data = {
+        "int_col": [1, 2, 3],
+        "string_col": ["1.1", "2.0", "3.5"],
+        "float_col": [1.0, 2.0, 3.0],
+    }
+    dtype = pd.StringDtype(storage="pyarrow")
+    bf_result = dataframe.DataFrame(data, dtype=dtype)
+    pd_result = pd.DataFrame(data, dtype=dtype)
+    pd_result.index = pd_result.index.astype("Int64")
+    pandas.testing.assert_frame_equal(bf_result.to_pandas(), pd_result)
+
+
+def test_get_column(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    col_name = "int64_col"
+    series = scalars_df[col_name]
+    bf_result = series.to_pandas()
+    pd_result = scalars_pandas_df[col_name]
+    assert_series_equal(bf_result, pd_result)
+
+
+def test_get_column_nonstring(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    series = scalars_df.rename(columns={"int64_col": 123.1})[123.1]
+    bf_result = series.to_pandas()
+    pd_result = scalars_pandas_df.rename(columns={"int64_col": 123.1})[123.1]
+    assert_series_equal(bf_result, pd_result)
+
+
+@pytest.mark.parametrize(
+    "row_slice",
+    [
+        (slice(1, 7, 2)),
+        (slice(1, 7, None)),
+        (slice(None, -3, None)),
+    ],
+)
+def test_get_rows_with_slice(scalars_dfs, row_slice):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    bf_result = scalars_df[row_slice].to_pandas()
+    pd_result = scalars_pandas_df[row_slice]
+    assert_pandas_df_equal(bf_result, pd_result)
+
+
+def test_hasattr(scalars_dfs):
+    scalars_df, _ = scalars_dfs
+    assert hasattr(scalars_df, "int64_col")
+    assert hasattr(scalars_df, "head")
+    assert not hasattr(scalars_df, "not_exist")
+
+
+@pytest.mark.parametrize(
+    ("ordered"),
+    [
+        (True),
+        (False),
+    ],
+)
+def test_head_with_custom_column_labels(
+    scalars_df_index, scalars_pandas_df_index, ordered
+):
+    rename_mapping = {
+        "int64_col": "Integer Column",
+        "string_col": "言語列",
+    }
+    bf_df = scalars_df_index.rename(columns=rename_mapping).head(3)
+    bf_result = bf_df.to_pandas(ordered=ordered)
+    pd_result = scalars_pandas_df_index.rename(columns=rename_mapping).head(3)
+    assert_pandas_df_equal(bf_result, pd_result, ignore_order=not ordered)
+
+
+def test_tail_with_custom_column_labels(scalars_df_index, scalars_pandas_df_index):
+    rename_mapping = {
+        "int64_col": "Integer Column",
+        "string_col": "言語列",
+    }
+    bf_df = scalars_df_index.rename(columns=rename_mapping).tail(3)
+    bf_result = bf_df.to_pandas()
+    pd_result = scalars_pandas_df_index.rename(columns=rename_mapping).tail(3)
+    pandas.testing.assert_frame_equal(bf_result, pd_result)
+
+
+@pytest.mark.parametrize(
+    ("keep",),
+    [
+        ("first",),
+        ("last",),
+        ("all",),
+    ],
+)
+def test_df_nlargest(scalars_df_index, scalars_pandas_df_index, keep):
+    bf_result = scalars_df_index.nlargest(3, ["bool_col", "int64_too"], keep=keep)
+    pd_result = scalars_pandas_df_index.nlargest(
+        3, ["bool_col", "int64_too"], keep=keep
+    )
+
+    pd.testing.assert_frame_equal(
+        bf_result.to_pandas(),
+        pd_result,
+    )
+
+
+@pytest.mark.parametrize(
+    ("keep",),
+    [
+        ("first",),
+        ("last",),
+        ("all",),
+    ],
+)
+def test_df_nsmallest(scalars_df_index, scalars_pandas_df_index, keep):
+    bf_result = scalars_df_index.nsmallest(6, ["bool_col"], keep=keep)
+    pd_result = scalars_pandas_df_index.nsmallest(6, ["bool_col"], keep=keep)
+
+    pd.testing.assert_frame_equal(
+        bf_result.to_pandas(),
+        pd_result,
+    )
+
+
+def test_get_column_by_attr(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    series = scalars_df.int64_col
+    bf_result = series.to_pandas()
+    pd_result = scalars_pandas_df.int64_col
+    assert_series_equal(bf_result, pd_result)
+
+
+def test_get_columns(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    col_names = ["bool_col", "float64_col", "int64_col"]
+    df_subset = scalars_df.get(col_names)
+    df_pandas = df_subset.to_pandas()
+    pd.testing.assert_index_equal(
+        df_pandas.columns, scalars_pandas_df[col_names].columns
+    )
+
+
+def test_get_columns_default(scalars_dfs):
+    scalars_df, _ = scalars_dfs
+    col_names = ["not", "column", "names"]
+    result = scalars_df.get(col_names, "default_val")
+    assert result == "default_val"
+
+
+@pytest.mark.parametrize(
+    ("loc", "column", "value", "allow_duplicates"),
+    [
+        (0, 666, 2, False),
+        (5, "float64_col", 2.2, True),
+        (13, "rowindex_2", [8, 7, 6, 5, 4, 3, 2, 1, 0], True),
+        pytest.param(
+            14,
+            "test",
+            2,
+            False,
+            marks=pytest.mark.xfail(
+                raises=IndexError,
+            ),
+        ),
+        pytest.param(
+            12,
+            "int64_col",
+            2,
+            False,
+            marks=pytest.mark.xfail(
+                raises=ValueError,
+            ),
+        ),
+    ],
+)
+def test_insert(scalars_dfs, loc, column, value, allow_duplicates):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    # insert works inplace, so will influence other tests.
+    # make a copy to avoid inplace changes.
+    bf_df = scalars_df.copy()
+    pd_df = scalars_pandas_df.copy()
+    bf_df.insert(loc, column, value, allow_duplicates)
+    pd_df.insert(loc, column, value, allow_duplicates)
+
+    pd.testing.assert_frame_equal(bf_df.to_pandas(), pd_df, check_dtype=False)
+
+
+def test_mask_series_cond(scalars_df_index, scalars_pandas_df_index):
+    cond_bf = scalars_df_index["int64_col"] > 0
+    cond_pd = scalars_pandas_df_index["int64_col"] > 0
+
+    bf_df = scalars_df_index[["int64_too", "int64_col", "float64_col"]]
+    pd_df = scalars_pandas_df_index[["int64_too", "int64_col", "float64_col"]]
+    bf_result = bf_df.mask(cond_bf, bf_df + 1).to_pandas()
+    pd_result = pd_df.mask(cond_pd, pd_df + 1)
+    pandas.testing.assert_frame_equal(bf_result, pd_result)
+
+
+def test_mask_callable(scalars_df_index, scalars_pandas_df_index):
+    def is_positive(x):
+        return x > 0
+
+    bf_df = scalars_df_index[["int64_too", "int64_col", "float64_col"]]
+    pd_df = scalars_pandas_df_index[["int64_too", "int64_col", "float64_col"]]
+    bf_result = bf_df.mask(cond=is_positive, other=lambda x: x + 1).to_pandas()
+    pd_result = pd_df.mask(cond=is_positive, other=lambda x: x + 1)
+
+    pandas.testing.assert_frame_equal(bf_result, pd_result)
+
+
+def test_where_multi_column(scalars_df_index, scalars_pandas_df_index):
+    # Test when a dataframe has multi-columns.
+    columns = ["int64_col", "float64_col"]
+    dataframe_bf = scalars_df_index[columns]
+
+    dataframe_bf.columns = pd.MultiIndex.from_tuples(
+        [("str1", 1), ("str2", 2)], names=["STR", "INT"]
+    )
+    cond_bf = dataframe_bf["str1"] > 0
+
+    with pytest.raises(NotImplementedError) as context:
+        dataframe_bf.where(cond_bf).to_pandas()
+    assert (
+        str(context.value)
+        == "The dataframe.where() method does not support multi-column."
+    )
+
+
+def test_where_series_cond(scalars_df_index, scalars_pandas_df_index):
+    # Condition is dataframe, other is None (as default).
+    cond_bf = scalars_df_index["int64_col"] > 0
+    cond_pd = scalars_pandas_df_index["int64_col"] > 0
+    bf_result = scalars_df_index.where(cond_bf).to_pandas()
+    pd_result = scalars_pandas_df_index.where(cond_pd)
+    pandas.testing.assert_frame_equal(bf_result, pd_result)
+
+
+def test_where_series_cond_const_other(scalars_df_index, scalars_pandas_df_index):
+    # Condition is a series, other is a constant.
+    columns = ["int64_col", "float64_col"]
+    dataframe_bf = scalars_df_index[columns]
+    dataframe_pd = scalars_pandas_df_index[columns]
+    dataframe_bf.columns.name = "test_name"
+    dataframe_pd.columns.name = "test_name"
+
+    cond_bf = dataframe_bf["int64_col"] > 0
+    cond_pd = dataframe_pd["int64_col"] > 0
+    other = 0
+
+    bf_result = dataframe_bf.where(cond_bf, other).to_pandas()
+    pd_result = dataframe_pd.where(cond_pd, other)
+    pandas.testing.assert_frame_equal(bf_result, pd_result)
+
+
+def test_where_series_cond_dataframe_other(scalars_df_index, scalars_pandas_df_index):
+    # Condition is a series, other is a dataframe.
+    columns = ["int64_col", "float64_col"]
+    dataframe_bf = scalars_df_index[columns]
+    dataframe_pd = scalars_pandas_df_index[columns]
+
+    cond_bf = dataframe_bf["int64_col"] > 0
+    cond_pd = dataframe_pd["int64_col"] > 0
+    other_bf = -dataframe_bf
+    other_pd = -dataframe_pd
+
+    bf_result = dataframe_bf.where(cond_bf, other_bf).to_pandas()
+    pd_result = dataframe_pd.where(cond_pd, other_pd)
+    pandas.testing.assert_frame_equal(bf_result, pd_result)
+
+
+def test_where_dataframe_cond(scalars_df_index, scalars_pandas_df_index):
+    # Condition is a dataframe, other is None.
+    columns = ["int64_col", "float64_col"]
+    dataframe_bf = scalars_df_index[columns]
+    dataframe_pd = scalars_pandas_df_index[columns]
+
+    cond_bf = dataframe_bf > 0
+    cond_pd = dataframe_pd > 0
+
+    bf_result = dataframe_bf.where(cond_bf, None).to_pandas()
+    pd_result = dataframe_pd.where(cond_pd, None)
+    pandas.testing.assert_frame_equal(bf_result, pd_result)
+
+
+def test_where_dataframe_cond_const_other(scalars_df_index, scalars_pandas_df_index):
+    # Condition is a dataframe, other is a constant.
+    columns = ["int64_col", "float64_col"]
+    dataframe_bf = scalars_df_index[columns]
+    dataframe_pd = scalars_pandas_df_index[columns]
+
+    cond_bf = dataframe_bf > 0
+    cond_pd = dataframe_pd > 0
+    other_bf = 10
+    other_pd = 10
+
+    bf_result = dataframe_bf.where(cond_bf, other_bf).to_pandas()
+    pd_result = dataframe_pd.where(cond_pd, other_pd)
+    pandas.testing.assert_frame_equal(bf_result, pd_result)
+
+
+def test_where_dataframe_cond_dataframe_other(
+    scalars_df_index, scalars_pandas_df_index
+):
+    # Condition is a dataframe, other is a dataframe.
+    columns = ["int64_col", "float64_col"]
+    dataframe_bf = scalars_df_index[columns]
+    dataframe_pd = scalars_pandas_df_index[columns]
+
+    cond_bf = dataframe_bf > 0
+    cond_pd = dataframe_pd > 0
+    other_bf = dataframe_bf * 2
+    other_pd = dataframe_pd * 2
+
+    bf_result = dataframe_bf.where(cond_bf, other_bf).to_pandas()
+    pd_result = dataframe_pd.where(cond_pd, other_pd)
+    pandas.testing.assert_frame_equal(bf_result, pd_result)
+
+
+def test_where_callable_cond_constant_other(scalars_df_index, scalars_pandas_df_index):
+    # Condition is callable, other is a constant.
+    columns = ["int64_col", "float64_col"]
+    dataframe_bf = scalars_df_index[columns]
+    dataframe_pd = scalars_pandas_df_index[columns]
+
+    other = 10
+
+    bf_result = dataframe_bf.where(lambda x: x > 0, other).to_pandas()
+    pd_result = dataframe_pd.where(lambda x: x > 0, other)
+    pandas.testing.assert_frame_equal(bf_result, pd_result)
+
+
+def test_where_dataframe_cond_callable_other(scalars_df_index, scalars_pandas_df_index):
+    # Condition is a dataframe, other is callable.
+    columns = ["int64_col", "float64_col"]
+    dataframe_bf = scalars_df_index[columns]
+    dataframe_pd = scalars_pandas_df_index[columns]
+
+    cond_bf = dataframe_bf > 0
+    cond_pd = dataframe_pd > 0
+
+    def func(x):
+        return x * 2
+
+    bf_result = dataframe_bf.where(cond_bf, func).to_pandas()
+    pd_result = dataframe_pd.where(cond_pd, func)
+    pandas.testing.assert_frame_equal(bf_result, pd_result)
+
+
+def test_where_callable_cond_callable_other(scalars_df_index, scalars_pandas_df_index):
+    # Condition is callable, other is callable too.
+    columns = ["int64_col", "float64_col"]
+    dataframe_bf = scalars_df_index[columns]
+    dataframe_pd = scalars_pandas_df_index[columns]
+
+    def func(x):
+        return x["int64_col"] > 0
+
+    bf_result = dataframe_bf.where(func, lambda x: x * 2).to_pandas()
+    pd_result = dataframe_pd.where(func, lambda x: x * 2)
+    pandas.testing.assert_frame_equal(bf_result, pd_result)
+
+
+def test_where_series_other(scalars_df_index):
+    # When other is a series, throw an error.
+    columns = ["int64_col", "float64_col"]
+    dataframe_bf = scalars_df_index[columns]
+
+    with pytest.raises(
+        ValueError,
+        match="Seires is not a supported replacement type!",
+    ):
+        dataframe_bf.where(dataframe_bf > 0, dataframe_bf["int64_col"])
+
+
+def test_drop_column(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    col_name = "int64_col"
+    df_pandas = scalars_df.drop(columns=col_name).to_pandas()
+    pd.testing.assert_index_equal(
+        df_pandas.columns, scalars_pandas_df.drop(columns=col_name).columns
+    )
+
+
+def test_drop_columns(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    col_names = ["int64_col", "geography_col", "time_col"]
+    df_pandas = scalars_df.drop(columns=col_names).to_pandas()
+    pd.testing.assert_index_equal(
+        df_pandas.columns, scalars_pandas_df.drop(columns=col_names).columns
+    )
+
+
+def test_drop_labels_axis_1(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    labels = ["int64_col", "geography_col", "time_col"]
+
+    pd_result = scalars_pandas_df.drop(labels=labels, axis=1)
+    bf_result = scalars_df.drop(labels=labels, axis=1).to_pandas()
+
+    pd.testing.assert_frame_equal(pd_result, bf_result)
+
+
+def test_drop_with_custom_column_labels(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    rename_mapping = {
+        "int64_col": "Integer Column",
+        "string_col": "言語列",
+    }
+    dropped_columns = [
+        "言語列",
+        "timestamp_col",
+    ]
+    bf_df = scalars_df.rename(columns=rename_mapping).drop(columns=dropped_columns)
+    bf_result = bf_df.to_pandas()
+    pd_result = scalars_pandas_df.rename(columns=rename_mapping).drop(
+        columns=dropped_columns
+    )
+    assert_pandas_df_equal(bf_result, pd_result)
+
+
+def test_df_memory_usage(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+
+    pd_result = scalars_pandas_df.memory_usage()
+    bf_result = scalars_df.memory_usage()
+
+    pd.testing.assert_series_equal(pd_result, bf_result, rtol=1.5)
+
+
+def test_df_info(scalars_dfs):
+    expected = (
+        "<class 'bigframes.dataframe.DataFrame'>\n"
+        "Index: 9 entries, 0 to 8\n"
+        "Data columns (total 14 columns):\n"
+        "  #  Column         Non-Null Count    Dtype\n"
+        "---  -------------  ----------------  ------------------------------\n"
+        "  0  bool_col       8 non-null        boolean\n"
+        "  1  bytes_col      6 non-null        binary[pyarrow]\n"
+        "  2  date_col       7 non-null        date32[day][pyarrow]\n"
+        "  3  datetime_col   6 non-null        timestamp[us][pyarrow]\n"
+        "  4  geography_col  4 non-null        geometry\n"
+        "  5  int64_col      8 non-null        Int64\n"
+        "  6  int64_too      9 non-null        Int64\n"
+        "  7  numeric_col    6 non-null        decimal128(38, 9)[pyarrow]\n"
+        "  8  float64_col    7 non-null        Float64\n"
+        "  9  rowindex_2     9 non-null        Int64\n"
+        " 10  string_col     8 non-null        string\n"
+        " 11  time_col       6 non-null        time64[us][pyarrow]\n"
+        " 12  timestamp_col  6 non-null        timestamp[us, tz=UTC][pyarrow]\n"
+        " 13  duration_col   7 non-null        duration[us][pyarrow]\n"
+        "dtypes: Float64(1), Int64(3), binary[pyarrow](1), boolean(1), date32[day][pyarrow](1), decimal128(38, 9)[pyarrow](1), duration[us][pyarrow](1), geometry(1), string(1), time64[us][pyarrow](1), timestamp[us, tz=UTC][pyarrow](1), timestamp[us][pyarrow](1)\n"
+        "memory usage: 1341 bytes\n"
+    )
+
+    scalars_df, _ = scalars_dfs
+    bf_result = io.StringIO()
+
+    scalars_df.info(buf=bf_result)
+
+    assert expected == bf_result.getvalue()
+
+
+@pytest.mark.parametrize(
+    ("include", "exclude"),
+    [
+        ("Int64", None),
+        (["int"], None),
+        ("number", None),
+        ([pd.Int64Dtype(), pd.BooleanDtype()], None),
+        (None, [pd.Int64Dtype(), pd.BooleanDtype()]),
+        ("Int64", ["boolean"]),
+    ],
+)
+def test_select_dtypes(scalars_dfs, include, exclude):
+    scalars_df, scalars_pandas_df = scalars_dfs
+
+    pd_result = scalars_pandas_df.select_dtypes(include=include, exclude=exclude)
+    bf_result = scalars_df.select_dtypes(include=include, exclude=exclude).to_pandas()
+
+    pd.testing.assert_frame_equal(pd_result, bf_result)
+
+
+def test_drop_index(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+
+    pd_result = scalars_pandas_df.drop(index=[4, 1, 2])
+    bf_result = scalars_df.drop(index=[4, 1, 2]).to_pandas()
+
+    pd.testing.assert_frame_equal(pd_result, bf_result)
+
+
+def test_drop_pandas_index(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    drop_index = scalars_pandas_df.iloc[[4, 1, 2]].index
+
+    pd_result = scalars_pandas_df.drop(index=drop_index)
+    bf_result = scalars_df.drop(index=drop_index).to_pandas()
+
+    pd.testing.assert_frame_equal(pd_result, bf_result)
+
+
+def test_drop_bigframes_index(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    drop_index = scalars_df.loc[[4, 1, 2]].index
+    drop_pandas_index = scalars_pandas_df.loc[[4, 1, 2]].index
+
+    pd_result = scalars_pandas_df.drop(index=drop_pandas_index)
+    bf_result = scalars_df.drop(index=drop_index).to_pandas()
+
+    pd.testing.assert_frame_equal(pd_result, bf_result)
+
+
+def test_drop_bigframes_index_with_na(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    scalars_df = scalars_df.copy()
+    scalars_pandas_df = scalars_pandas_df.copy()
+    scalars_df = scalars_df.set_index("bytes_col")
+    scalars_pandas_df = scalars_pandas_df.set_index("bytes_col")
+    drop_index = scalars_df.iloc[[3, 5]].index
+    drop_pandas_index = scalars_pandas_df.iloc[[3, 5]].index
+
+    pd_result = scalars_pandas_df.drop(index=drop_pandas_index)  # drop_pandas_index)
+    bf_result = scalars_df.drop(index=drop_index).to_pandas()
+
+    pd.testing.assert_frame_equal(pd_result, bf_result)
+
+
+def test_drop_bigframes_multiindex(scalars_dfs):
+    # TODO: supply a reason why this isn't compatible with pandas 1.x
+    pytest.importorskip("pandas", minversion="2.0.0")
+    scalars_df, scalars_pandas_df = scalars_dfs
+    scalars_df = scalars_df.copy()
+    scalars_pandas_df = scalars_pandas_df.copy()
+    sub_df = scalars_df.iloc[[4, 1, 2]]
+    sub_pandas_df = scalars_pandas_df.iloc[[4, 1, 2]]
+    sub_df = sub_df.set_index(["bytes_col", "numeric_col"])
+    sub_pandas_df = sub_pandas_df.set_index(["bytes_col", "numeric_col"])
+    drop_index = sub_df.index
+    drop_pandas_index = sub_pandas_df.index
+
+    scalars_df = scalars_df.set_index(["bytes_col", "numeric_col"])
+    scalars_pandas_df = scalars_pandas_df.set_index(["bytes_col", "numeric_col"])
+    bf_result = scalars_df.drop(index=drop_index).to_pandas()
+    pd_result = scalars_pandas_df.drop(index=drop_pandas_index)
+
+    pd.testing.assert_frame_equal(pd_result, bf_result)
+
+
+def test_drop_labels_axis_0(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+
+    pd_result = scalars_pandas_df.drop(labels=[4, 1, 2], axis=0)
+    bf_result = scalars_df.drop(labels=[4, 1, 2], axis=0).to_pandas()
+
+    pd.testing.assert_frame_equal(pd_result, bf_result)
+
+
+def test_drop_index_and_columns(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+
+    pd_result = scalars_pandas_df.drop(index=[4, 1, 2], columns="int64_col")
+    bf_result = scalars_df.drop(index=[4, 1, 2], columns="int64_col").to_pandas()
+
+    pd.testing.assert_frame_equal(pd_result, bf_result)
+
+
+def test_rename(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    col_name_dict = {"bool_col": 1.2345}
+    df_pandas = scalars_df.rename(columns=col_name_dict).to_pandas()
+    pd.testing.assert_index_equal(
+        df_pandas.columns, scalars_pandas_df.rename(columns=col_name_dict).columns
+    )
+
+
+def test_df_peek(scalars_dfs_maybe_ordered):
+    scalars_df, scalars_pandas_df = scalars_dfs_maybe_ordered
+
+    peek_result = scalars_df.peek(n=3, force=False, allow_large_results=True)
+
+    pd.testing.assert_index_equal(scalars_pandas_df.columns, peek_result.columns)
+    assert len(peek_result) == 3
+
+
+def test_df_peek_with_large_results_not_allowed(scalars_dfs_maybe_ordered):
+    scalars_df, scalars_pandas_df = scalars_dfs_maybe_ordered
+
+    peek_result = scalars_df.peek(n=3, force=False, allow_large_results=False)
+
+    pd.testing.assert_index_equal(scalars_pandas_df.columns, peek_result.columns)
+    assert len(peek_result) == 3
+
+
+def test_df_peek_filtered(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    peek_result = scalars_df[scalars_df.int64_col != 0].peek(n=3, force=False)
+    pd.testing.assert_index_equal(scalars_pandas_df.columns, peek_result.columns)
+    assert len(peek_result) == 3
+
+
+def test_df_peek_exception(scalars_dfs):
+    scalars_df, _ = scalars_dfs
+
+    with pytest.raises(ValueError):
+        # Window ops aren't compatible with efficient peeking
+        scalars_df[["int64_col", "int64_too"]].cumsum().peek(n=3, force=False)
+
+
+def test_df_peek_force_default(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    peek_result = scalars_df[["int64_col", "int64_too"]].cumsum().peek(n=3)
+    pd.testing.assert_index_equal(
+        scalars_pandas_df[["int64_col", "int64_too"]].columns, peek_result.columns
+    )
+    assert len(peek_result) == 3
+
+
+def test_df_peek_reset_index(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    peek_result = (
+        scalars_df[["int64_col", "int64_too"]].reset_index(drop=True).peek(n=3)
+    )
+    pd.testing.assert_index_equal(
+        scalars_pandas_df[["int64_col", "int64_too"]].columns, peek_result.columns
+    )
+    assert len(peek_result) == 3
+
+
+def test_repr_w_all_rows(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+
+    # Remove columns with flaky formatting, like NUMERIC columns (which use the
+    # object dtype). Also makes a copy so that mutating the index name doesn't
+    # break other tests.
+    scalars_df = scalars_df.drop(columns=["numeric_col"])
+    scalars_pandas_df = scalars_pandas_df.drop(columns=["numeric_col"])
+
+    # When there are 10 or fewer rows, the outputs should be identical.
+    actual = repr(scalars_df.head(10))
+
+    with display_options.pandas_repr(bigframes.options.display):
+        expected = repr(scalars_pandas_df.head(10))
+
+    assert actual == expected
+
+
+def test_join_repr(scalars_dfs_maybe_ordered):
+    scalars_df, scalars_pandas_df = scalars_dfs_maybe_ordered
+
+    scalars_df = (
+        scalars_df[["int64_col"]]
+        .join(scalars_df.set_index("int64_col")[["int64_too"]])
+        .sort_index()
+    )
+    scalars_pandas_df = (
+        scalars_pandas_df[["int64_col"]]
+        .join(scalars_pandas_df.set_index("int64_col")[["int64_too"]])
+        .sort_index()
+    )
+    # Pandas join result index name seems to depend on the index values in a way that bigframes can't match exactly
+    scalars_pandas_df.index.name = None
+
+    actual = repr(scalars_df)
+
+    with display_options.pandas_repr(bigframes.options.display):
+        expected = repr(scalars_pandas_df)
+
+    assert actual == expected
+
+
+def test_repr_w_display_options(scalars_dfs, session):
+    metrics = session._metrics
+    scalars_df, _ = scalars_dfs
+    # get a pandas df of the expected format
+    df, _ = scalars_df._block.to_pandas()
+    pandas_df = df.set_axis(scalars_df._block.column_labels, axis=1)
+    pandas_df.index.name = scalars_df.index.name
+
+    executions_pre = metrics.execution_count
+    with bigframes.option_context(
+        "display.max_rows", 10, "display.max_columns", 5, "display.max_colwidth", 10
+    ):
+
+        # When there are 10 or fewer rows, the outputs should be identical except for the extra note.
+        actual = scalars_df.head(10).__repr__()
+        executions_post = metrics.execution_count
+
+        with display_options.pandas_repr(bigframes.options.display):
+            pandas_repr = pandas_df.head(10).__repr__()
+
+    assert actual == pandas_repr
+    assert (executions_post - executions_pre) <= 3
+
+
+def test_repr_html_w_all_rows(scalars_dfs, session):
+    metrics = session._metrics
+    scalars_df, _ = scalars_dfs
+    # get a pandas df of the expected format
+    df, _ = scalars_df._block.to_pandas()
+    pandas_df = df.set_axis(scalars_df._block.column_labels, axis=1)
+    pandas_df.index.name = scalars_df.index.name
+
+    executions_pre = metrics.execution_count
+    # When there are 10 or fewer rows, the outputs should be identical except for the extra note.
+    actual = scalars_df.head(10)._repr_html_()
+    executions_post = metrics.execution_count
+
+    with display_options.pandas_repr(bigframes.options.display):
+        pandas_repr = pandas_df.head(10)._repr_html_()
+
+    expected = (
+        pandas_repr
+        + f"[{len(pandas_df.index)} rows x {len(pandas_df.columns)} columns in total]"
+    )
+    assert actual == expected
+    assert (executions_post - executions_pre) <= 3
+
+
+def test_df_column_name_with_space(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    col_name_dict = {"bool_col": "bool  col"}
+    df_pandas = scalars_df.rename(columns=col_name_dict).to_pandas()
+    pd.testing.assert_index_equal(
+        df_pandas.columns, scalars_pandas_df.rename(columns=col_name_dict).columns
+    )
+
+
+def test_df_column_name_duplicate(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    col_name_dict = {"int64_too": "int64_col"}
+    df_pandas = scalars_df.rename(columns=col_name_dict).to_pandas()
+    pd.testing.assert_index_equal(
+        df_pandas.columns, scalars_pandas_df.rename(columns=col_name_dict).columns
+    )
+
+
+def test_get_df_column_name_duplicate(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    col_name_dict = {"int64_too": "int64_col"}
+
+    bf_result = scalars_df.rename(columns=col_name_dict)["int64_col"].to_pandas()
+    pd_result = scalars_pandas_df.rename(columns=col_name_dict)["int64_col"]
+    pd.testing.assert_index_equal(bf_result.columns, pd_result.columns)
+
+
+@pytest.mark.parametrize(
+    ("indices", "axis"),
+    [
+        ([1, 3, 5], 0),
+        ([2, 4, 6], 1),
+        ([1, -3, -5, -6], "index"),
+        ([-2, -4, -6], "columns"),
+    ],
+)
+def test_take_df(scalars_dfs, indices, axis):
+    scalars_df, scalars_pandas_df = scalars_dfs
+
+    bf_result = scalars_df.take(indices, axis=axis).to_pandas()
+    pd_result = scalars_pandas_df.take(indices, axis=axis)
+
+    assert_pandas_df_equal(bf_result, pd_result)
+
+
+def test_filter_df(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+
+    bf_bool_series = scalars_df["bool_col"]
+    bf_result = scalars_df[bf_bool_series].to_pandas()
+
+    pd_bool_series = scalars_pandas_df["bool_col"]
+    pd_result = scalars_pandas_df[pd_bool_series]
+
+    assert_pandas_df_equal(bf_result, pd_result)
+
+
+def test_read_gbq_direct_to_batches_row_count(unordered_session):
+    df = unordered_session.read_gbq("bigquery-public-data.usa_names.usa_1910_2013")
+    iter = df.to_pandas_batches()
+    assert iter.total_rows == 5552452
+
+
+def test_df_to_pandas_batches(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+
+    capped_unfiltered_batches = scalars_df.to_pandas_batches(page_size=2, max_results=6)
+    bf_bool_series = scalars_df["bool_col"]
+    filtered_batches = scalars_df[bf_bool_series].to_pandas_batches()
+
+    pd_bool_series = scalars_pandas_df["bool_col"]
+    pd_result = scalars_pandas_df[pd_bool_series]
+
+    assert 6 == capped_unfiltered_batches.total_rows
+    assert len(pd_result) == filtered_batches.total_rows
+    assert_pandas_df_equal(pd.concat(filtered_batches), pd_result)
+
+
+@pytest.mark.parametrize(
+    ("literal", "expected_dtype"),
+    (
+        pytest.param(
+            2,
+            dtypes.INT_DTYPE,
+            id="INT64",
+        ),
+        # ====================================================================
+        # NULL values
+        #
+        # These are regression tests for b/428999884. It needs to be possible to
+        # set a column to NULL with a desired type (not just the pandas default
+        # of float64).
+        # ====================================================================
+        pytest.param(None, dtypes.FLOAT_DTYPE, id="NULL-None"),
+        pytest.param(
+            pa.scalar(None, type=pa.int64()),
+            dtypes.INT_DTYPE,
+            id="NULL-pyarrow-TIMESTAMP",
+        ),
+        pytest.param(
+            pa.scalar(None, type=pa.timestamp("us", tz="UTC")),
+            dtypes.TIMESTAMP_DTYPE,
+            id="NULL-pyarrow-TIMESTAMP",
+        ),
+        pytest.param(
+            pa.scalar(None, type=pa.timestamp("us")),
+            dtypes.DATETIME_DTYPE,
+            id="NULL-pyarrow-DATETIME",
+        ),
+    ),
+)
+def test_assign_new_column_w_literal(scalars_dfs, literal, expected_dtype):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    df = scalars_df.assign(new_col=literal)
+    bf_result = df.to_pandas()
+
+    new_col_pd = literal
+    if isinstance(literal, pa.Scalar):
+        # PyArrow integer scalars aren't yet supported in pandas Int64Dtype.
+        new_col_pd = literal.as_py()
+
+    # Pandas might not pick the same dtype as BigFrames, but it should at least
+    # be castable to it.
+    pd_result = scalars_pandas_df.assign(new_col=new_col_pd)
+    pd_result["new_col"] = pd_result["new_col"].astype(expected_dtype)
+
+    assert_pandas_df_equal(bf_result, pd_result)
+
+
+def test_assign_new_column_w_loc(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    bf_df = scalars_df.copy()
+    pd_df = scalars_pandas_df.copy()
+    bf_df.loc[:, "new_col"] = 2
+    pd_df.loc[:, "new_col"] = 2
+    bf_result = bf_df.to_pandas()
+    pd_result = pd_df
+
+    # Convert default pandas dtypes `int64` to match BigQuery DataFrames dtypes.
+    pd_result["new_col"] = pd_result["new_col"].astype("Int64")
+
+    pd.testing.assert_frame_equal(bf_result, pd_result)
+
+
+@pytest.mark.parametrize(
+    ("scalar",),
+    [
+        (2.1,),
+        (None,),
+    ],
+)
+def test_assign_new_column_w_setitem(scalars_dfs, scalar):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    bf_df = scalars_df.copy()
+    pd_df = scalars_pandas_df.copy()
+    bf_df["new_col"] = scalar
+    pd_df["new_col"] = scalar
+    bf_result = bf_df.to_pandas()
+    pd_result = pd_df
+
+    # Convert default pandas dtypes `float64` to match BigQuery DataFrames dtypes.
+    pd_result["new_col"] = pd_result["new_col"].astype("Float64")
+
+    pd.testing.assert_frame_equal(bf_result, pd_result)
+
+
+def test_assign_new_column_w_setitem_dataframe(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    bf_df = scalars_df.copy()
+    pd_df = scalars_pandas_df.copy()
+    bf_df["int64_col"] = bf_df["int64_too"].to_frame()
+    pd_df["int64_col"] = pd_df["int64_too"].to_frame()
+
+    # Convert default pandas dtypes `int64` to match BigQuery DataFrames dtypes.
+    pd_df["int64_col"] = pd_df["int64_col"].astype("Int64")
+
+    pd.testing.assert_frame_equal(bf_df.to_pandas(), pd_df)
+
+
+def test_assign_new_column_w_setitem_dataframe_error(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    bf_df = scalars_df.copy()
+    pd_df = scalars_pandas_df.copy()
+
+    with pytest.raises(ValueError):
+        bf_df["impossible_col"] = bf_df[["int64_too", "string_col"]]
+    with pytest.raises(ValueError):
+        pd_df["impossible_col"] = pd_df[["int64_too", "string_col"]]
+
+
+def test_assign_new_column_w_setitem_list(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    bf_df = scalars_df.copy()
+    pd_df = scalars_pandas_df.copy()
+    bf_df["new_col"] = [9, 8, 7, 6, 5, 4, 3, 2, 1]
+    pd_df["new_col"] = [9, 8, 7, 6, 5, 4, 3, 2, 1]
+    bf_result = bf_df.to_pandas()
+    pd_result = pd_df
+
+    # Convert default pandas dtypes `int64` to match BigQuery DataFrames dtypes.
+    pd_result["new_col"] = pd_result["new_col"].astype("Int64")
+
+    pd.testing.assert_frame_equal(bf_result, pd_result)
+
+
+def test_assign_new_column_w_setitem_list_repeated(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    bf_df = scalars_df.copy()
+    pd_df = scalars_pandas_df.copy()
+    bf_df["new_col"] = [9, 8, 7, 6, 5, 4, 3, 2, 1]
+    pd_df["new_col"] = [9, 8, 7, 6, 5, 4, 3, 2, 1]
+    bf_df["new_col_2"] = [1, 3, 2, 5, 4, 7, 6, 9, 8]
+    pd_df["new_col_2"] = [1, 3, 2, 5, 4, 7, 6, 9, 8]
+    bf_result = bf_df.to_pandas()
+    pd_result = pd_df
+
+    # Convert default pandas dtypes `int64` to match BigQuery DataFrames dtypes.
+    pd_result["new_col"] = pd_result["new_col"].astype("Int64")
+    pd_result["new_col_2"] = pd_result["new_col_2"].astype("Int64")
+
+    pd.testing.assert_frame_equal(bf_result, pd_result)
+
+
+def test_assign_new_column_w_setitem_list_custom_index(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    bf_df = scalars_df.copy()
+    pd_df = scalars_pandas_df.copy()
+
+    # set the custom index
+    pd_df = pd_df.set_index(["string_col", "int64_col"])
+    bf_df = bf_df.set_index(["string_col", "int64_col"])
+
+    bf_df["new_col"] = [9, 8, 7, 6, 5, 4, 3, 2, 1]
+    pd_df["new_col"] = [9, 8, 7, 6, 5, 4, 3, 2, 1]
+    bf_result = bf_df.to_pandas()
+    pd_result = pd_df
+
+    # Convert default pandas dtypes `int64` to match BigQuery DataFrames dtypes.
+    pd_result["new_col"] = pd_result["new_col"].astype("Int64")
+
+    pd.testing.assert_frame_equal(bf_result, pd_result)
+
+
+def test_assign_new_column_w_setitem_list_error(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    bf_df = scalars_df.copy()
+    pd_df = scalars_pandas_df.copy()
+
+    with pytest.raises(ValueError):
+        pd_df["new_col"] = [1, 2, 3]  # should be len 9, is 3
+    with pytest.raises(ValueError):
+        bf_df["new_col"] = [1, 2, 3]
+
+
+@pytest.mark.parametrize(
+    ("key", "value"),
+    [
+        pytest.param(["int64_col", "int64_too"], 1, id="scalar_to_existing_column"),
+        pytest.param(
+            ["int64_col", "int64_too"], [1, 2], id="sequence_to_existing_column"
+        ),
+        pytest.param(
+            ["int64_col", "new_col"], [1, 2], id="sequence_to_partial_new_column"
+        ),
+        pytest.param(
+            ["new_col", "new_col_too"], [1, 2], id="sequence_to_full_new_column"
+        ),
+        pytest.param(
+            pd.Index(("new_col", "new_col_too")),
+            [1, 2],
+            id="sequence_to_full_new_column_as_index",
+        ),
+    ],
+)
+def test_setitem_multicolumn_with_literals(scalars_dfs, key, value):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    bf_result = scalars_df.copy()
+    pd_result = scalars_pandas_df.copy()
+
+    bf_result[key] = value
+    pd_result[key] = value
+
+    pd.testing.assert_frame_equal(pd_result, bf_result.to_pandas(), check_dtype=False)
+
+
+def test_setitem_multicolumn_with_literals_different_lengths_raise_error(scalars_dfs):
+    scalars_df, _ = scalars_dfs
+    bf_result = scalars_df.copy()
+
+    with pytest.raises(ValueError):
+        bf_result[["int64_col", "int64_too"]] = [1]
+
+
+def test_setitem_multicolumn_with_dataframes(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    bf_result = scalars_df.copy()
+    pd_result = scalars_pandas_df.copy()
+
+    bf_result[["int64_col", "int64_too"]] = bf_result[["int64_too", "int64_col"]] / 2
+    pd_result[["int64_col", "int64_too"]] = pd_result[["int64_too", "int64_col"]] / 2
+
+    pd.testing.assert_frame_equal(pd_result, bf_result.to_pandas(), check_dtype=False)
+
+
+def test_setitem_multicolumn_with_dataframes_series_on_rhs_raise_error(scalars_dfs):
+    scalars_df, _ = scalars_dfs
+    bf_result = scalars_df.copy()
+
+    with pytest.raises(ValueError):
+        bf_result[["int64_col", "int64_too"]] = bf_result["int64_col"] / 2
+
+
+def test_setitem_multicolumn_with_dataframes_different_lengths_raise_error(scalars_dfs):
+    scalars_df, _ = scalars_dfs
+    bf_result = scalars_df.copy()
+
+    with pytest.raises(ValueError):
+        bf_result[["int64_col"]] = bf_result[["int64_col", "int64_too"]] / 2
+
+
+def test_assign_existing_column(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    kwargs = {"int64_col": 2}
+    df = scalars_df.assign(**kwargs)
+    bf_result = df.to_pandas()
+    pd_result = scalars_pandas_df.assign(**kwargs)
+
+    # Convert default pandas dtypes `int64` to match BigQuery DataFrames dtypes.
+    pd_result["int64_col"] = pd_result["int64_col"].astype("Int64")
+
+    assert_pandas_df_equal(bf_result, pd_result)
+
+
+def test_assign_listlike_to_empty_df(session):
+    empty_df = dataframe.DataFrame(session=session)
+    empty_pandas_df = pd.DataFrame()
+
+    bf_result = empty_df.assign(new_col=[1, 2, 3])
+    pd_result = empty_pandas_df.assign(new_col=[1, 2, 3])
+
+    pd_result["new_col"] = pd_result["new_col"].astype("Int64")
+    pd_result.index = pd_result.index.astype("Int64")
+    assert_pandas_df_equal(bf_result.to_pandas(), pd_result)
+
+
+def test_assign_to_empty_df_multiindex_error(session):
+    empty_df = dataframe.DataFrame(session=session)
+    empty_pandas_df = pd.DataFrame()
+
+    empty_df["empty_col_1"] = typing.cast(series.Series, [])
+    empty_df["empty_col_2"] = typing.cast(series.Series, [])
+    empty_pandas_df["empty_col_1"] = []
+    empty_pandas_df["empty_col_2"] = []
+    empty_df = empty_df.set_index(["empty_col_1", "empty_col_2"])
+    empty_pandas_df = empty_pandas_df.set_index(["empty_col_1", "empty_col_2"])
+
+    with pytest.raises(ValueError):
+        empty_df.assign(new_col=[1, 2, 3, 4, 5, 6, 7, 8, 9])
+    with pytest.raises(ValueError):
+        empty_pandas_df.assign(new_col=[1, 2, 3, 4, 5, 6, 7, 8, 9])
+
+
+@pytest.mark.parametrize(
+    ("ordered"),
+    [
+        (True),
+        (False),
+    ],
+)
+def test_assign_series(scalars_dfs, ordered):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    column_name = "int64_col"
+    df = scalars_df.assign(new_col=scalars_df[column_name])
+    bf_result = df.to_pandas(ordered=ordered)
+    pd_result = scalars_pandas_df.assign(new_col=scalars_pandas_df[column_name])
+
+    assert_pandas_df_equal(bf_result, pd_result, ignore_order=not ordered)
+
+
+def test_assign_series_overwrite(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    column_name = "int64_col"
+    df = scalars_df.assign(**{column_name: scalars_df[column_name] + 3})
+    bf_result = df.to_pandas()
+    pd_result = scalars_pandas_df.assign(
+        **{column_name: scalars_pandas_df[column_name] + 3}
+    )
+
+    assert_pandas_df_equal(bf_result, pd_result)
+
+
+def test_assign_sequential(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    kwargs = {"int64_col": 2, "new_col": 3, "new_col2": 4}
+    df = scalars_df.assign(**kwargs)
+    bf_result = df.to_pandas()
+    pd_result = scalars_pandas_df.assign(**kwargs)
+
+    # Convert default pandas dtypes `int64` to match BigQuery DataFrames dtypes.
+    pd_result["int64_col"] = pd_result["int64_col"].astype("Int64")
+    pd_result["new_col"] = pd_result["new_col"].astype("Int64")
+    pd_result["new_col2"] = pd_result["new_col2"].astype("Int64")
+
+    assert_pandas_df_equal(bf_result, pd_result)
+
+
+# Require an index so that the self-join is consistent each time.
+def test_assign_same_table_different_index_performs_self_join(
+    scalars_df_index, scalars_pandas_df_index
+):
+    column_name = "int64_col"
+    bf_df = scalars_df_index.assign(
+        alternative_index=scalars_df_index["rowindex_2"] + 2
+    )
+    pd_df = scalars_pandas_df_index.assign(
+        alternative_index=scalars_pandas_df_index["rowindex_2"] + 2
+    )
+    bf_df_2 = bf_df.set_index("alternative_index")
+    pd_df_2 = pd_df.set_index("alternative_index")
+    bf_result = bf_df.assign(new_col=bf_df_2[column_name] * 10).to_pandas()
+    pd_result = pd_df.assign(new_col=pd_df_2[column_name] * 10)
+
+    pandas.testing.assert_frame_equal(bf_result, pd_result)
+
+
+# Different table expression must have Index
+def test_assign_different_df(
+    scalars_df_index, scalars_df_2_index, scalars_pandas_df_index
+):
+    column_name = "int64_col"
+    df = scalars_df_index.assign(new_col=scalars_df_2_index[column_name])
+    bf_result = df.to_pandas()
+    # Doesn't matter to pandas if it comes from the same DF or a different DF.
+    pd_result = scalars_pandas_df_index.assign(
+        new_col=scalars_pandas_df_index[column_name]
+    )
+
+    assert_pandas_df_equal(bf_result, pd_result)
+
+
+def test_assign_different_df_w_loc(
+    scalars_df_index, scalars_df_2_index, scalars_pandas_df_index
+):
+    bf_df = scalars_df_index.copy()
+    bf_df2 = scalars_df_2_index.copy()
+    pd_df = scalars_pandas_df_index.copy()
+    assert "int64_col" in bf_df.columns
+    assert "int64_col" in pd_df.columns
+    bf_df.loc[:, "int64_col"] = bf_df2.loc[:, "int64_col"] + 1
+    pd_df.loc[:, "int64_col"] = pd_df.loc[:, "int64_col"] + 1
+    bf_result = bf_df.to_pandas()
+    pd_result = pd_df
+
+    # Convert default pandas dtypes `int64` to match BigQuery DataFrames dtypes.
+    pd_result["int64_col"] = pd_result["int64_col"].astype("Int64")
+
+    pd.testing.assert_frame_equal(bf_result, pd_result)
+
+
+def test_assign_different_df_w_setitem(
+    scalars_df_index, scalars_df_2_index, scalars_pandas_df_index
+):
+    bf_df = scalars_df_index.copy()
+    bf_df2 = scalars_df_2_index.copy()
+    pd_df = scalars_pandas_df_index.copy()
+    assert "int64_col" in bf_df.columns
+    assert "int64_col" in pd_df.columns
+    bf_df["int64_col"] = bf_df2["int64_col"] + 1
+    pd_df["int64_col"] = pd_df["int64_col"] + 1
+    bf_result = bf_df.to_pandas()
+    pd_result = pd_df
+
+    # Convert default pandas dtypes `int64` to match BigQuery DataFrames dtypes.
+    pd_result["int64_col"] = pd_result["int64_col"].astype("Int64")
+
+    pd.testing.assert_frame_equal(bf_result, pd_result)
+
+
+def test_assign_callable_lambda(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    kwargs = {"new_col": lambda x: x["int64_col"] + x["int64_too"]}
+    df = scalars_df.assign(**kwargs)
+    bf_result = df.to_pandas()
+    pd_result = scalars_pandas_df.assign(**kwargs)
+
+    # Convert default pandas dtypes `int64` to match BigQuery DataFrames dtypes.
+    pd_result["new_col"] = pd_result["new_col"].astype("Int64")
+
+    assert_pandas_df_equal(bf_result, pd_result)
+
+
+@pytest.mark.parametrize(
+    ("axis", "how", "ignore_index", "subset"),
+    [
+        (0, "any", False, None),
+        (0, "any", True, None),
+        (0, "all", False, ["bool_col", "time_col"]),
+        (0, "any", False, ["bool_col", "time_col"]),
+        (0, "all", False, "time_col"),
+        (1, "any", False, None),
+        (1, "all", False, None),
+    ],
+)
+def test_df_dropna_by_how(scalars_dfs, axis, how, ignore_index, subset):
+    # TODO: supply a reason why this isn't compatible with pandas 1.x
+    pytest.importorskip("pandas", minversion="2.0.0")
+    scalars_df, scalars_pandas_df = scalars_dfs
+    df = scalars_df.dropna(axis=axis, how=how, ignore_index=ignore_index, subset=subset)
+    bf_result = df.to_pandas()
+    pd_result = scalars_pandas_df.dropna(
+        axis=axis, how=how, ignore_index=ignore_index, subset=subset
+    )
+
+    # Pandas uses int64 instead of Int64 (nullable) dtype.
+    pd_result.index = pd_result.index.astype(pd.Int64Dtype())
+    pandas.testing.assert_frame_equal(bf_result, pd_result)
+
+
+@pytest.mark.parametrize(
+    ("axis", "ignore_index", "subset", "thresh"),
+    [
+        (0, False, None, 2),
+        (0, True, None, 3),
+        (1, False, None, 2),
+    ],
+)
+def test_df_dropna_by_thresh(scalars_dfs, axis, ignore_index, subset, thresh):
+    """
+    Tests that dropna correctly keeps rows/columns with a minimum number
+    of non-null values.
+    """
+    # TODO: supply a reason why this isn't compatible with pandas 1.x
+    pytest.importorskip("pandas", minversion="2.0.0")
+    scalars_df, scalars_pandas_df = scalars_dfs
+
+    df_result = scalars_df.dropna(
+        axis=axis, thresh=thresh, ignore_index=ignore_index, subset=subset
+    )
+    pd_result = scalars_pandas_df.dropna(
+        axis=axis, thresh=thresh, ignore_index=ignore_index, subset=subset
+    )
+
+    bf_result = df_result.to_pandas()
+    # Pandas uses int64 instead of Int64 (nullable) dtype.
+    pd_result.index = pd_result.index.astype(pd.Int64Dtype())
+    pd.testing.assert_frame_equal(bf_result, pd_result)
+
+
+def test_df_dropna_range_columns(scalars_dfs):
+    # TODO: supply a reason why this isn't compatible with pandas 1.x
+    pytest.importorskip("pandas", minversion="2.0.0")
+    scalars_df, scalars_pandas_df = scalars_dfs
+    scalars_df = scalars_df.copy()
+    scalars_pandas_df = scalars_pandas_df.copy()
+    scalars_df.columns = pandas.RangeIndex(0, len(scalars_df.columns))
+    scalars_pandas_df.columns = pandas.RangeIndex(0, len(scalars_pandas_df.columns))
+
+    df = scalars_df.dropna()
+    bf_result = df.to_pandas()
+    pd_result = scalars_pandas_df.dropna()
+
+    pandas.testing.assert_frame_equal(bf_result, pd_result)
+
+
+def test_df_interpolate(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    columns = ["int64_col", "int64_too", "float64_col"]
+    bf_result = scalars_df[columns].interpolate().to_pandas()
+    # Pandas can only interpolate on "float64" columns
+    # https://github.com/pandas-dev/pandas/issues/40252
+    pd_result = scalars_pandas_df[columns].astype("float64").interpolate()
+
+    pandas.testing.assert_frame_equal(
+        bf_result,
+        pd_result,
+        check_index_type=False,
+        check_dtype=False,
+    )
+
+
+@pytest.mark.parametrize(
+    "col, fill_value",
+    [
+        (["int64_col", "float64_col"], 3),
+        (["string_col"], "A"),
+        (["datetime_col"], pd.Timestamp("2023-01-01")),
+    ],
+)
+def test_df_fillna(scalars_dfs, col, fill_value):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    bf_result = scalars_df[col].fillna(fill_value).to_pandas()
+    pd_result = scalars_pandas_df[col].fillna(fill_value)
+
+    pd.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False)
+
+
+def test_df_replace_scalar_scalar(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    bf_result = scalars_df.replace(555.555, 3).to_pandas()
+    pd_result = scalars_pandas_df.replace(555.555, 3)
+
+    # pandas has narrower result types as they are determined dynamically
+    pd.testing.assert_frame_equal(pd_result, bf_result, check_dtype=False)
+
+
+def test_df_replace_regex_scalar(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    bf_result = scalars_df.replace("^H.l", "Howdy, Planet!", regex=True).to_pandas()
+    pd_result = scalars_pandas_df.replace("^H.l", "Howdy, Planet!", regex=True)
+
+    pd.testing.assert_frame_equal(
+        pd_result,
+        bf_result,
+    )
+
+
+def test_df_replace_list_scalar(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    bf_result = scalars_df.replace([555.555, 3.2], 3).to_pandas()
+    pd_result = scalars_pandas_df.replace([555.555, 3.2], 3)
+
+    # pandas has narrower result types as they are determined dynamically
+    pd.testing.assert_frame_equal(
+        pd_result,
+        bf_result,
+        check_dtype=False,
+    )
+
+
+def test_df_replace_value_dict(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    bf_result = scalars_df.replace(1, {"int64_col": 100, "int64_too": 200}).to_pandas()
+    pd_result = scalars_pandas_df.replace(1, {"int64_col": 100, "int64_too": 200})
+
+    pd.testing.assert_frame_equal(
+        pd_result,
+        bf_result,
+    )
+
+
+def test_df_ffill(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    bf_result = scalars_df[["int64_col", "float64_col"]].ffill(limit=1).to_pandas()
+    pd_result = scalars_pandas_df[["int64_col", "float64_col"]].ffill(limit=1)
+
+    pandas.testing.assert_frame_equal(bf_result, pd_result)
+
+
+def test_df_bfill(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    bf_result = scalars_df[["int64_col", "float64_col"]].bfill().to_pandas()
+    pd_result = scalars_pandas_df[["int64_col", "float64_col"]].bfill()
+
+    pandas.testing.assert_frame_equal(bf_result, pd_result)
+
+
+def test_apply_series_series_callable(
+    scalars_df_index,
+    scalars_pandas_df_index,
+):
+    columns = ["int64_too", "int64_col"]
+
+    def foo(series, arg1, arg2, *, kwarg1=0, kwarg2=0):
+        return series**2 + (arg1 * arg2 % 4) + (kwarg1 * kwarg2 % 7)
+
+    bf_result = (
+        scalars_df_index[columns]
+        .apply(foo, args=(33, 61), kwarg1=52, kwarg2=21)
+        .to_pandas()
+    )
+
+    pd_result = scalars_pandas_df_index[columns].apply(
+        foo, args=(33, 61), kwarg1=52, kwarg2=21
+    )
+
+    pandas.testing.assert_frame_equal(bf_result, pd_result)
+
+
+def test_apply_series_listlike_callable(
+    scalars_df_index,
+    scalars_pandas_df_index,
+):
+    columns = ["int64_too", "int64_col"]
+    bf_result = (
+        scalars_df_index[columns].apply(lambda x: [len(x), x.min(), 24]).to_pandas()
+    )
+
+    pd_result = scalars_pandas_df_index[columns].apply(lambda x: [len(x), x.min(), 24])
+
+    # Convert default pandas dtypes `int64` to match BigQuery DataFrames dtypes.
+    pd_result.index = pd_result.index.astype("Int64")
+    pd_result = pd_result.astype("Int64")
+    pandas.testing.assert_frame_equal(bf_result, pd_result)
+
+
+def test_apply_series_scalar_callable(
+    scalars_df_index,
+    scalars_pandas_df_index,
+):
+    columns = ["int64_too", "int64_col"]
+    bf_result = scalars_df_index[columns].apply(lambda x: x.sum())
+
+    pd_result = scalars_pandas_df_index[columns].apply(lambda x: x.sum())
+
+    pandas.testing.assert_series_equal(bf_result, pd_result)
+
+
+def test_df_pipe(
+    scalars_df_index,
+    scalars_pandas_df_index,
+):
+    columns = ["int64_too", "int64_col"]
+
+    def foo(x: int, y: int, df):
+        return (df + x) % y
+
+    bf_result = (
+        scalars_df_index[columns]
+        .pipe((foo, "df"), x=7, y=9)
+        .pipe(lambda x: x**2)
+        .to_pandas()
+    )
+
+    pd_result = (
+        scalars_pandas_df_index[columns]
+        .pipe((foo, "df"), x=7, y=9)
+        .pipe(lambda x: x**2)
+    )
+
+    pandas.testing.assert_frame_equal(bf_result, pd_result)
+
+
+def test_df_keys(
+    scalars_df_index,
+    scalars_pandas_df_index,
+):
+    pandas.testing.assert_index_equal(
+        scalars_df_index.keys(), scalars_pandas_df_index.keys()
+    )
+
+
+def test_df_iter(
+    scalars_df_index,
+    scalars_pandas_df_index,
+):
+    for bf_i, df_i in zip(scalars_df_index, scalars_pandas_df_index):
+        assert bf_i == df_i
+
+
+def test_iterrows(
+    scalars_df_index,
+    scalars_pandas_df_index,
+):
+    # TODO: supply a reason why this isn't compatible with pandas 1.x
+    pytest.importorskip("pandas", minversion="2.0.0")
+    scalars_df_index = scalars_df_index.add_suffix("_suffix", axis=1)
+    scalars_pandas_df_index = scalars_pandas_df_index.add_suffix("_suffix", axis=1)
+    for (bf_index, bf_series), (pd_index, pd_series) in zip(
+        scalars_df_index.iterrows(), scalars_pandas_df_index.iterrows()
+    ):
+        assert bf_index == pd_index
+        pandas.testing.assert_series_equal(bf_series, pd_series)
+
+
+@pytest.mark.parametrize(
+    (
+        "index",
+        "name",
+    ),
+    [
+        (
+            True,
+            "my_df",
+        ),
+        (False, None),
+    ],
+)
+def test_itertuples(scalars_df_index, index, name):
+    # Numeric has slightly different representation as a result of conversions.
+    bf_tuples = scalars_df_index.itertuples(index, name)
+    pd_tuples = scalars_df_index.to_pandas().itertuples(index, name)
+    for bf_tuple, pd_tuple in zip(bf_tuples, pd_tuples):
+        assert bf_tuple == pd_tuple
+
+
+def test_df_isin_list_w_null(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    values = ["Hello, World!", 55555, 2.51, pd.NA, True]
+    bf_result = (
+        scalars_df[["int64_col", "float64_col", "string_col", "bool_col"]]
+        .isin(values)
+        .to_pandas()
+    )
+    pd_result = scalars_pandas_df[
+        ["int64_col", "float64_col", "string_col", "bool_col"]
+    ].isin(values)
+
+    pandas.testing.assert_frame_equal(bf_result, pd_result.astype("boolean"))
+
+
+def test_df_isin_list_wo_null(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    values = ["Hello, World!", 55555, 2.51, True]
+    bf_result = (
+        scalars_df[["int64_col", "float64_col", "string_col", "bool_col"]]
+        .isin(values)
+        .to_pandas()
+    )
+    pd_result = scalars_pandas_df[
+        ["int64_col", "float64_col", "string_col", "bool_col"]
+    ].isin(values)
+
+    pandas.testing.assert_frame_equal(bf_result, pd_result.astype("boolean"))
+
+
+def test_df_isin_dict(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    values = {
+        "string_col": ["Hello, World!", 55555, 2.51, pd.NA, True],
+        "int64_col": [5555, 2.51],
+        "bool_col": [pd.NA],
+    }
+    bf_result = (
+        scalars_df[["int64_col", "float64_col", "string_col", "bool_col"]]
+        .isin(values)
+        .to_pandas()
+    )
+    pd_result = scalars_pandas_df[
+        ["int64_col", "float64_col", "string_col", "bool_col"]
+    ].isin(values)
+
+    pandas.testing.assert_frame_equal(bf_result, pd_result.astype("boolean"))
+
+
+def test_df_cross_merge(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    left_columns = ["int64_col", "float64_col", "rowindex_2"]
+    right_columns = ["int64_col", "bool_col", "string_col", "rowindex_2"]
+
+    left = scalars_df[left_columns]
+    # Offset the rows somewhat so that outer join can have an effect.
+    right = scalars_df[right_columns].assign(rowindex_2=scalars_df["rowindex_2"] + 2)
+
+    bf_result = left.merge(right, "cross").to_pandas()
+
+    pd_result = scalars_pandas_df[left_columns].merge(
+        scalars_pandas_df[right_columns].assign(
+            rowindex_2=scalars_pandas_df["rowindex_2"] + 2
+        ),
+        "cross",
+    )
+    pd.testing.assert_frame_equal(bf_result, pd_result, check_index_type=False)
+
+
+@pytest.mark.parametrize(
+    ("merge_how",),
+    [
+        ("inner",),
+        ("outer",),
+        ("left",),
+        ("right",),
+    ],
+)
+def test_df_merge(scalars_dfs, merge_how):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    on = "rowindex_2"
+    left_columns = ["int64_col", "float64_col", "rowindex_2"]
+    right_columns = ["int64_col", "bool_col", "string_col", "rowindex_2"]
+
+    left = scalars_df[left_columns]
+    # Offset the rows somewhat so that outer join can have an effect.
+    right = scalars_df[right_columns].assign(rowindex_2=scalars_df["rowindex_2"] + 2)
+
+    df = left.merge(right, merge_how, on, sort=True)
+    bf_result = df.to_pandas()
+
+    pd_result = scalars_pandas_df[left_columns].merge(
+        scalars_pandas_df[right_columns].assign(
+            rowindex_2=scalars_pandas_df["rowindex_2"] + 2
+        ),
+        merge_how,
+        on,
+        sort=True,
+    )
+
+    assert_pandas_df_equal(
+        bf_result, pd_result, ignore_order=True, check_index_type=False
+    )
+
+
+@pytest.mark.parametrize(
+    ("left_on", "right_on"),
+    [
+        (["int64_col", "rowindex_2"], ["int64_col", "rowindex_2"]),
+        (["rowindex_2", "int64_col"], ["int64_col", "rowindex_2"]),
+        (["rowindex_2", "float64_col"], ["int64_col", "rowindex_2"]),
+    ],
+)
+def test_df_merge_multi_key(scalars_dfs, left_on, right_on):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    left_columns = ["int64_col", "float64_col", "rowindex_2"]
+    right_columns = ["int64_col", "bool_col", "string_col", "rowindex_2"]
+
+    left = scalars_df[left_columns]
+    # Offset the rows somewhat so that outer join can have an effect.
+    right = scalars_df[right_columns].assign(rowindex_2=scalars_df["rowindex_2"] + 2)
+
+    df = left.merge(right, "outer", left_on=left_on, right_on=right_on, sort=True)
+    bf_result = df.to_pandas()
+
+    pd_result = scalars_pandas_df[left_columns].merge(
+        scalars_pandas_df[right_columns].assign(
+            rowindex_2=scalars_pandas_df["rowindex_2"] + 2
+        ),
+        "outer",
+        left_on=left_on,
+        right_on=right_on,
+        sort=True,
+    )
+
+    assert_pandas_df_equal(
+        bf_result, pd_result, ignore_order=True, check_index_type=False
+    )
+
+
+@pytest.mark.parametrize(
+    ("merge_how",),
+    [
+        ("inner",),
+        ("outer",),
+        ("left",),
+        ("right",),
+    ],
+)
+def test_merge_custom_col_name(scalars_dfs, merge_how):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    left_columns = ["int64_col", "float64_col"]
+    right_columns = ["int64_col", "bool_col", "string_col"]
+    on = "int64_col"
+    rename_columns = {"float64_col": "f64_col"}
+
+    left = scalars_df[left_columns]
+    left = left.rename(columns=rename_columns)
+    right = scalars_df[right_columns]
+    df = left.merge(right, merge_how, on, sort=True)
+    bf_result = df.to_pandas()
+
+    pandas_left_df = scalars_pandas_df[left_columns]
+    pandas_left_df = pandas_left_df.rename(columns=rename_columns)
+    pandas_right_df = scalars_pandas_df[right_columns]
+    pd_result = pandas_left_df.merge(pandas_right_df, merge_how, on, sort=True)
+
+    assert_pandas_df_equal(
+        bf_result, pd_result, ignore_order=True, check_index_type=False
+    )
+
+
+@pytest.mark.parametrize(
+    ("merge_how",),
+    [
+        ("inner",),
+        ("outer",),
+        ("left",),
+        ("right",),
+    ],
+)
+def test_merge_left_on_right_on(scalars_dfs, merge_how):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    left_columns = ["int64_col", "float64_col", "int64_too"]
+    right_columns = ["int64_col", "bool_col", "string_col", "rowindex_2"]
+
+    left = scalars_df[left_columns]
+    right = scalars_df[right_columns]
+
+    df = left.merge(
+        right, merge_how, left_on="int64_too", right_on="rowindex_2", sort=True
+    )
+    bf_result = df.to_pandas()
+
+    pd_result = scalars_pandas_df[left_columns].merge(
+        scalars_pandas_df[right_columns],
+        merge_how,
+        left_on="int64_too",
+        right_on="rowindex_2",
+        sort=True,
+    )
+
+    assert_pandas_df_equal(
+        bf_result, pd_result, ignore_order=True, check_index_type=False
+    )
+
+
+def test_self_merge_self_w_on_args():
+    data = {
+        "A": pd.Series([1, 2, 3], dtype="Int64"),
+        "B": pd.Series([1, 2, 3], dtype="Int64"),
+        "C": pd.Series([100, 200, 300], dtype="Int64"),
+        "D": pd.Series(["alpha", "beta", "gamma"], dtype="string[pyarrow]"),
+    }
+    df = pd.DataFrame(data)
+
+    df1 = df[["A", "C"]]
+    df2 = df[["B", "C", "D"]]
+    pd_result = df1.merge(df2, left_on=["A", "C"], right_on=["B", "C"], how="inner")
+
+    bf_df = bpd.DataFrame(data)
+
+    bf_df1 = bf_df[["A", "C"]]
+    bf_df2 = bf_df[["B", "C", "D"]]
+    bf_result = bf_df1.merge(
+        bf_df2, left_on=["A", "C"], right_on=["B", "C"], how="inner"
+    ).to_pandas()
+    pd.testing.assert_frame_equal(bf_result, pd_result, check_index_type=False)
+
+
+@pytest.mark.parametrize(
+    ("decimals",),
+    [
+        (2,),
+        ({"float64_col": 0, "bool_col": 1, "int64_too": -3},),
+        ({},),
+    ],
+)
+def test_dataframe_round(scalars_dfs, decimals):
+    if pd.__version__.startswith("1."):
+        pytest.skip("Rounding doesn't work as expected in pandas 1.x")
+    scalars_df, scalars_pandas_df = scalars_dfs
+
+    bf_result = scalars_df.round(decimals).to_pandas()
+    pd_result = scalars_pandas_df.round(decimals)
+
+    assert_pandas_df_equal(bf_result, pd_result)
+
+
+def test_get_dtypes(scalars_df_default_index):
+    dtypes = scalars_df_default_index.dtypes
+    dtypes_dict: Dict[str, bigframes.dtypes.Dtype] = {
+        "bool_col": pd.BooleanDtype(),
+        "bytes_col": pd.ArrowDtype(pa.binary()),
+        "date_col": pd.ArrowDtype(pa.date32()),
+        "datetime_col": pd.ArrowDtype(pa.timestamp("us")),
+        "geography_col": gpd.array.GeometryDtype(),
+        "int64_col": pd.Int64Dtype(),
+        "int64_too": pd.Int64Dtype(),
+        "numeric_col": pd.ArrowDtype(pa.decimal128(38, 9)),
+        "float64_col": pd.Float64Dtype(),
+        "rowindex": pd.Int64Dtype(),
+        "rowindex_2": pd.Int64Dtype(),
+        "string_col": pd.StringDtype(storage="pyarrow"),
+        "time_col": pd.ArrowDtype(pa.time64("us")),
+        "timestamp_col": pd.ArrowDtype(pa.timestamp("us", tz="UTC")),
+        "duration_col": pd.ArrowDtype(pa.duration("us")),
+    }
+    pd.testing.assert_series_equal(
+        dtypes,
+        pd.Series(dtypes_dict),
+    )
+
+
+def test_get_dtypes_array_struct_query(session):
+    df = session.read_gbq(
+        """SELECT
+        [1, 3, 2] AS array_column,
+        STRUCT(
+            "a" AS string_field,
+            1.2 AS float_field) AS struct_column"""
+    )
+
+    dtypes = df.dtypes
+    pd.testing.assert_series_equal(
+        dtypes,
+        pd.Series(
+            {
+                "array_column": pd.ArrowDtype(pa.list_(pa.int64())),
+                "struct_column": pd.ArrowDtype(
+                    pa.struct(
+                        [
+                            ("string_field", pa.string()),
+                            ("float_field", pa.float64()),
+                        ]
+                    )
+                ),
+            }
+        ),
+    )
+
+
+def test_get_dtypes_array_struct_table(nested_df):
+    dtypes = nested_df.dtypes
+    pd.testing.assert_series_equal(
+        dtypes,
+        pd.Series(
+            {
+                "customer_id": pd.StringDtype(storage="pyarrow"),
+                "day": pd.ArrowDtype(pa.date32()),
+                "flag": pd.Int64Dtype(),
+                "label": pd.ArrowDtype(
+                    pa.struct(
+                        [
+                            ("key", pa.string()),
+                            ("value", pa.string()),
+                        ]
+                    ),
+                ),
+                "event_sequence": pd.ArrowDtype(
+                    pa.list_(
+                        pa.struct(
+                            [
+                                pa.field(
+                                    "data",
+                                    pa.list_(
+                                        pa.struct(
+                                            [
+                                                ("value", pa.float64()),
+                                                ("key", pa.string()),
+                                            ],
+                                        ),
+                                    ),
+                                    nullable=False,
+                                ),
+                                ("timestamp", pa.timestamp("us", "UTC")),
+                                ("category", pa.string()),
+                            ]
+                        ),
+                    ),
+                ),
+                "address": pd.ArrowDtype(
+                    pa.struct(
+                        [
+                            ("street", pa.string()),
+                            ("city", pa.string()),
+                        ]
+                    ),
+                ),
+            }
+        ),
+    )
+
+
+def test_shape(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    bf_result = scalars_df.shape
+    pd_result = scalars_pandas_df.shape
+
+    assert bf_result == pd_result
+
+
+@pytest.mark.parametrize(
+    "reference_table, test_table",
+    [
+        (
+            "bigframes-dev.bigframes_tests_sys.base_table",
+            "bigframes-dev.bigframes_tests_sys.base_table_mat_view",
+        ),
+        (
+            "bigframes-dev.bigframes_tests_sys.base_table",
+            "bigframes-dev.bigframes_tests_sys.base_table_view",
+        ),
+        (
+            "bigframes-dev.bigframes_tests_sys.csv_native_table",
+            "bigframes-dev.bigframes_tests_sys.csv_external_table",
+        ),
+    ],
+)
+def test_view_and_external_table_shape(session, reference_table, test_table):
+    reference_df = session.read_gbq(reference_table)
+    test_df = session.read_gbq(test_table)
+
+    assert test_df.shape == reference_df.shape
+
+
+def test_len(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    bf_result = len(scalars_df)
+    pd_result = len(scalars_pandas_df)
+
+    assert bf_result == pd_result
+
+
+@pytest.mark.parametrize(
+    ("n_rows",),
+    [
+        (50,),
+        (10000,),
+    ],
+)
+@pytest.mark.parametrize(
+    "write_engine",
+    ["bigquery_load", "bigquery_streaming", "bigquery_write"],
+)
+def test_df_len_local(session, n_rows, write_engine):
+    assert (
+        len(
+            session.read_pandas(
+                pd.DataFrame(np.random.randint(1, 7, n_rows), columns=["one"]),
+                write_engine=write_engine,
+            )
+        )
+        == n_rows
+    )
+
+
+def test_size(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    bf_result = scalars_df.size
+    pd_result = scalars_pandas_df.size
+
+    assert bf_result == pd_result
+
+
+def test_ndim(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    bf_result = scalars_df.ndim
+    pd_result = scalars_pandas_df.ndim
+
+    assert bf_result == pd_result
+
+
+def test_empty_false(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+
+    bf_result = scalars_df.empty
+    pd_result = scalars_pandas_df.empty
+
+    assert bf_result == pd_result
+
+
+def test_empty_true_column_filter(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+
+    bf_result = scalars_df[[]].empty
+    pd_result = scalars_pandas_df[[]].empty
+
+    assert bf_result == pd_result
+
+
+def test_empty_true_row_filter(scalars_dfs: Tuple[dataframe.DataFrame, pd.DataFrame]):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    bf_bool: series.Series = typing.cast(series.Series, scalars_df["bool_col"])
+    pd_bool: pd.Series = scalars_pandas_df["bool_col"]
+    bf_false = bf_bool.notna() & (bf_bool != bf_bool)
+    pd_false = pd_bool.notna() & (pd_bool != pd_bool)
+
+    bf_result = scalars_df[bf_false].empty
+    pd_result = scalars_pandas_df[pd_false].empty
+
+    assert pd_result
+    assert bf_result == pd_result
+
+
+def test_empty_true_memtable(session: bigframes.Session):
+    bf_df = dataframe.DataFrame(session=session)
+    pd_df = pd.DataFrame()
+
+    bf_result = bf_df.empty
+    pd_result = pd_df.empty
+
+    assert pd_result
+    assert bf_result == pd_result
+
+
+@pytest.mark.parametrize(
+    ("drop",),
+    ((True,), (False,)),
+)
+def test_reset_index(scalars_df_index, scalars_pandas_df_index, drop):
+    df = scalars_df_index.reset_index(drop=drop)
+    assert df.index.name is None
+
+    bf_result = df.to_pandas()
+    pd_result = scalars_pandas_df_index.reset_index(drop=drop)
+
+    # Pandas uses int64 instead of Int64 (nullable) dtype.
+    pd_result.index = pd_result.index.astype(pd.Int64Dtype())
+
+    # reset_index should maintain the original ordering.
+    pandas.testing.assert_frame_equal(bf_result, pd_result)
+
+
+def test_reset_index_allow_duplicates(scalars_df_index, scalars_pandas_df_index):
+    scalars_df_index = scalars_df_index.copy()
+    scalars_df_index.index.name = "int64_col"
+    df = scalars_df_index.reset_index(allow_duplicates=True, drop=False)
+    assert df.index.name is None
+
+    bf_result = df.to_pandas()
+
+    scalars_pandas_df_index = scalars_pandas_df_index.copy()
+    scalars_pandas_df_index.index.name = "int64_col"
+    pd_result = scalars_pandas_df_index.reset_index(allow_duplicates=True, drop=False)
+
+    # Pandas uses int64 instead of Int64 (nullable) dtype.
+    pd_result.index = pd_result.index.astype(pd.Int64Dtype())
+
+    # reset_index should maintain the original ordering.
+    pandas.testing.assert_frame_equal(bf_result, pd_result)
+
+
+def test_reset_index_duplicates_error(scalars_df_index):
+    scalars_df_index = scalars_df_index.copy()
+    scalars_df_index.index.name = "int64_col"
+    with pytest.raises(ValueError):
+        scalars_df_index.reset_index(allow_duplicates=False, drop=False)
+
+
+@pytest.mark.parametrize(
+    ("drop",),
+    ((True,), (False,)),
+)
+def test_reset_index_inplace(scalars_df_index, scalars_pandas_df_index, drop):
+    df = scalars_df_index.copy()
+    df.reset_index(drop=drop, inplace=True)
+    assert df.index.name is None
+
+    bf_result = df.to_pandas()
+    pd_result = scalars_pandas_df_index.copy()
+    pd_result.reset_index(drop=drop, inplace=True)
+
+    # Pandas uses int64 instead of Int64 (nullable) dtype.
+    pd_result.index = pd_result.index.astype(pd.Int64Dtype())
+
+    # reset_index should maintain the original ordering.
+    pandas.testing.assert_frame_equal(bf_result, pd_result)
+
+
+def test_reset_index_then_filter(
+    scalars_df_index,
+    scalars_pandas_df_index,
+):
+    bf_filter = scalars_df_index["bool_col"].fillna(True)
+    bf_df = scalars_df_index.reset_index()[bf_filter]
+    bf_result = bf_df.to_pandas()
+    pd_filter = scalars_pandas_df_index["bool_col"].fillna(True)
+    pd_result = scalars_pandas_df_index.reset_index()[pd_filter]
+
+    # Pandas uses int64 instead of Int64 (nullable) dtype.
+    pd_result.index = pd_result.index.astype(pd.Int64Dtype())
+
+    # reset_index should maintain the original ordering and index keys
+    # post-filter will have gaps.
+    pandas.testing.assert_frame_equal(bf_result, pd_result)
+
+
+def test_reset_index_with_unnamed_index(
+    scalars_df_index,
+    scalars_pandas_df_index,
+):
+    scalars_df_index = scalars_df_index.copy()
+    scalars_pandas_df_index = scalars_pandas_df_index.copy()
+
+    scalars_df_index.index.name = None
+    scalars_pandas_df_index.index.name = None
+    df = scalars_df_index.reset_index(drop=False)
+    assert df.index.name is None
+
+    # reset_index(drop=False) creates a new column "index".
+    assert df.columns[0] == "index"
+
+    bf_result = df.to_pandas()
+    pd_result = scalars_pandas_df_index.reset_index(drop=False)
+
+    # Pandas uses int64 instead of Int64 (nullable) dtype.
+    pd_result.index = pd_result.index.astype(pd.Int64Dtype())
+
+    # reset_index should maintain the original ordering.
+    pandas.testing.assert_frame_equal(bf_result, pd_result)
+
+
+def test_reset_index_with_unnamed_multiindex(
+    scalars_df_index,
+    scalars_pandas_df_index,
+):
+    bf_df = dataframe.DataFrame(
+        ([1, 2, 3], [2, 5, 7]),
+        index=pd.MultiIndex.from_tuples([("a", "aa"), ("a", "aa")]),
+    )
+    pd_df = pd.DataFrame(
+        ([1, 2, 3], [2, 5, 7]),
+        index=pd.MultiIndex.from_tuples([("a", "aa"), ("a", "aa")]),
+    )
+
+    bf_df = bf_df.reset_index()
+    pd_df = pd_df.reset_index()
+
+    assert pd_df.columns[0] == "level_0"
+    assert bf_df.columns[0] == "level_0"
+    assert pd_df.columns[1] == "level_1"
+    assert bf_df.columns[1] == "level_1"
+
+
+def test_reset_index_with_unnamed_index_and_index_column(
+    scalars_df_index,
+    scalars_pandas_df_index,
+):
+    scalars_df_index = scalars_df_index.copy()
+    scalars_pandas_df_index = scalars_pandas_df_index.copy()
+
+    scalars_df_index.index.name = None
+    scalars_pandas_df_index.index.name = None
+    df = scalars_df_index.assign(index=scalars_df_index["int64_col"]).reset_index(
+        drop=False
+    )
+    assert df.index.name is None
+
+    # reset_index(drop=False) creates a new column "level_0" if the "index" column already exists.
+    assert df.columns[0] == "level_0"
+
+    bf_result = df.to_pandas()
+    pd_result = scalars_pandas_df_index.assign(
+        index=scalars_pandas_df_index["int64_col"]
+    ).reset_index(drop=False)
+
+    # Pandas uses int64 instead of Int64 (nullable) dtype.
+    pd_result.index = pd_result.index.astype(pd.Int64Dtype())
+
+    # reset_index should maintain the original ordering.
+    pandas.testing.assert_frame_equal(bf_result, pd_result)
+
+
+@pytest.mark.parametrize(
+    ("drop",),
+    (
+        (True,),
+        (False,),
+    ),
+)
+@pytest.mark.parametrize(
+    ("append",),
+    (
+        (True,),
+        (False,),
+    ),
+)
+@pytest.mark.parametrize(
+    ("index_column",),
+    (("int64_too",), ("string_col",), ("timestamp_col",)),
+)
+def test_set_index(scalars_dfs, index_column, drop, append):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    df = scalars_df.set_index(index_column, append=append, drop=drop)
+    bf_result = df.to_pandas()
+    pd_result = scalars_pandas_df.set_index(index_column, append=append, drop=drop)
+
+    # Sort to disambiguate when there are duplicate index labels.
+    # Note: Doesn't use assert_pandas_df_equal_ignore_ordering because we get
+    # "ValueError: 'timestamp_col' is both an index level and a column label,
+    # which is ambiguous" when trying to sort by a column with the same name as
+    # the index.
+    bf_result = bf_result.sort_values("rowindex_2")
+    pd_result = pd_result.sort_values("rowindex_2")
+
+    pandas.testing.assert_frame_equal(bf_result, pd_result)
+
+
+def test_set_index_key_error(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    with pytest.raises(KeyError):
+        scalars_pandas_df.set_index(["not_a_col"])
+    with pytest.raises(KeyError):
+        scalars_df.set_index(["not_a_col"])
+
+
+@pytest.mark.parametrize(
+    ("ascending",),
+    ((True,), (False,)),
+)
+@pytest.mark.parametrize(
+    ("na_position",),
+    (("first",), ("last",)),
+)
+@pytest.mark.parametrize(
+    ("axis",),
+    ((0,), ("columns",)),
+)
+def test_sort_index(scalars_dfs, ascending, na_position, axis):
+    index_column = "int64_col"
+    scalars_df, scalars_pandas_df = scalars_dfs
+    df = scalars_df.set_index(index_column)
+    bf_result = df.sort_index(
+        ascending=ascending, na_position=na_position, axis=axis
+    ).to_pandas()
+    pd_result = scalars_pandas_df.set_index(index_column).sort_index(
+        ascending=ascending, na_position=na_position, axis=axis
+    )
+    pandas.testing.assert_frame_equal(bf_result, pd_result)
+
+
+def test_dataframe_sort_index_inplace(scalars_dfs):
+    index_column = "int64_col"
+    scalars_df, scalars_pandas_df = scalars_dfs
+    df = scalars_df.copy().set_index(index_column)
+    df.sort_index(ascending=False, inplace=True)
+    bf_result = df.to_pandas()
+
+    pd_result = scalars_pandas_df.set_index(index_column).sort_index(ascending=False)
+    pandas.testing.assert_frame_equal(bf_result, pd_result)
+
+
+def test_df_abs(scalars_dfs_maybe_ordered):
+    scalars_df, scalars_pandas_df = scalars_dfs_maybe_ordered
+    columns = ["int64_col", "int64_too", "float64_col"]
+
+    bf_result = scalars_df[columns].abs()
+    pd_result = scalars_pandas_df[columns].abs()
+
+    assert_dfs_equivalent(pd_result, bf_result)
+
+
+def test_df_pos(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    bf_result = (+scalars_df[["int64_col", "numeric_col"]]).to_pandas()
+    pd_result = +scalars_pandas_df[["int64_col", "numeric_col"]]
+
+    assert_pandas_df_equal(pd_result, bf_result)
+
+
+def test_df_neg(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    bf_result = (-scalars_df[["int64_col", "numeric_col"]]).to_pandas()
+    pd_result = -scalars_pandas_df[["int64_col", "numeric_col"]]
+
+    assert_pandas_df_equal(pd_result, bf_result)
+
+
+def test_df__abs__(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    bf_result = (
+        abs(scalars_df[["int64_col", "numeric_col", "float64_col"]])
+    ).to_pandas()
+    pd_result = abs(scalars_pandas_df[["int64_col", "numeric_col", "float64_col"]])
+
+    assert_pandas_df_equal(pd_result, bf_result)
+
+
+def test_df_invert(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    columns = ["int64_col", "bool_col"]
+
+    bf_result = (~scalars_df[columns]).to_pandas()
+    pd_result = ~scalars_pandas_df[columns]
+
+    assert_pandas_df_equal(bf_result, pd_result)
+
+
+def test_df_isnull(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+
+    columns = ["int64_col", "int64_too", "string_col", "bool_col"]
+    bf_result = scalars_df[columns].isnull().to_pandas()
+    pd_result = scalars_pandas_df[columns].isnull()
+
+    # One of dtype mismatches to be documented. Here, the `bf_result.dtype` is
+    # `BooleanDtype` but the `pd_result.dtype` is `bool`.
+    pd_result["int64_col"] = pd_result["int64_col"].astype(pd.BooleanDtype())
+    pd_result["int64_too"] = pd_result["int64_too"].astype(pd.BooleanDtype())
+    pd_result["string_col"] = pd_result["string_col"].astype(pd.BooleanDtype())
+    pd_result["bool_col"] = pd_result["bool_col"].astype(pd.BooleanDtype())
+
+    assert_pandas_df_equal(bf_result, pd_result)
+
+
+def test_df_notnull(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+
+    columns = ["int64_col", "int64_too", "string_col", "bool_col"]
+    bf_result = scalars_df[columns].notnull().to_pandas()
+    pd_result = scalars_pandas_df[columns].notnull()
+
+    # One of dtype mismatches to be documented. Here, the `bf_result.dtype` is
+    # `BooleanDtype` but the `pd_result.dtype` is `bool`.
+    pd_result["int64_col"] = pd_result["int64_col"].astype(pd.BooleanDtype())
+    pd_result["int64_too"] = pd_result["int64_too"].astype(pd.BooleanDtype())
+    pd_result["string_col"] = pd_result["string_col"].astype(pd.BooleanDtype())
+    pd_result["bool_col"] = pd_result["bool_col"].astype(pd.BooleanDtype())
+
+    assert_pandas_df_equal(bf_result, pd_result)
+
+
+@pytest.mark.parametrize(
+    ("left_labels", "right_labels", "overwrite", "fill_value"),
+    [
+        (["a", "b", "c"], ["c", "a", "b"], True, None),
+        (["a", "b", "c"], ["c", "a", "b"], False, None),
+        (["a", "b", "c"], ["a", "b", "c"], False, 2),
+    ],
+    ids=[
+        "one_one_match_overwrite",
+        "one_one_match_no_overwrite",
+        "exact_match",
+    ],
+)
+def test_combine(
+    scalars_df_index,
+    scalars_df_2_index,
+    scalars_pandas_df_index,
+    left_labels,
+    right_labels,
+    overwrite,
+    fill_value,
+):
+    if pd.__version__.startswith("1."):
+        pytest.skip("pd.NA vs NaN not handled well in pandas 1.x.")
+    columns = ["int64_too", "int64_col", "float64_col"]
+
+    bf_df_a = scalars_df_index[columns]
+    bf_df_a.columns = left_labels
+    bf_df_b = scalars_df_2_index[columns]
+    bf_df_b.columns = right_labels
+    bf_result = bf_df_a.combine(
+        bf_df_b,
+        lambda x, y: x**2 + 2 * x * y + y**2,
+        overwrite=overwrite,
+        fill_value=fill_value,
+    ).to_pandas()
+
+    pd_df_a = scalars_pandas_df_index[columns]
+    pd_df_a.columns = left_labels
+    pd_df_b = scalars_pandas_df_index[columns]
+    pd_df_b.columns = right_labels
+    pd_result = pd_df_a.combine(
+        pd_df_b,
+        lambda x, y: x**2 + 2 * x * y + y**2,
+        overwrite=overwrite,
+        fill_value=fill_value,
+    )
+
+    # Some dtype inconsistency for all-NULL columns
+    pd.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False)
+
+
+@pytest.mark.parametrize(
+    ("overwrite", "filter_func"),
+    [
+        (True, None),
+        (False, None),
+        (True, lambda x: x.isna() | (x % 2 == 0)),
+    ],
+    ids=[
+        "default",
+        "overwritefalse",
+        "customfilter",
+    ],
+)
+def test_df_update(overwrite, filter_func):
+    if pd.__version__.startswith("1."):
+        pytest.skip("dtype handled differently in pandas 1.x.")
+
+    index1: pandas.Index = pandas.Index([1, 2, 3, 4], dtype="Int64")
+
+    index2: pandas.Index = pandas.Index([1, 2, 4, 5], dtype="Int64")
+    pd_df1 = pandas.DataFrame(
+        {"a": [1, None, 3, 4], "b": [5, 6, None, 8]}, dtype="Int64", index=index1
+    )
+    pd_df2 = pandas.DataFrame(
+        {"a": [None, 20, 30, 40], "c": [90, None, 110, 120]},
+        dtype="Int64",
+        index=index2,
+    )
+
+    bf_df1 = dataframe.DataFrame(pd_df1)
+    bf_df2 = dataframe.DataFrame(pd_df2)
+
+    bf_df1.update(bf_df2, overwrite=overwrite, filter_func=filter_func)
+    pd_df1.update(pd_df2, overwrite=overwrite, filter_func=filter_func)
+
+    pd.testing.assert_frame_equal(bf_df1.to_pandas(), pd_df1)
+
+
+def test_df_idxmin():
+    pd_df = pd.DataFrame(
+        {"a": [1, 2, 3], "b": [7, None, 3], "c": [4, 4, 4]}, index=["x", "y", "z"]
+    )
+    bf_df = dataframe.DataFrame(pd_df)
+
+    bf_result = bf_df.idxmin().to_pandas()
+    pd_result = pd_df.idxmin()
+
+    pd.testing.assert_series_equal(
+        bf_result, pd_result, check_index_type=False, check_dtype=False
+    )
+
+
+def test_df_idxmax():
+    pd_df = pd.DataFrame(
+        {"a": [1, 2, 3], "b": [7, None, 3], "c": [4, 4, 4]}, index=["x", "y", "z"]
+    )
+    bf_df = dataframe.DataFrame(pd_df)
+
+    bf_result = bf_df.idxmax().to_pandas()
+    pd_result = pd_df.idxmax()
+
+    pd.testing.assert_series_equal(
+        bf_result, pd_result, check_index_type=False, check_dtype=False
+    )
+
+
+@pytest.mark.parametrize(
+    ("join", "axis"),
+    [
+        ("outer", None),
+        ("outer", 0),
+        ("outer", 1),
+        ("left", 0),
+        ("right", 1),
+        ("inner", None),
+        ("inner", 1),
+    ],
+)
+def test_df_align(join, axis):
+
+    index1: pandas.Index = pandas.Index([1, 2, 3, 4], dtype="Int64")
+
+    index2: pandas.Index = pandas.Index([1, 2, 4, 5], dtype="Int64")
+    pd_df1 = pandas.DataFrame(
+        {"a": [1, None, 3, 4], "b": [5, 6, None, 8]}, dtype="Int64", index=index1
+    )
+    pd_df2 = pandas.DataFrame(
+        {"a": [None, 20, 30, 40], "c": [90, None, 110, 120]},
+        dtype="Int64",
+        index=index2,
+    )
+
+    bf_df1 = dataframe.DataFrame(pd_df1)
+    bf_df2 = dataframe.DataFrame(pd_df2)
+
+    bf_result1, bf_result2 = bf_df1.align(bf_df2, join=join, axis=axis)
+    pd_result1, pd_result2 = pd_df1.align(pd_df2, join=join, axis=axis)
+
+    # Don't check dtype as pandas does unnecessary float conversion
+    assert isinstance(bf_result1, dataframe.DataFrame) and isinstance(
+        bf_result2, dataframe.DataFrame
+    )
+    pd.testing.assert_frame_equal(bf_result1.to_pandas(), pd_result1, check_dtype=False)
+    pd.testing.assert_frame_equal(bf_result2.to_pandas(), pd_result2, check_dtype=False)
+
+
+def test_combine_first(
+    scalars_df_index,
+    scalars_df_2_index,
+    scalars_pandas_df_index,
+):
+    if pd.__version__.startswith("1."):
+        pytest.skip("pd.NA vs NaN not handled well in pandas 1.x.")
+    columns = ["int64_too", "int64_col", "float64_col"]
+
+    bf_df_a = scalars_df_index[columns].iloc[0:6]
+    bf_df_a.columns = ["a", "b", "c"]
+    bf_df_b = scalars_df_2_index[columns].iloc[2:8]
+    bf_df_b.columns = ["b", "a", "d"]
+    bf_result = bf_df_a.combine_first(bf_df_b).to_pandas()
+
+    pd_df_a = scalars_pandas_df_index[columns].iloc[0:6]
+    pd_df_a.columns = ["a", "b", "c"]
+    pd_df_b = scalars_pandas_df_index[columns].iloc[2:8]
+    pd_df_b.columns = ["b", "a", "d"]
+    pd_result = pd_df_a.combine_first(pd_df_b)
+
+    # Some dtype inconsistency for all-NULL columns
+    pd.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False)
+
+
+@pytest.mark.parametrize(
+    ("columns", "numeric_only"),
+    [
+        (["bool_col", "int64_col", "float64_col"], True),
+        (["bool_col", "int64_col", "float64_col"], False),
+        (["bool_col", "int64_col", "float64_col", "string_col"], True),
+        pytest.param(
+            ["bool_col", "int64_col", "float64_col", "string_col"],
+            False,
+            marks=pytest.mark.xfail(
+                raises=NotImplementedError,
+            ),
+        ),
+    ],
+)
+def test_df_corr_w_numeric_only(scalars_dfs_maybe_ordered, columns, numeric_only):
+    scalars_df, scalars_pandas_df = scalars_dfs_maybe_ordered
+
+    bf_result = scalars_df[columns].corr(numeric_only=numeric_only).to_pandas()
+    pd_result = scalars_pandas_df[columns].corr(numeric_only=numeric_only)
+
+    # BigFrames and Pandas differ in their data type handling:
+    # - Column types: BigFrames uses Float64, Pandas uses float64.
+    # - Index types: BigFrames uses strign, Pandas uses object.
+    pd.testing.assert_index_equal(bf_result.columns, pd_result.columns)
+    # Only check row order in ordered mode.
+    pd.testing.assert_frame_equal(
+        bf_result,
+        pd_result,
+        check_dtype=False,
+        check_index_type=False,
+        check_like=~scalars_df._block.session._strictly_ordered,
+    )
+
+
+def test_df_corr_w_invalid_parameters(scalars_dfs):
+    columns = ["int64_too", "int64_col", "float64_col"]
+    scalars_df, _ = scalars_dfs
+
+    with pytest.raises(NotImplementedError):
+        scalars_df[columns].corr(method="kendall")
+
+    with pytest.raises(NotImplementedError):
+        scalars_df[columns].corr(min_periods=1)
+
+
+@pytest.mark.parametrize(
+    ("columns", "numeric_only"),
+    [
+        (["bool_col", "int64_col", "float64_col"], True),
+        (["bool_col", "int64_col", "float64_col"], False),
+        (["bool_col", "int64_col", "float64_col", "string_col"], True),
+        pytest.param(
+            ["bool_col", "int64_col", "float64_col", "string_col"],
+            False,
+            marks=pytest.mark.xfail(
+                raises=NotImplementedError,
+            ),
+        ),
+    ],
+)
+def test_cov_w_numeric_only(scalars_dfs_maybe_ordered, columns, numeric_only):
+    scalars_df, scalars_pandas_df = scalars_dfs_maybe_ordered
+    bf_result = scalars_df[columns].cov(numeric_only=numeric_only).to_pandas()
+    pd_result = scalars_pandas_df[columns].cov(numeric_only=numeric_only)
+    # BigFrames and Pandas differ in their data type handling:
+    # - Column types: BigFrames uses Float64, Pandas uses float64.
+    # - Index types: BigFrames uses strign, Pandas uses object.
+    pd.testing.assert_index_equal(bf_result.columns, pd_result.columns)
+    # Only check row order in ordered mode.
+    pd.testing.assert_frame_equal(
+        bf_result,
+        pd_result,
+        check_dtype=False,
+        check_index_type=False,
+        check_like=~scalars_df._block.session._strictly_ordered,
+    )
+
+
+def test_df_corrwith_df(scalars_dfs_maybe_ordered):
+    scalars_df, scalars_pandas_df = scalars_dfs_maybe_ordered
+
+    l_cols = ["int64_col", "float64_col", "int64_too"]
+    r_cols = ["int64_too", "float64_col"]
+
+    bf_result = scalars_df[l_cols].corrwith(scalars_df[r_cols]).to_pandas()
+    pd_result = scalars_pandas_df[l_cols].corrwith(scalars_pandas_df[r_cols])
+
+    # BigFrames and Pandas differ in their data type handling:
+    # - Column types: BigFrames uses Float64, Pandas uses float64.
+    # - Index types: BigFrames uses strign, Pandas uses object.
+    pd.testing.assert_series_equal(
+        bf_result, pd_result, check_dtype=False, check_index_type=False
+    )
+
+
+def test_df_corrwith_df_numeric_only(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+
+    l_cols = ["int64_col", "float64_col", "int64_too", "string_col"]
+    r_cols = ["int64_too", "float64_col", "bool_col"]
+
+    bf_result = (
+        scalars_df[l_cols].corrwith(scalars_df[r_cols], numeric_only=True).to_pandas()
+    )
+    pd_result = scalars_pandas_df[l_cols].corrwith(
+        scalars_pandas_df[r_cols], numeric_only=True
+    )
+
+    # BigFrames and Pandas differ in their data type handling:
+    # - Column types: BigFrames uses Float64, Pandas uses float64.
+    # - Index types: BigFrames uses strign, Pandas uses object.
+    pd.testing.assert_series_equal(
+        bf_result, pd_result, check_dtype=False, check_index_type=False
+    )
+
+
+def test_df_corrwith_df_non_numeric_error(scalars_dfs):
+    scalars_df, _ = scalars_dfs
+
+    l_cols = ["int64_col", "float64_col", "int64_too", "string_col"]
+    r_cols = ["int64_too", "float64_col", "bool_col"]
+
+    with pytest.raises(NotImplementedError):
+        scalars_df[l_cols].corrwith(scalars_df[r_cols], numeric_only=False)
+
+
+def test_df_corrwith_series(scalars_dfs_maybe_ordered):
+    # TODO: supply a reason why this isn't compatible with pandas 1.x
+    pytest.importorskip("pandas", minversion="2.0.0")
+    scalars_df, scalars_pandas_df = scalars_dfs_maybe_ordered
+
+    l_cols = ["int64_col", "float64_col", "int64_too"]
+    r_col = "float64_col"
+
+    bf_result = scalars_df[l_cols].corrwith(scalars_df[r_col]).to_pandas()
+    pd_result = scalars_pandas_df[l_cols].corrwith(scalars_pandas_df[r_col])
+
+    # BigFrames and Pandas differ in their data type handling:
+    # - Column types: BigFrames uses Float64, Pandas uses float64.
+    # - Index types: BigFrames uses strign, Pandas uses object.
+    pd.testing.assert_series_equal(
+        bf_result, pd_result, check_dtype=False, check_index_type=False
+    )
+
+
+@pytest.mark.parametrize(
+    ("op"),
+    [
+        operator.add,
+        operator.sub,
+        operator.mul,
+        operator.truediv,
+        operator.floordiv,
+        operator.eq,
+        operator.ne,
+        operator.gt,
+        operator.ge,
+        operator.lt,
+        operator.le,
+    ],
+    ids=[
+        "add",
+        "subtract",
+        "multiply",
+        "true_divide",
+        "floor_divide",
+        "eq",
+        "ne",
+        "gt",
+        "ge",
+        "lt",
+        "le",
+    ],
+)
+# TODO(garrettwu): deal with NA values
+@pytest.mark.parametrize(("other_scalar"), [1, 2.5, 0, 0.0])
+@pytest.mark.parametrize(("reverse_operands"), [True, False])
+def test_scalar_binop(scalars_dfs, op, other_scalar, reverse_operands):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    columns = ["int64_col", "float64_col"]
+
+    maybe_reversed_op = (lambda x, y: op(y, x)) if reverse_operands else op
+
+    bf_result = maybe_reversed_op(scalars_df[columns], other_scalar).to_pandas()
+    pd_result = maybe_reversed_op(scalars_pandas_df[columns], other_scalar)
+
+    assert_pandas_df_equal(bf_result, pd_result)
+
+
+def test_dataframe_string_radd_const(scalars_dfs):
+    pytest.importorskip(
+        "pandas",
+        minversion="2.0.0",
+        reason="PyArrow string addition requires pandas 2.0+",
+    )
+
+    scalars_df, scalars_pandas_df = scalars_dfs
+    columns = ["string_col", "string_col"]
+
+    bf_result = ("prefix" + scalars_df[columns]).to_pandas()
+    pd_result = "prefix" + scalars_pandas_df[columns]
+
+    assert_pandas_df_equal(bf_result, pd_result)
+
+
+@pytest.mark.parametrize(("other_scalar"), [1, -2])
+def test_mod(scalars_dfs, other_scalar):
+    # Zero case excluded as pandas produces 0 result for Int64 inputs rather than NA/NaN.
+    # This is likely a pandas bug as mod 0 is undefined in other dtypes, and most programming languages.
+    scalars_df, scalars_pandas_df = scalars_dfs
+
+    bf_result = (scalars_df[["int64_col", "int64_too"]] % other_scalar).to_pandas()
+    pd_result = scalars_pandas_df[["int64_col", "int64_too"]] % other_scalar
+
+    assert_pandas_df_equal(bf_result, pd_result)
+
+
+def test_scalar_binop_str_exception(scalars_dfs):
+    scalars_df, _ = scalars_dfs
+    columns = ["string_col"]
+    with pytest.raises(TypeError, match="Cannot add dtypes"):
+        (scalars_df[columns] + 1).to_pandas()
+
+
+@pytest.mark.parametrize(
+    ("op"),
+    [
+        (lambda x, y: x.add(y, axis="index")),
+        (lambda x, y: x.radd(y, axis="index")),
+        (lambda x, y: x.sub(y, axis="index")),
+        (lambda x, y: x.rsub(y, axis="index")),
+        (lambda x, y: x.mul(y, axis="index")),
+        (lambda x, y: x.rmul(y, axis="index")),
+        (lambda x, y: x.truediv(y, axis="index")),
+        (lambda x, y: x.rtruediv(y, axis="index")),
+        (lambda x, y: x.floordiv(y, axis="index")),
+        (lambda x, y: x.floordiv(y, axis="index")),
+        (lambda x, y: x.gt(y, axis="index")),
+        (lambda x, y: x.ge(y, axis="index")),
+        (lambda x, y: x.lt(y, axis="index")),
+        (lambda x, y: x.le(y, axis="index")),
+    ],
+    ids=[
+        "add",
+        "radd",
+        "sub",
+        "rsub",
+        "mul",
+        "rmul",
+        "truediv",
+        "rtruediv",
+        "floordiv",
+        "rfloordiv",
+        "gt",
+        "ge",
+        "lt",
+        "le",
+    ],
+)
+def test_series_binop_axis_index(
+    scalars_dfs,
+    op,
+):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    df_columns = ["int64_col", "float64_col"]
+    series_column = "int64_too"
+
+    bf_result = op(scalars_df[df_columns], scalars_df[series_column]).to_pandas()
+    pd_result = op(scalars_pandas_df[df_columns], scalars_pandas_df[series_column])
+
+    assert_pandas_df_equal(bf_result, pd_result)
+
+
+@pytest.mark.parametrize(
+    ("input"),
+    [
+        ((1000, 2000, 3000)),
+        (pd.Index([1000, 2000, 3000])),
+        (pd.Series((1000, 2000), index=["int64_too", "float64_col"])),
+    ],
+    ids=[
+        "tuple",
+        "pd_index",
+        "pd_series",
+    ],
+)
+def test_listlike_binop_axis_1_in_memory_data(scalars_dfs, input):
+    # TODO: supply a reason why this isn't compatible with pandas 1.x
+    pytest.importorskip("pandas", minversion="2.0.0")
+    scalars_df, scalars_pandas_df = scalars_dfs
+
+    df_columns = ["int64_col", "float64_col", "int64_too"]
+
+    bf_result = scalars_df[df_columns].add(input, axis=1).to_pandas()
+    if hasattr(input, "to_pandas"):
+        input = input.to_pandas()
+    pd_result = scalars_pandas_df[df_columns].add(input, axis=1)
+
+    assert_pandas_df_equal(bf_result, pd_result, check_dtype=False)
+
+
+def test_df_reverse_binop_pandas(scalars_dfs):
+    # TODO: supply a reason why this isn't compatible with pandas 1.x
+    pytest.importorskip("pandas", minversion="2.0.0")
+    scalars_df, scalars_pandas_df = scalars_dfs
+
+    pd_series = pd.Series([100, 200, 300])
+
+    df_columns = ["int64_col", "float64_col", "int64_too"]
+
+    bf_result = pd_series + scalars_df[df_columns].to_pandas()
+    pd_result = pd_series + scalars_pandas_df[df_columns]
+
+    assert_pandas_df_equal(bf_result, pd_result, check_dtype=False)
+
+
+def test_listlike_binop_axis_1_bf_index(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+
+    df_columns = ["int64_col", "float64_col", "int64_too"]
+
+    bf_result = (
+        scalars_df[df_columns]
+        .add(bf_indexes.Index([1000, 2000, 3000]), axis=1)
+        .to_pandas()
+    )
+    pd_result = scalars_pandas_df[df_columns].add(pd.Index([1000, 2000, 3000]), axis=1)
+
+    assert_pandas_df_equal(bf_result, pd_result, check_dtype=False)
+
+
+def test_binop_with_self_aggregate(scalars_dfs_maybe_ordered):
+    scalars_df, scalars_pandas_df = scalars_dfs_maybe_ordered
+
+    df_columns = ["int64_col", "float64_col", "int64_too"]
+
+    # Ensure that this takes the optimized single-query path by counting executions
+    execution_count_before = scalars_df._session._metrics.execution_count
+    bf_df = scalars_df[df_columns]
+    bf_result = (bf_df - bf_df.mean()).to_pandas()
+    execution_count_after = scalars_df._session._metrics.execution_count
+
+    pd_df = scalars_pandas_df[df_columns]
+    pd_result = pd_df - pd_df.mean()
+
+    executions = execution_count_after - execution_count_before
+
+    assert executions == 1
+    assert_pandas_df_equal(bf_result, pd_result, check_dtype=False)
+
+
+def test_binop_with_self_aggregate_w_index_reset(scalars_dfs_maybe_ordered):
+    scalars_df, scalars_pandas_df = scalars_dfs_maybe_ordered
+
+    df_columns = ["int64_col", "float64_col", "int64_too"]
+
+    # Ensure that this takes the optimized single-query path by counting executions
+    execution_count_before = scalars_df._session._metrics.execution_count
+    bf_df = scalars_df[df_columns].reset_index(drop=True)
+    bf_result = (bf_df - bf_df.mean()).to_pandas()
+    execution_count_after = scalars_df._session._metrics.execution_count
+
+    pd_df = scalars_pandas_df[df_columns].reset_index(drop=True)
+    pd_result = pd_df - pd_df.mean()
+
+    executions = execution_count_after - execution_count_before
+
+    assert executions == 1
+    pd_result.index = pd_result.index.astype("Int64")
+    assert_pandas_df_equal(
+        bf_result, pd_result, check_dtype=False, check_index_type=False
+    )
+
+
+@pytest.mark.parametrize(
+    ("left_labels", "right_labels"),
+    [
+        (["a", "a", "b"], ["c", "c", "d"]),
+        (["a", "b", "c"], ["c", "a", "b"]),
+        (["a", "c", "c"], ["c", "a", "c"]),
+        (["a", "b", "c"], ["a", "b", "c"]),
+    ],
+    ids=[
+        "no_overlap",
+        "one_one_match",
+        "multi_match",
+        "exact_match",
+    ],
+)
+def test_binop_df_df_binary_op(
+    scalars_df_index,
+    scalars_df_2_index,
+    scalars_pandas_df_index,
+    left_labels,
+    right_labels,
+):
+    if pd.__version__.startswith("1."):
+        pytest.skip("pd.NA vs NaN not handled well in pandas 1.x.")
+    columns = ["int64_too", "int64_col", "float64_col"]
+
+    bf_df_a = scalars_df_index[columns]
+    bf_df_a.columns = left_labels
+    bf_df_b = scalars_df_2_index[columns]
+    bf_df_b.columns = right_labels
+    bf_result = (bf_df_a - bf_df_b).to_pandas()
+
+    pd_df_a = scalars_pandas_df_index[columns]
+    pd_df_a.columns = left_labels
+    pd_df_b = scalars_pandas_df_index[columns]
+    pd_df_b.columns = right_labels
+    pd_result = pd_df_a - pd_df_b
+
+    # Some dtype inconsistency for all-NULL columns
+    pd.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False)
+
+
+# Differnt table will only work for explicit index, since default index orders are arbitrary.
+@pytest.mark.parametrize(
+    ("ordered"),
+    [
+        (True),
+        (False),
+    ],
+)
+def test_series_binop_add_different_table(
+    scalars_df_index, scalars_pandas_df_index, scalars_df_2_index, ordered
+):
+    df_columns = ["int64_col", "float64_col"]
+    series_column = "int64_too"
+
+    bf_result = (
+        scalars_df_index[df_columns]
+        .add(scalars_df_2_index[series_column], axis="index")
+        .to_pandas(ordered=ordered)
+    )
+    pd_result = scalars_pandas_df_index[df_columns].add(
+        scalars_pandas_df_index[series_column], axis="index"
+    )
+
+    assert_pandas_df_equal(bf_result, pd_result, ignore_order=not ordered)
+
+
+# TODO(garrettwu): Test series binop with different index
+
+all_joins = pytest.mark.parametrize(
+    ("how",),
+    (("outer",), ("left",), ("right",), ("inner",), ("cross",)),
+)
+
+
+@all_joins
+def test_join_same_table(scalars_dfs_maybe_ordered, how):
+    bf_df, pd_df = scalars_dfs_maybe_ordered
+
+    bf_df_a = bf_df.set_index("int64_too")[["string_col", "int64_col"]]
+    bf_df_a = bf_df_a.sort_index()
+
+    bf_df_b = bf_df.set_index("int64_too")[["float64_col"]]
+    bf_df_b = bf_df_b[bf_df_b.float64_col > 0]
+    bf_df_b = bf_df_b.sort_values("float64_col")
+
+    bf_result = bf_df_a.join(bf_df_b, how=how).to_pandas()
+
+    pd_df_a = pd_df.set_index("int64_too")[["string_col", "int64_col"]].sort_index()
+    pd_df_a = pd_df_a.sort_index()
+
+    pd_df_b = pd_df.set_index("int64_too")[["float64_col"]]
+    pd_df_b = pd_df_b[pd_df_b.float64_col > 0]
+    pd_df_b = pd_df_b.sort_values("float64_col")
+
+    pd_result = pd_df_a.join(pd_df_b, how=how)
+
+    assert_pandas_df_equal(bf_result, pd_result, ignore_order=True)
+
+
+def test_join_incompatible_key_type_error(scalars_dfs):
+    bf_df, _ = scalars_dfs
+
+    bf_df_a = bf_df.set_index("int64_too")[["string_col", "int64_col"]]
+    bf_df_a = bf_df_a.sort_index()
+
+    bf_df_b = bf_df.set_index("date_col")[["float64_col"]]
+    bf_df_b = bf_df_b[bf_df_b.float64_col > 0]
+    bf_df_b = bf_df_b.sort_values("float64_col")
+
+    with pytest.raises(TypeError):
+        # joining incompatible date, int columns
+        bf_df_a.join(bf_df_b, how="left")
+
+
+@all_joins
+def test_join_different_table(
+    scalars_df_index, scalars_df_2_index, scalars_pandas_df_index, how
+):
+    bf_df_a = scalars_df_index[["string_col", "int64_col"]]
+    bf_df_b = scalars_df_2_index.dropna()[["float64_col"]]
+    bf_result = bf_df_a.join(bf_df_b, how=how).to_pandas()
+    pd_df_a = scalars_pandas_df_index[["string_col", "int64_col"]]
+    pd_df_b = scalars_pandas_df_index.dropna()[["float64_col"]]
+    pd_result = pd_df_a.join(pd_df_b, how=how)
+    assert_pandas_df_equal(bf_result, pd_result, ignore_order=True)
+
+
+@all_joins
+def test_join_different_table_with_duplicate_column_name(
+    scalars_df_index, scalars_pandas_df_index, how
+):
+    bf_df_a = scalars_df_index[["string_col", "int64_col", "int64_too"]].rename(
+        columns={"int64_too": "int64_col"}
+    )
+    bf_df_b = scalars_df_index.dropna()[
+        ["string_col", "int64_col", "int64_too"]
+    ].rename(columns={"int64_too": "int64_col"})
+    bf_result = bf_df_a.join(bf_df_b, how=how, lsuffix="_l", rsuffix="_r").to_pandas()
+    pd_df_a = scalars_pandas_df_index[["string_col", "int64_col", "int64_too"]].rename(
+        columns={"int64_too": "int64_col"}
+    )
+    pd_df_b = scalars_pandas_df_index.dropna()[
+        ["string_col", "int64_col", "int64_too"]
+    ].rename(columns={"int64_too": "int64_col"})
+    pd_result = pd_df_a.join(pd_df_b, how=how, lsuffix="_l", rsuffix="_r")
+
+    # Ensure no inplace changes
+    pd.testing.assert_index_equal(bf_df_a.columns, pd_df_a.columns)
+    pd.testing.assert_index_equal(bf_df_b.index.to_pandas(), pd_df_b.index)
+    pd.testing.assert_frame_equal(bf_result, pd_result, check_index_type=False)
+
+
+@all_joins
+def test_join_param_on_with_duplicate_column_name_not_on_col(
+    scalars_df_index, scalars_pandas_df_index, how
+):
+    # This test is for duplicate column names, but the 'on' column is not duplicated.
+    if how == "cross":
+        return
+    bf_df_a = scalars_df_index[
+        ["string_col", "datetime_col", "timestamp_col", "int64_too"]
+    ].rename(columns={"timestamp_col": "datetime_col"})
+    bf_df_b = scalars_df_index.dropna()[
+        ["string_col", "datetime_col", "timestamp_col"]
+    ].rename(columns={"timestamp_col": "datetime_col"})
+    bf_result = bf_df_a.join(
+        bf_df_b, on="int64_too", how=how, lsuffix="_l", rsuffix="_r"
+    ).to_pandas()
+    pd_df_a = scalars_pandas_df_index[
+        ["string_col", "datetime_col", "timestamp_col", "int64_too"]
+    ].rename(columns={"timestamp_col": "datetime_col"})
+    pd_df_b = scalars_pandas_df_index.dropna()[
+        ["string_col", "datetime_col", "timestamp_col"]
+    ].rename(columns={"timestamp_col": "datetime_col"})
+    pd_result = pd_df_a.join(
+        pd_df_b, on="int64_too", how=how, lsuffix="_l", rsuffix="_r"
+    )
+    pd.testing.assert_frame_equal(
+        bf_result.sort_index(),
+        pd_result.sort_index(),
+        check_like=True,
+        check_index_type=False,
+        check_names=False,
+    )
+    pd.testing.assert_index_equal(bf_result.columns, pd_result.columns)
+
+
+@pytest.mark.skipif(
+    pandas.__version__.startswith("1."), reason="bad left join in pandas 1.x"
+)
+@all_joins
+def test_join_param_on_with_duplicate_column_name_on_col(
+    scalars_df_index, scalars_pandas_df_index, how
+):
+    # This test is for duplicate column names, and the 'on' column is duplicated.
+    if how == "cross":
+        return
+    bf_df_a = scalars_df_index[
+        ["string_col", "datetime_col", "timestamp_col", "int64_too"]
+    ].rename(columns={"timestamp_col": "datetime_col"})
+    bf_df_b = scalars_df_index.dropna()[
+        ["string_col", "datetime_col", "timestamp_col", "int64_too"]
+    ].rename(columns={"timestamp_col": "datetime_col"})
+    bf_result = bf_df_a.join(
+        bf_df_b, on="int64_too", how=how, lsuffix="_l", rsuffix="_r"
+    ).to_pandas()
+    pd_df_a = scalars_pandas_df_index[
+        ["string_col", "datetime_col", "timestamp_col", "int64_too"]
+    ].rename(columns={"timestamp_col": "datetime_col"})
+    pd_df_b = scalars_pandas_df_index.dropna()[
+        ["string_col", "datetime_col", "timestamp_col", "int64_too"]
+    ].rename(columns={"timestamp_col": "datetime_col"})
+    pd_result = pd_df_a.join(
+        pd_df_b, on="int64_too", how=how, lsuffix="_l", rsuffix="_r"
+    )
+    pd.testing.assert_frame_equal(
+        bf_result.sort_index(),
+        pd_result.sort_index(),
+        check_like=True,
+        check_index_type=False,
+        check_names=False,
+    )
+    pd.testing.assert_index_equal(bf_result.columns, pd_result.columns)
+
+
+@all_joins
+def test_join_param_on(scalars_dfs, how):
+    bf_df, pd_df = scalars_dfs
+
+    bf_df_a = bf_df[["string_col", "int64_col", "rowindex_2"]]
+    bf_df_a = bf_df_a.assign(rowindex_2=bf_df_a["rowindex_2"] + 2)
+    bf_df_b = bf_df[["float64_col"]]
+
+    if how == "cross":
+        with pytest.raises(ValueError):
+            bf_df_a.join(bf_df_b, on="rowindex_2", how=how)
+    else:
+        bf_result = bf_df_a.join(bf_df_b, on="rowindex_2", how=how).to_pandas()
+
+        pd_df_a = pd_df[["string_col", "int64_col", "rowindex_2"]]
+        pd_df_a = pd_df_a.assign(rowindex_2=pd_df_a["rowindex_2"] + 2)
+        pd_df_b = pd_df[["float64_col"]]
+        pd_result = pd_df_a.join(pd_df_b, on="rowindex_2", how=how)
+        assert_pandas_df_equal(bf_result, pd_result, ignore_order=True)
+
+
+@all_joins
+def test_df_join_series(scalars_dfs, how):
+    bf_df, pd_df = scalars_dfs
+
+    bf_df_a = bf_df[["string_col", "int64_col", "rowindex_2"]]
+    bf_df_a = bf_df_a.assign(rowindex_2=bf_df_a["rowindex_2"] + 2)
+    bf_series_b = bf_df["float64_col"]
+
+    if how == "cross":
+        with pytest.raises(ValueError):
+            bf_df_a.join(bf_series_b, on="rowindex_2", how=how)
+    else:
+        bf_result = bf_df_a.join(bf_series_b, on="rowindex_2", how=how).to_pandas()
+
+        pd_df_a = pd_df[["string_col", "int64_col", "rowindex_2"]]
+        pd_df_a = pd_df_a.assign(rowindex_2=pd_df_a["rowindex_2"] + 2)
+        pd_series_b = pd_df["float64_col"]
+        pd_result = pd_df_a.join(pd_series_b, on="rowindex_2", how=how)
+        assert_pandas_df_equal(bf_result, pd_result, ignore_order=True)
+
+
+@pytest.mark.parametrize(
+    ("by", "ascending", "na_position"),
+    [
+        ("int64_col", True, "first"),
+        (["bool_col", "int64_col"], True, "last"),
+        ("int64_col", False, "first"),
+        (["bool_col", "int64_col"], [False, True], "last"),
+        (["bool_col", "int64_col"], [True, False], "first"),
+    ],
+)
+def test_dataframe_sort_values(
+    scalars_df_index, scalars_pandas_df_index, by, ascending, na_position
+):
+    # Test needs values to be unique
+    bf_result = scalars_df_index.sort_values(
+        by, ascending=ascending, na_position=na_position
+    ).to_pandas()
+    pd_result = scalars_pandas_df_index.sort_values(
+        by, ascending=ascending, na_position=na_position
+    )
+
+    pandas.testing.assert_frame_equal(
+        bf_result,
+        pd_result,
+    )
+
+
+@pytest.mark.parametrize(
+    ("by", "ascending", "na_position"),
+    [
+        ("int64_col", True, "first"),
+        (["bool_col", "int64_col"], True, "last"),
+    ],
+)
+def test_dataframe_sort_values_inplace(
+    scalars_df_index, scalars_pandas_df_index, by, ascending, na_position
+):
+    # Test needs values to be unique
+    bf_sorted = scalars_df_index.copy()
+    bf_sorted.sort_values(
+        by, ascending=ascending, na_position=na_position, inplace=True
+    )
+    bf_result = bf_sorted.to_pandas()
+    pd_result = scalars_pandas_df_index.sort_values(
+        by, ascending=ascending, na_position=na_position
+    )
+
+    pandas.testing.assert_frame_equal(
+        bf_result,
+        pd_result,
+    )
+
+
+def test_dataframe_sort_values_invalid_input(scalars_df_index):
+    with pytest.raises(KeyError):
+        scalars_df_index.sort_values(by=scalars_df_index["int64_col"])
+
+
+def test_dataframe_sort_values_stable(scalars_df_index, scalars_pandas_df_index):
+    bf_result = (
+        scalars_df_index.sort_values("int64_col", kind="stable")
+        .sort_values("bool_col", kind="stable")
+        .to_pandas()
+    )
+    pd_result = scalars_pandas_df_index.sort_values(
+        "int64_col", kind="stable"
+    ).sort_values("bool_col", kind="stable")
+
+    pandas.testing.assert_frame_equal(
+        bf_result,
+        pd_result,
+    )
+
+
+@pytest.mark.parametrize(
+    ("operator", "columns"),
+    [
+        pytest.param(lambda x: x.cumsum(), ["float64_col", "int64_too"]),
+        pytest.param(lambda x: x.cumprod(), ["float64_col", "int64_too"]),
+        pytest.param(
+            lambda x: x.cumprod(),
+            ["string_col"],
+            marks=pytest.mark.xfail(
+                raises=ValueError,
+            ),
+        ),
+    ],
+    ids=[
+        "cumsum",
+        "cumprod",
+        "non-numeric",
+    ],
+)
+def test_dataframe_numeric_analytic_op(
+    scalars_df_index, scalars_pandas_df_index, operator, columns
+):
+    # TODO: Add nullable ints (pandas 1.x has poor behavior on these)
+    bf_series = operator(scalars_df_index[columns])
+    pd_series = operator(scalars_pandas_df_index[columns])
+    bf_result = bf_series.to_pandas()
+    pd.testing.assert_frame_equal(pd_series, bf_result, check_dtype=False)
+
+
+@pytest.mark.parametrize(
+    ("operator"),
+    [
+        (lambda x: x.cummin()),
+        (lambda x: x.cummax()),
+        (lambda x: x.shift(2)),
+        (lambda x: x.shift(-2)),
+    ],
+    ids=[
+        "cummin",
+        "cummax",
+        "shiftpostive",
+        "shiftnegative",
+    ],
+)
+def test_dataframe_general_analytic_op(
+    scalars_df_index, scalars_pandas_df_index, operator
+):
+    col_names = ["int64_too", "float64_col", "int64_col", "bool_col"]
+    bf_series = operator(scalars_df_index[col_names])
+    pd_series = operator(scalars_pandas_df_index[col_names])
+    bf_result = bf_series.to_pandas()
+    pd.testing.assert_frame_equal(
+        pd_series,
+        bf_result,
+    )
+
+
+@pytest.mark.parametrize(
+    ("periods",),
+    [
+        (1,),
+        (2,),
+        (-1,),
+    ],
+)
+def test_dataframe_diff(scalars_df_index, scalars_pandas_df_index, periods):
+    col_names = ["int64_too", "float64_col", "int64_col"]
+    bf_result = scalars_df_index[col_names].diff(periods=periods).to_pandas()
+    pd_result = scalars_pandas_df_index[col_names].diff(periods=periods)
+    pd.testing.assert_frame_equal(
+        pd_result,
+        bf_result,
+    )
+
+
+@pytest.mark.parametrize(
+    ("periods",),
+    [
+        (1,),
+        (2,),
+        (-1,),
+    ],
+)
+def test_dataframe_pct_change(scalars_df_index, scalars_pandas_df_index, periods):
+    col_names = ["int64_too", "float64_col", "int64_col"]
+    bf_result = scalars_df_index[col_names].pct_change(periods=periods).to_pandas()
+    pd_result = scalars_pandas_df_index[col_names].pct_change(periods=periods)
+    pd.testing.assert_frame_equal(
+        pd_result,
+        bf_result,
+    )
+
+
+def test_dataframe_agg_single_string(scalars_dfs):
+    numeric_cols = ["int64_col", "int64_too", "float64_col"]
+    scalars_df, scalars_pandas_df = scalars_dfs
+
+    bf_result = scalars_df[numeric_cols].agg("sum").to_pandas()
+    pd_result = scalars_pandas_df[numeric_cols].agg("sum")
+
+    assert bf_result.dtype == "Float64"
+    pd.testing.assert_series_equal(
+        pd_result, bf_result, check_dtype=False, check_index_type=False
+    )
+
+
+@pytest.mark.parametrize(
+    ("agg",),
+    (
+        ("sum",),
+        ("size",),
+    ),
+)
+def test_dataframe_agg_int_single_string(scalars_dfs, agg):
+    numeric_cols = ["int64_col", "int64_too", "bool_col"]
+    scalars_df, scalars_pandas_df = scalars_dfs
+
+    bf_result = scalars_df[numeric_cols].agg(agg).to_pandas()
+    pd_result = scalars_pandas_df[numeric_cols].agg(agg)
+
+    assert bf_result.dtype == "Int64"
+    pd.testing.assert_series_equal(
+        pd_result, bf_result, check_dtype=False, check_index_type=False
+    )
+
+
+def test_dataframe_agg_multi_string(scalars_dfs_maybe_ordered):
+    numeric_cols = ["int64_col", "int64_too", "float64_col"]
+    aggregations = [
+        "sum",
+        "mean",
+        "median",
+        "std",
+        "var",
+        "min",
+        "max",
+        "nunique",
+        "count",
+    ]
+    scalars_df, scalars_pandas_df = scalars_dfs_maybe_ordered
+    bf_result = scalars_df[numeric_cols].agg(aggregations)
+    pd_result = scalars_pandas_df[numeric_cols].agg(aggregations)
+
+    # Pandas may produce narrower numeric types, but bigframes always produces Float64
+    pd_result = pd_result.astype("Float64")
+
+    # Drop median, as it's an approximation.
+    bf_median = bf_result.loc["median", :]
+    bf_result = bf_result.drop(labels=["median"])
+    pd_result = pd_result.drop(labels=["median"])
+
+    assert_dfs_equivalent(pd_result, bf_result, check_index_type=False)
+
+    # Double-check that median is at least plausible.
+    assert (
+        (bf_result.loc["min", :] <= bf_median) & (bf_median <= bf_result.loc["max", :])
+    ).all()
+
+
+def test_dataframe_agg_int_multi_string(scalars_dfs):
+    numeric_cols = ["int64_col", "int64_too", "bool_col"]
+    aggregations = [
+        "sum",
+        "nunique",
+        "count",
+        "size",
+    ]
+    scalars_df, scalars_pandas_df = scalars_dfs
+    bf_result = scalars_df[numeric_cols].agg(aggregations).to_pandas()
+    pd_result = scalars_pandas_df[numeric_cols].agg(aggregations)
+
+    for dtype in bf_result.dtypes:
+        assert dtype == "Int64"
+
+    # Pandas may produce narrower numeric types
+    # Pandas has object index type
+    pd.testing.assert_frame_equal(
+        pd_result, bf_result, check_dtype=False, check_index_type=False
+    )
+
+
+def test_df_transpose():
+    # Include some floats to ensure type coercion
+    values = [[0, 3.5, True], [1, 4.5, False], [2, 6.5, None]]
+    # Test complex case of both axes being multi-indices with non-unique elements
+
+    columns: pandas.Index = pd.Index(
+        ["A", "B", "A"], dtype=pd.StringDtype(storage="pyarrow")
+    )
+    columns_multi = pd.MultiIndex.from_arrays([columns, columns], names=["c1", "c2"])
+
+    index: pandas.Index = pd.Index(
+        ["b", "a", "a"], dtype=pd.StringDtype(storage="pyarrow")
+    )
+    rows_multi = pd.MultiIndex.from_arrays([index, index], names=["r1", "r2"])
+
+    pd_df = pandas.DataFrame(values, index=rows_multi, columns=columns_multi)
+    bf_df = dataframe.DataFrame(values, index=rows_multi, columns=columns_multi)
+
+    pd_result = pd_df.T
+    bf_result = bf_df.T.to_pandas()
+
+    pd.testing.assert_frame_equal(pd_result, bf_result, check_dtype=False)
+
+
+def test_df_transpose_error():
+    with pytest.raises(TypeError, match="Cannot coerce.*to a common type."):
+        dataframe.DataFrame([[1, "hello"], [2, "world"]]).transpose()
+
+
+def test_df_transpose_repeated_uses_cache():
+    bf_df = dataframe.DataFrame([[1, 2.5], [2, 3.5]])
+    pd_df = pandas.DataFrame([[1, 2.5], [2, 3.5]])
+    # Transposing many times so that operation will fail from complexity if not using cache
+    for i in range(10):
+        # Cache still works even with simple scalar binop
+        bf_df = bf_df.transpose() + i
+        pd_df = pd_df.transpose() + i
+
+    pd.testing.assert_frame_equal(
+        pd_df, bf_df.to_pandas(), check_dtype=False, check_index_type=False
+    )
+
+
+@pytest.mark.parametrize(
+    ("ordered"),
+    [
+        (True),
+        (False),
+    ],
+)
+def test_df_stack(scalars_dfs, ordered):
+    if pandas.__version__.startswith("1.") or pandas.__version__.startswith("2.0"):
+        pytest.skip("pandas <2.1 uses different stack implementation")
+    scalars_df, scalars_pandas_df = scalars_dfs
+    # To match bigquery dataframes
+    scalars_pandas_df = scalars_pandas_df.copy()
+    scalars_pandas_df.columns = scalars_pandas_df.columns.astype("string[pyarrow]")
+    # Can only stack identically-typed columns
+    columns = ["int64_col", "int64_too", "rowindex_2"]
+
+    bf_result = scalars_df[columns].stack().to_pandas(ordered=ordered)
+    pd_result = scalars_pandas_df[columns].stack(future_stack=True)
+
+    # Pandas produces NaN, where bq dataframes produces pd.NA
+    assert_series_equal(
+        bf_result, pd_result, check_dtype=False, ignore_order=not ordered
+    )
+
+
+def test_df_melt_default(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    # To match bigquery dataframes
+    scalars_pandas_df = scalars_pandas_df.copy()
+    scalars_pandas_df.columns = scalars_pandas_df.columns.astype("string[pyarrow]")
+    # Can only stack identically-typed columns
+    columns = ["int64_col", "int64_too", "rowindex_2"]
+
+    bf_result = scalars_df[columns].melt().to_pandas()
+    pd_result = scalars_pandas_df[columns].melt()
+
+    # Pandas produces int64 index, Bigframes produces Int64 (nullable)
+    pd.testing.assert_frame_equal(
+        bf_result,
+        pd_result,
+        check_index_type=False,
+        check_dtype=False,
+    )
+
+
+def test_df_melt_parameterized(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    # To match bigquery dataframes
+    scalars_pandas_df = scalars_pandas_df.copy()
+    scalars_pandas_df.columns = scalars_pandas_df.columns.astype("string[pyarrow]")
+    # Can only stack identically-typed columns
+
+    bf_result = scalars_df.melt(
+        var_name="alice",
+        value_name="bob",
+        id_vars=["string_col"],
+        value_vars=["int64_col", "int64_too"],
+    ).to_pandas()
+    pd_result = scalars_pandas_df.melt(
+        var_name="alice",
+        value_name="bob",
+        id_vars=["string_col"],
+        value_vars=["int64_col", "int64_too"],
+    )
+
+    # Pandas produces int64 index, Bigframes produces Int64 (nullable)
+    pd.testing.assert_frame_equal(
+        bf_result, pd_result, check_index_type=False, check_dtype=False
+    )
+
+
+@pytest.mark.parametrize(
+    ("ordered"),
+    [
+        (True),
+        (False),
+    ],
+)
+def test_df_unstack(scalars_dfs, ordered):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    # To match bigquery dataframes
+    scalars_pandas_df = scalars_pandas_df.copy()
+    scalars_pandas_df.columns = scalars_pandas_df.columns.astype("string[pyarrow]")
+    # Can only stack identically-typed columns
+    columns = [
+        "rowindex_2",
+        "int64_col",
+        "int64_too",
+    ]
+
+    # unstack on mono-index produces series
+    bf_result = scalars_df[columns].unstack().to_pandas(ordered=ordered)
+    pd_result = scalars_pandas_df[columns].unstack()
+
+    # Pandas produces NaN, where bq dataframes produces pd.NA
+    assert_series_equal(
+        bf_result, pd_result, check_dtype=False, ignore_order=not ordered
+    )
+
+
+@pytest.mark.parametrize(
+    ("values", "index", "columns"),
+    [
+        ("int64_col", "int64_too", ["string_col"]),
+        (["int64_col"], "int64_too", ["string_col"]),
+        (["int64_col", "float64_col"], "int64_too", ["string_col"]),
+    ],
+)
+def test_df_pivot(scalars_dfs, values, index, columns):
+    scalars_df, scalars_pandas_df = scalars_dfs
+
+    bf_result = scalars_df.pivot(
+        values=values, index=index, columns=columns
+    ).to_pandas()
+    pd_result = scalars_pandas_df.pivot(values=values, index=index, columns=columns)
+
+    # Pandas produces NaN, where bq dataframes produces pd.NA
+    bf_result = bf_result.fillna(float("nan"))
+    pd_result = pd_result.fillna(float("nan"))
+    pd.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False)
+
+
+@pytest.mark.parametrize(
+    ("values", "index", "columns"),
+    [
+        (["goals", "assists"], ["team_name", "season"], ["position"]),
+        (["goals", "assists"], ["season"], ["team_name", "position"]),
+    ],
+)
+def test_df_pivot_hockey(hockey_df, hockey_pandas_df, values, index, columns):
+    bf_result = (
+        hockey_df.reset_index()
+        .pivot(values=values, index=index, columns=columns)
+        .to_pandas()
+    )
+    pd_result = hockey_pandas_df.reset_index().pivot(
+        values=values, index=index, columns=columns
+    )
+
+    # Pandas produces NaN, where bq dataframes produces pd.NA
+    pd.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False)
+
+
+@pytest.mark.parametrize(
+    ("values", "index", "columns", "aggfunc"),
+    [
+        (("culmen_length_mm", "body_mass_g"), "species", "sex", "std"),
+        (["body_mass_g", "culmen_length_mm"], ("species", "island"), "sex", "sum"),
+        ("body_mass_g", "sex", ["island", "species"], "mean"),
+        ("culmen_depth_mm", "island", "species", "max"),
+    ],
+)
+def test_df_pivot_table(
+    penguins_df_default_index,
+    penguins_pandas_df_default_index,
+    values,
+    index,
+    columns,
+    aggfunc,
+):
+    bf_result = penguins_df_default_index.pivot_table(
+        values=values, index=index, columns=columns, aggfunc=aggfunc
+    ).to_pandas()
+    pd_result = penguins_pandas_df_default_index.pivot_table(
+        values=values, index=index, columns=columns, aggfunc=aggfunc
+    )
+    pd.testing.assert_frame_equal(
+        bf_result, pd_result, check_dtype=False, check_column_type=False
+    )
+
+
+def test_ipython_key_completions_with_drop(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    col_names = "string_col"
+    bf_dataframe = scalars_df.drop(columns=col_names)
+    pd_dataframe = scalars_pandas_df.drop(columns=col_names)
+    expected = pd_dataframe.columns.tolist()
+
+    results = bf_dataframe._ipython_key_completions_()
+
+    assert col_names not in results
+    assert results == expected
+    # _ipython_key_completions_ is called with square brackets
+    # so only column names are relevant with tab completion
+    assert "to_gbq" not in results
+    assert "merge" not in results
+    assert "drop" not in results
+
+
+def test_ipython_key_completions_with_rename(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    col_name_dict = {"string_col": "a_renamed_column"}
+    bf_dataframe = scalars_df.rename(columns=col_name_dict)
+    pd_dataframe = scalars_pandas_df.rename(columns=col_name_dict)
+    expected = pd_dataframe.columns.tolist()
+
+    results = bf_dataframe._ipython_key_completions_()
+
+    assert "string_col" not in results
+    assert "a_renamed_column" in results
+    assert results == expected
+    # _ipython_key_completions_ is called with square brackets
+    # so only column names are relevant with tab completion
+    assert "to_gbq" not in results
+    assert "merge" not in results
+    assert "drop" not in results
+
+
+def test__dir__with_drop(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    col_names = "string_col"
+    bf_dataframe = scalars_df.drop(columns=col_names)
+    pd_dataframe = scalars_pandas_df.drop(columns=col_names)
+    expected = pd_dataframe.columns.tolist()
+
+    results = dir(bf_dataframe)
+
+    assert col_names not in results
+    assert frozenset(expected) <= frozenset(results)
+    # __dir__ is called with a '.' and displays all methods, columns names, etc.
+    assert "to_gbq" in results
+    assert "merge" in results
+    assert "drop" in results
+
+
+def test__dir__with_rename(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    col_name_dict = {"string_col": "a_renamed_column"}
+    bf_dataframe = scalars_df.rename(columns=col_name_dict)
+    pd_dataframe = scalars_pandas_df.rename(columns=col_name_dict)
+    expected = pd_dataframe.columns.tolist()
+
+    results = dir(bf_dataframe)
+
+    assert "string_col" not in results
+    assert "a_renamed_column" in results
+    assert frozenset(expected) <= frozenset(results)
+    # __dir__ is called with a '.' and displays all methods, columns names, etc.
+    assert "to_gbq" in results
+    assert "merge" in results
+    assert "drop" in results
+
+
+def test_loc_select_columns_w_repeats(scalars_df_index, scalars_pandas_df_index):
+    bf_result = scalars_df_index[["int64_col", "int64_col", "int64_too"]].to_pandas()
+    pd_result = scalars_pandas_df_index[["int64_col", "int64_col", "int64_too"]]
+    pd.testing.assert_frame_equal(
+        bf_result,
+        pd_result,
+    )
+
+
+@pytest.mark.parametrize(
+    ("start", "stop", "step"),
+    [
+        (0, 0, None),
+        (None, None, None),
+        (1, None, None),
+        (None, 4, None),
+        (None, None, 2),
+        (None, 50000000000, 1),
+        (5, 4, None),
+        (3, None, 2),
+        (1, 7, 2),
+        (1, 7, 50000000000),
+    ],
+)
+def test_iloc_slice(scalars_df_index, scalars_pandas_df_index, start, stop, step):
+    bf_result = scalars_df_index.iloc[start:stop:step].to_pandas()
+    pd_result = scalars_pandas_df_index.iloc[start:stop:step]
+    pd.testing.assert_frame_equal(
+        bf_result,
+        pd_result,
+    )
+
+
+@pytest.mark.parametrize(
+    ("start", "stop", "step"),
+    [
+        (0, 0, None),
+    ],
+)
+def test_iloc_slice_after_cache(
+    scalars_df_index, scalars_pandas_df_index, start, stop, step
+):
+    scalars_df_index.cache()
+    bf_result = scalars_df_index.iloc[start:stop:step].to_pandas()
+    pd_result = scalars_pandas_df_index.iloc[start:stop:step]
+    pd.testing.assert_frame_equal(
+        bf_result,
+        pd_result,
+    )
+
+
+def test_iloc_slice_zero_step(scalars_df_index):
+    with pytest.raises(ValueError):
+        scalars_df_index.iloc[0:0:0]
+
+
+@pytest.mark.parametrize(
+    ("ordered"),
+    [
+        (True),
+        (False),
+    ],
+)
+def test_iloc_slice_nested(scalars_df_index, scalars_pandas_df_index, ordered):
+    bf_result = scalars_df_index.iloc[1:].iloc[1:].to_pandas(ordered=ordered)
+    pd_result = scalars_pandas_df_index.iloc[1:].iloc[1:]
+
+    assert_pandas_df_equal(bf_result, pd_result, ignore_order=not ordered)
+
+
+@pytest.mark.parametrize(
+    "index",
+    [0, 5, -2, (2,)],
+)
+def test_iloc_single_integer(scalars_df_index, scalars_pandas_df_index, index):
+    bf_result = scalars_df_index.iloc[index]
+    pd_result = scalars_pandas_df_index.iloc[index]
+
+    pd.testing.assert_series_equal(
+        bf_result,
+        pd_result,
+    )
+
+
+@pytest.mark.parametrize(
+    "index",
+    [(2, 5), (5, 0), (0, 0)],
+)
+def test_iloc_tuple(scalars_df_index, scalars_pandas_df_index, index):
+    bf_result = scalars_df_index.iloc[index]
+    pd_result = scalars_pandas_df_index.iloc[index]
+
+    assert bf_result == pd_result
+
+
+@pytest.mark.parametrize(
+    "index",
+    [(slice(None), [1, 2, 3]), (slice(1, 7, 2), [2, 5, 3])],
+)
+def test_iloc_tuple_multi_columns(scalars_df_index, scalars_pandas_df_index, index):
+    bf_result = scalars_df_index.iloc[index].to_pandas()
+    pd_result = scalars_pandas_df_index.iloc[index]
+
+    pd.testing.assert_frame_equal(bf_result, pd_result)
+
+
+def test_iloc_tuple_multi_columns_single_row(scalars_df_index, scalars_pandas_df_index):
+    index = (2, [2, 1, 3, -4])
+    bf_result = scalars_df_index.iloc[index]
+    pd_result = scalars_pandas_df_index.iloc[index]
+    pd.testing.assert_series_equal(bf_result, pd_result)
+
+
+@pytest.mark.parametrize(
+    ("index", "error"),
+    [
+        ((1, 1, 1), pd.errors.IndexingError),
+        (("asd", "asd", "asd"), pd.errors.IndexingError),
+        (("asd"), TypeError),
+    ],
+)
+def test_iloc_tuple_errors(scalars_df_index, scalars_pandas_df_index, index, error):
+    with pytest.raises(error):
+        scalars_df_index.iloc[index]
+    with pytest.raises(error):
+        scalars_pandas_df_index.iloc[index]
+
+
+@pytest.mark.parametrize(
+    "index",
+    [(2, 5), (5, 0), (0, 0)],
+)
+def test_iat(scalars_df_index, scalars_pandas_df_index, index):
+    bf_result = scalars_df_index.iat[index]
+    pd_result = scalars_pandas_df_index.iat[index]
+
+    assert bf_result == pd_result
+
+
+@pytest.mark.parametrize(
+    ("index", "error"),
+    [
+        (0, TypeError),
+        ("asd", ValueError),
+        ((1, 2, 3), TypeError),
+        (("asd", "asd"), ValueError),
+    ],
+)
+def test_iat_errors(scalars_df_index, scalars_pandas_df_index, index, error):
+    with pytest.raises(error):
+        scalars_pandas_df_index.iat[index]
+    with pytest.raises(error):
+        scalars_df_index.iat[index]
+
+
+def test_iloc_single_integer_out_of_bound_error(scalars_df_index):
+    with pytest.raises(IndexError, match="single positional indexer is out-of-bounds"):
+        scalars_df_index.iloc[99]
+
+
+def test_loc_bool_series(scalars_df_index, scalars_pandas_df_index):
+    bf_result = scalars_df_index.loc[scalars_df_index.bool_col].to_pandas()
+    pd_result = scalars_pandas_df_index.loc[scalars_pandas_df_index.bool_col]
+
+    pd.testing.assert_frame_equal(
+        bf_result,
+        pd_result,
+    )
+
+
+def test_loc_list_select_rows_and_columns(scalars_df_index, scalars_pandas_df_index):
+    idx_list = [0, 3, 5]
+    bf_result = scalars_df_index.loc[idx_list, ["bool_col", "int64_col"]].to_pandas()
+    pd_result = scalars_pandas_df_index.loc[idx_list, ["bool_col", "int64_col"]]
+
+    pd.testing.assert_frame_equal(
+        bf_result,
+        pd_result,
+    )
+
+
+def test_loc_select_column(scalars_df_index, scalars_pandas_df_index):
+    bf_result = scalars_df_index.loc[:, "int64_col"].to_pandas()
+    pd_result = scalars_pandas_df_index.loc[:, "int64_col"]
+    pd.testing.assert_series_equal(
+        bf_result,
+        pd_result,
+    )
+
+
+def test_loc_select_with_column_condition(scalars_df_index, scalars_pandas_df_index):
+    bf_result = scalars_df_index.loc[:, scalars_df_index.dtypes == "Int64"].to_pandas()
+    pd_result = scalars_pandas_df_index.loc[
+        :, scalars_pandas_df_index.dtypes == "Int64"
+    ]
+    pd.testing.assert_frame_equal(
+        bf_result,
+        pd_result,
+    )
+
+
+def test_loc_select_with_column_condition_bf_series(
+    scalars_df_index, scalars_pandas_df_index
+):
+    # (b/347072677) GEOGRAPH type doesn't support DISTINCT op
+    columns = [
+        item for item in scalars_pandas_df_index.columns if item != "geography_col"
+    ]
+    scalars_df_index = scalars_df_index[columns]
+    scalars_pandas_df_index = scalars_pandas_df_index[columns]
+
+    size_half = len(scalars_pandas_df_index) / 2
+    bf_result = scalars_df_index.loc[
+        :, scalars_df_index.nunique() > size_half
+    ].to_pandas()
+    pd_result = scalars_pandas_df_index.loc[
+        :, scalars_pandas_df_index.nunique() > size_half
+    ]
+    pd.testing.assert_frame_equal(
+        bf_result,
+        pd_result,
+    )
+
+
+def test_loc_single_index_with_duplicate(scalars_df_index, scalars_pandas_df_index):
+    scalars_df_index = scalars_df_index.set_index("string_col", drop=False)
+    scalars_pandas_df_index = scalars_pandas_df_index.set_index(
+        "string_col", drop=False
+    )
+    index = "Hello, World!"
+    bf_result = scalars_df_index.loc[index]
+    pd_result = scalars_pandas_df_index.loc[index]
+    pd.testing.assert_frame_equal(
+        bf_result.to_pandas(),
+        pd_result,
+    )
+
+
+def test_loc_single_index_no_duplicate(scalars_df_index, scalars_pandas_df_index):
+    scalars_df_index = scalars_df_index.set_index("int64_too", drop=False)
+    scalars_pandas_df_index = scalars_pandas_df_index.set_index("int64_too", drop=False)
+    index = -2345
+    bf_result = scalars_df_index.loc[index]
+    pd_result = scalars_pandas_df_index.loc[index]
+    pd.testing.assert_series_equal(
+        bf_result,
+        pd_result,
+    )
+
+
+def test_at_with_duplicate(scalars_df_index, scalars_pandas_df_index):
+    scalars_df_index = scalars_df_index.set_index("string_col", drop=False)
+    scalars_pandas_df_index = scalars_pandas_df_index.set_index(
+        "string_col", drop=False
+    )
+    index = "Hello, World!"
+    bf_result = scalars_df_index.at[index, "int64_too"]
+    pd_result = scalars_pandas_df_index.at[index, "int64_too"]
+    pd.testing.assert_series_equal(
+        bf_result.to_pandas(),
+        pd_result,
+    )
+
+
+def test_at_no_duplicate(scalars_df_index, scalars_pandas_df_index):
+    scalars_df_index = scalars_df_index.set_index("int64_too", drop=False)
+    scalars_pandas_df_index = scalars_pandas_df_index.set_index("int64_too", drop=False)
+    index = -2345
+    bf_result = scalars_df_index.at[index, "string_col"]
+    pd_result = scalars_pandas_df_index.at[index, "string_col"]
+    assert bf_result == pd_result
+
+
+def test_loc_setitem_bool_series_scalar_new_col(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    bf_df = scalars_df.copy()
+    pd_df = scalars_pandas_df.copy()
+    bf_df.loc[bf_df["int64_too"] == 0, "new_col"] = 99
+    pd_df.loc[pd_df["int64_too"] == 0, "new_col"] = 99
+
+    # pandas uses float64 instead
+    pd_df["new_col"] = pd_df["new_col"].astype("Float64")
+
+    pd.testing.assert_frame_equal(
+        bf_df.to_pandas(),
+        pd_df,
+    )
+
+
+@pytest.mark.parametrize(
+    ("col", "value"),
+    [
+        ("string_col", "hello"),
+        ("int64_col", 3),
+        ("float64_col", 3.5),
+    ],
+)
+def test_loc_setitem_bool_series_scalar_existing_col(scalars_dfs, col, value):
+    if pd.__version__.startswith("1."):
+        pytest.skip("this loc overload not supported in pandas 1.x.")
+
+    scalars_df, scalars_pandas_df = scalars_dfs
+    bf_df = scalars_df.copy()
+    pd_df = scalars_pandas_df.copy()
+    bf_df.loc[bf_df["int64_too"] == 1, col] = value
+    pd_df.loc[pd_df["int64_too"] == 1, col] = value
+
+    pd.testing.assert_frame_equal(
+        bf_df.to_pandas(),
+        pd_df,
+    )
+
+
+def test_loc_setitem_bool_series_scalar_error(scalars_dfs):
+    if pd.__version__.startswith("1."):
+        pytest.skip("this loc overload not supported in pandas 1.x.")
+
+    scalars_df, scalars_pandas_df = scalars_dfs
+    bf_df = scalars_df.copy()
+    pd_df = scalars_pandas_df.copy()
+
+    with pytest.raises(Exception):
+        bf_df.loc[bf_df["int64_too"] == 1, "string_col"] = 99
+    with pytest.raises(Exception):
+        pd_df.loc[pd_df["int64_too"] == 1, "string_col"] = 99
+
+
+@pytest.mark.parametrize(
+    ("col", "op"),
+    [
+        # Int aggregates
+        pytest.param("int64_col", lambda x: x.sum(), id="int-sum"),
+        pytest.param("int64_col", lambda x: x.min(), id="int-min"),
+        pytest.param("int64_col", lambda x: x.max(), id="int-max"),
+        pytest.param("int64_col", lambda x: x.count(), id="int-count"),
+        pytest.param("int64_col", lambda x: x.nunique(), id="int-nunique"),
+        # Float aggregates
+        pytest.param("float64_col", lambda x: x.count(), id="float-count"),
+        pytest.param("float64_col", lambda x: x.nunique(), id="float-nunique"),
+        # Bool aggregates
+        pytest.param("bool_col", lambda x: x.sum(), id="bool-sum"),
+        pytest.param("bool_col", lambda x: x.count(), id="bool-count"),
+        pytest.param("bool_col", lambda x: x.nunique(), id="bool-nunique"),
+        # String aggregates
+        pytest.param("string_col", lambda x: x.count(), id="string-count"),
+        pytest.param("string_col", lambda x: x.nunique(), id="string-nunique"),
+    ],
+)
+def test_dataframe_aggregate_int(scalars_df_index, scalars_pandas_df_index, col, op):
+    bf_result = op(scalars_df_index[[col]]).to_pandas()
+    pd_result = op(scalars_pandas_df_index[[col]])
+
+    # Check dtype separately
+    assert bf_result.dtype == "Int64"
+    # Is otherwise "object" dtype
+    pd_result.index = pd_result.index.astype("string[pyarrow]")
+    # Pandas may produce narrower numeric types
+    assert_series_equal(pd_result, bf_result, check_dtype=False, check_index_type=False)
+
+
+@pytest.mark.parametrize(
+    ("col", "op"),
+    [
+        pytest.param("bool_col", lambda x: x.min(), id="bool-min"),
+        pytest.param("bool_col", lambda x: x.max(), id="bool-max"),
+    ],
+)
+def test_dataframe_aggregate_bool(scalars_df_index, scalars_pandas_df_index, col, op):
+    bf_result = op(scalars_df_index[[col]]).to_pandas()
+    pd_result = op(scalars_pandas_df_index[[col]])
+
+    # Check dtype separately
+    assert bf_result.dtype == "boolean"
+
+    # Pandas may produce narrower numeric types
+    # Pandas has object index type
+    pd_result.index = pd_result.index.astype("string[pyarrow]")
+    assert_series_equal(pd_result, bf_result, check_dtype=False, check_index_type=False)
+
+
+@pytest.mark.parametrize(
+    ("op", "bf_dtype"),
+    [
+        (lambda x: x.sum(numeric_only=True), "Float64"),
+        (lambda x: x.mean(numeric_only=True), "Float64"),
+        (lambda x: x.min(numeric_only=True), "Float64"),
+        (lambda x: x.max(numeric_only=True), "Float64"),
+        (lambda x: x.std(numeric_only=True), "Float64"),
+        (lambda x: x.var(numeric_only=True), "Float64"),
+        (lambda x: x.count(numeric_only=False), "Int64"),
+        (lambda x: x.nunique(), "Int64"),
+    ],
+    ids=["sum", "mean", "min", "max", "std", "var", "count", "nunique"],
+)
+def test_dataframe_aggregates(scalars_dfs_maybe_ordered, op, bf_dtype):
+    scalars_df_index, scalars_pandas_df_index = scalars_dfs_maybe_ordered
+    col_names = ["int64_too", "float64_col", "string_col", "int64_col", "bool_col"]
+    bf_series = op(scalars_df_index[col_names])
+    bf_result = bf_series
+    pd_result = op(scalars_pandas_df_index[col_names])
+
+    # Check dtype separately
+    assert bf_result.dtype == bf_dtype
+
+    # Pandas may produce narrower numeric types, but bigframes always produces Float64
+    # Pandas has object index type
+    pd_result.index = pd_result.index.astype("string[pyarrow]")
+    assert_series_equivalent(
+        pd_result,
+        bf_result,
+        check_dtype=False,
+        check_index_type=False,
+    )
+
+
+@pytest.mark.parametrize(
+    ("op"),
+    [
+        (lambda x: x.sum(axis=1, numeric_only=True)),
+        (lambda x: x.mean(axis=1, numeric_only=True)),
+        (lambda x: x.min(axis=1, numeric_only=True)),
+        (lambda x: x.max(axis=1, numeric_only=True)),
+        (lambda x: x.std(axis=1, numeric_only=True)),
+        (lambda x: x.var(axis=1, numeric_only=True)),
+    ],
+    ids=["sum", "mean", "min", "max", "std", "var"],
+)
+def test_dataframe_aggregates_axis_1(scalars_df_index, scalars_pandas_df_index, op):
+    col_names = ["int64_too", "int64_col", "float64_col", "bool_col", "string_col"]
+    bf_result = op(scalars_df_index[col_names]).to_pandas()
+    pd_result = op(scalars_pandas_df_index[col_names])
+
+    # Pandas may produce narrower numeric types, but bigframes always produces Float64
+    pd_result = pd_result.astype("Float64")
+    # Pandas has object index type
+    pd.testing.assert_series_equal(pd_result, bf_result, check_index_type=False)
+
+
+def test_dataframe_aggregates_median(scalars_df_index, scalars_pandas_df_index):
+    col_names = ["int64_too", "float64_col", "int64_col", "bool_col"]
+    bf_result = scalars_df_index[col_names].median(numeric_only=True).to_pandas()
+    pd_result = scalars_pandas_df_index[col_names].agg(["min", "max"])
+
+    # Pandas may produce narrower numeric types, but bigframes always produces Float64
+    pd_result = pd_result.astype("Float64")
+
+    # Median is an approximation, but double-check that median is plausible.
+    for col in col_names:
+        assert (pd_result.loc["min", col] <= bf_result[col]) and (
+            bf_result[col] <= pd_result.loc["max", col]
+        )
+
+
+def test_dataframe_aggregates_quantile_mono(scalars_df_index, scalars_pandas_df_index):
+    q = 0.45
+    col_names = ["int64_too", "int64_col", "float64_col"]
+    bf_result = scalars_df_index[col_names].quantile(q=q).to_pandas()
+    pd_result = scalars_pandas_df_index[col_names].quantile(q=q)
+
+    # Pandas may produce narrower numeric types, but bigframes always produces Float64
+    pd_result = pd_result.astype("Float64")
+
+    pd.testing.assert_series_equal(bf_result, pd_result, check_index_type=False)
+
+
+def test_dataframe_aggregates_quantile_multi(scalars_df_index, scalars_pandas_df_index):
+    q = [0, 0.33, 0.67, 1.0]
+    col_names = ["int64_too", "int64_col", "float64_col"]
+    bf_result = scalars_df_index[col_names].quantile(q=q).to_pandas()
+    pd_result = scalars_pandas_df_index[col_names].quantile(q=q)
+
+    # Pandas may produce narrower numeric types, but bigframes always produces Float64
+    pd_result = pd_result.astype("Float64")
+    pd_result.index = pd_result.index.astype("Float64")
+
+    pd.testing.assert_frame_equal(bf_result, pd_result)
+
+
+@pytest.mark.parametrize(
+    ("op"),
+    [
+        (lambda x: x.all(bool_only=True)),
+        (lambda x: x.any(bool_only=True)),
+        (lambda x: x.all(axis=1, bool_only=True)),
+        (lambda x: x.any(axis=1, bool_only=True)),
+    ],
+    ids=["all_axis0", "any_axis0", "all_axis1", "any_axis1"],
+)
+def test_dataframe_bool_aggregates(scalars_df_index, scalars_pandas_df_index, op):
+    # Pandas will drop nullable 'boolean' dtype so we convert first to bool, then cast back later
+    scalars_df_index = scalars_df_index.assign(
+        bool_col=scalars_df_index.bool_col.fillna(False)
+    )
+    scalars_pandas_df_index = scalars_pandas_df_index.assign(
+        bool_col=scalars_pandas_df_index.bool_col.fillna(False).astype("bool")
+    )
+    bf_series = op(scalars_df_index)
+    pd_series = op(scalars_pandas_df_index).astype("boolean")
+    bf_result = bf_series.to_pandas()
+
+    pd_series.index = pd_series.index.astype(bf_result.index.dtype)
+    pd.testing.assert_series_equal(pd_series, bf_result, check_index_type=False)
+
+
+def test_dataframe_prod(scalars_df_index, scalars_pandas_df_index):
+    col_names = ["int64_too", "float64_col"]
+    bf_series = scalars_df_index[col_names].prod()
+    pd_series = scalars_pandas_df_index[col_names].prod()
+    bf_result = bf_series.to_pandas()
+
+    # Pandas may produce narrower numeric types, but bigframes always produces Float64
+    pd_series = pd_series.astype("Float64")
+    # Pandas has object index type
+    pd.testing.assert_series_equal(pd_series, bf_result, check_index_type=False)
+
+
+def test_df_skew_too_few_values(scalars_dfs):
+    columns = ["float64_col", "int64_col"]
+    scalars_df, scalars_pandas_df = scalars_dfs
+    bf_result = scalars_df[columns].head(2).skew().to_pandas()
+    pd_result = scalars_pandas_df[columns].head(2).skew()
+
+    # Pandas may produce narrower numeric types, but bigframes always produces Float64
+    pd_result = pd_result.astype("Float64")
+
+    pd.testing.assert_series_equal(pd_result, bf_result, check_index_type=False)
+
+
+@pytest.mark.parametrize(
+    ("ordered"),
+    [
+        (True),
+        (False),
+    ],
+)
+def test_df_skew(scalars_dfs, ordered):
+    columns = ["float64_col", "int64_col"]
+    scalars_df, scalars_pandas_df = scalars_dfs
+    bf_result = scalars_df[columns].skew().to_pandas(ordered=ordered)
+    pd_result = scalars_pandas_df[columns].skew()
+
+    # Pandas may produce narrower numeric types, but bigframes always produces Float64
+    pd_result = pd_result.astype("Float64")
+
+    assert_series_equal(
+        pd_result, bf_result, check_index_type=False, ignore_order=not ordered
+    )
+
+
+def test_df_kurt_too_few_values(scalars_dfs):
+    columns = ["float64_col", "int64_col"]
+    scalars_df, scalars_pandas_df = scalars_dfs
+    bf_result = scalars_df[columns].head(2).kurt().to_pandas()
+    pd_result = scalars_pandas_df[columns].head(2).kurt()
+
+    # Pandas may produce narrower numeric types, but bigframes always produces Float64
+    pd_result = pd_result.astype("Float64")
+
+    pd.testing.assert_series_equal(pd_result, bf_result, check_index_type=False)
+
+
+def test_df_kurt(scalars_dfs):
+    columns = ["float64_col", "int64_col"]
+    scalars_df, scalars_pandas_df = scalars_dfs
+    bf_result = scalars_df[columns].kurt().to_pandas()
+    pd_result = scalars_pandas_df[columns].kurt()
+
+    # Pandas may produce narrower numeric types, but bigframes always produces Float64
+    pd_result = pd_result.astype("Float64")
+
+    pd.testing.assert_series_equal(pd_result, bf_result, check_index_type=False)
+
+
+@pytest.mark.parametrize(
+    ("frac", "n", "random_state"),
+    [
+        (None, 4, None),
+        (0.5, None, None),
+        (None, 4, 10),
+        (0.5, None, 10),
+        (None, None, None),
+    ],
+    ids=[
+        "n_wo_random_state",
+        "frac_wo_random_state",
+        "n_w_random_state",
+        "frac_w_random_state",
+        "n_default",
+    ],
+)
+def test_sample(scalars_dfs, frac, n, random_state):
+    scalars_df, _ = scalars_dfs
+    df = scalars_df.sample(frac=frac, n=n, random_state=random_state)
+    bf_result = df.to_pandas()
+
+    n = 1 if n is None else n
+    expected_sample_size = round(frac * scalars_df.shape[0]) if frac is not None else n
+    assert bf_result.shape[0] == expected_sample_size
+    assert bf_result.shape[1] == scalars_df.shape[1]
+
+
+def test_sample_determinism(penguins_df_default_index):
+    df = penguins_df_default_index.sample(n=100, random_state=12345).head(15)
+    bf_result = df.to_pandas()
+    bf_result2 = df.to_pandas()
+
+    pandas.testing.assert_frame_equal(bf_result, bf_result2)
+
+
+def test_sample_raises_value_error(scalars_dfs):
+    scalars_df, _ = scalars_dfs
+    with pytest.raises(
+        ValueError, match="Only one of 'n' or 'frac' parameter can be specified."
+    ):
+        scalars_df.sample(frac=0.5, n=4)
+
+
+def test_sample_args_sort(scalars_dfs):
+    scalars_df, _ = scalars_dfs
+    index = [4, 3, 2, 5, 1, 0]
+    scalars_df = scalars_df.iloc[index]
+
+    kwargs = {"frac": 1.0, "random_state": 333}
+
+    df = scalars_df.sample(**kwargs).to_pandas()
+    assert df.index.values != index
+    assert df.index.values != sorted(index)
+
+    df = scalars_df.sample(sort="random", **kwargs).to_pandas()
+    assert df.index.values != index
+    assert df.index.values != sorted(index)
+
+    df = scalars_df.sample(sort=True, **kwargs).to_pandas()
+    assert df.index.values == sorted(index)
+
+    df = scalars_df.sample(sort=False, **kwargs).to_pandas()
+    assert df.index.values == index
+
+
+@pytest.mark.parametrize(
+    ("axis",),
+    [
+        (None,),
+        (0,),
+        (1,),
+    ],
+)
+def test_df_add_prefix(scalars_df_index, scalars_pandas_df_index, axis):
+    if pd.__version__.startswith("1."):
+        pytest.skip("add_prefix axis parameter not supported in pandas 1.x.")
+    bf_result = scalars_df_index.add_prefix("prefix_", axis).to_pandas()
+
+    pd_result = scalars_pandas_df_index.add_prefix("prefix_", axis)
+
+    pd.testing.assert_frame_equal(
+        bf_result,
+        pd_result,
+        check_index_type=False,
+    )
+
+
+@pytest.mark.parametrize(
+    ("axis",),
+    [
+        (0,),
+        (1,),
+    ],
+)
+def test_df_add_suffix(scalars_df_index, scalars_pandas_df_index, axis):
+    if pd.__version__.startswith("1."):
+        pytest.skip("add_prefix axis parameter not supported in pandas 1.x.")
+    bf_result = scalars_df_index.add_suffix("_suffix", axis).to_pandas()
+
+    pd_result = scalars_pandas_df_index.add_suffix("_suffix", axis)
+
+    pd.testing.assert_frame_equal(
+        bf_result,
+        pd_result,
+        check_index_type=False,
+    )
+
+
+def test_df_astype_error_error(session):
+    input = pd.DataFrame(["hello", "world", "3.11", "4000"])
+    with pytest.raises(ValueError):
+        session.read_pandas(input).astype("Float64", errors="bad_value")
+
+
+def test_df_columns_filter_items(scalars_df_index, scalars_pandas_df_index):
+    if pd.__version__.startswith("2.0") or pd.__version__.startswith("1."):
+        pytest.skip("pandas filter items behavior different pre-2.1")
+    bf_result = scalars_df_index.filter(items=["string_col", "int64_col"]).to_pandas()
+
+    pd_result = scalars_pandas_df_index.filter(items=["string_col", "int64_col"])
+    # Ignore column ordering as pandas order differently depending on version
+    pd.testing.assert_frame_equal(
+        bf_result.sort_index(axis=1),
+        pd_result.sort_index(axis=1),
+    )
+
+
+def test_df_columns_filter_like(scalars_df_index, scalars_pandas_df_index):
+    bf_result = scalars_df_index.filter(like="64_col").to_pandas()
+
+    pd_result = scalars_pandas_df_index.filter(like="64_col")
+
+    pd.testing.assert_frame_equal(
+        bf_result,
+        pd_result,
+    )
+
+
+def test_df_columns_filter_regex(scalars_df_index, scalars_pandas_df_index):
+    bf_result = scalars_df_index.filter(regex="^[^_]+$").to_pandas()
+
+    pd_result = scalars_pandas_df_index.filter(regex="^[^_]+$")
+
+    pd.testing.assert_frame_equal(
+        bf_result,
+        pd_result,
+    )
+
+
+def test_df_rows_filter_items(scalars_df_index, scalars_pandas_df_index):
+    if pd.__version__.startswith("2.0") or pd.__version__.startswith("1."):
+        pytest.skip("pandas filter items behavior different pre-2.1")
+    bf_result = scalars_df_index.filter(items=[5, 1, 3], axis=0).to_pandas()
+
+    pd_result = scalars_pandas_df_index.filter(items=[5, 1, 3], axis=0)
+
+    # Pandas uses int64 instead of Int64 (nullable) dtype.
+    pd_result.index = pd_result.index.astype(pd.Int64Dtype())
+    # Ignore ordering as pandas order differently depending on version
+    assert_pandas_df_equal(
+        bf_result,
+        pd_result,
+        ignore_order=True,
+        check_names=False,
+    )
+
+
+def test_df_rows_filter_like(scalars_df_index, scalars_pandas_df_index):
+    scalars_df_index = scalars_df_index.copy().set_index("string_col")
+    scalars_pandas_df_index = scalars_pandas_df_index.copy().set_index("string_col")
+
+    bf_result = scalars_df_index.filter(like="ello", axis=0).to_pandas()
+
+    pd_result = scalars_pandas_df_index.filter(like="ello", axis=0)
+
+    pd.testing.assert_frame_equal(
+        bf_result,
+        pd_result,
+    )
+
+
+def test_df_rows_filter_regex(scalars_df_index, scalars_pandas_df_index):
+    scalars_df_index = scalars_df_index.copy().set_index("string_col")
+    scalars_pandas_df_index = scalars_pandas_df_index.copy().set_index("string_col")
+
+    bf_result = scalars_df_index.filter(regex="^[GH].*", axis=0).to_pandas()
+
+    pd_result = scalars_pandas_df_index.filter(regex="^[GH].*", axis=0)
+
+    pd.testing.assert_frame_equal(
+        bf_result,
+        pd_result,
+    )
+
+
+def test_df_reindex_rows_list(scalars_dfs_maybe_ordered):
+    scalars_df_index, scalars_pandas_df_index = scalars_dfs_maybe_ordered
+    bf_result = scalars_df_index.reindex(index=[5, 1, 3, 99, 1])
+
+    pd_result = scalars_pandas_df_index.reindex(index=[5, 1, 3, 99, 1])
+
+    # Pandas uses int64 instead of Int64 (nullable) dtype.
+    pd_result.index = pd_result.index.astype(pd.Int64Dtype())
+    assert_dfs_equivalent(
+        pd_result,
+        bf_result,
+    )
+
+
+def test_df_reindex_rows_index(scalars_df_index, scalars_pandas_df_index):
+    bf_result = scalars_df_index.reindex(
+        index=pd.Index([5, 1, 3, 99, 1], name="newname")
+    ).to_pandas()
+
+    pd_result = scalars_pandas_df_index.reindex(
+        index=pd.Index([5, 1, 3, 99, 1], name="newname")
+    )
+
+    # Pandas uses int64 instead of Int64 (nullable) dtype.
+    pd_result.index = pd_result.index.astype(pd.Int64Dtype())
+    pd.testing.assert_frame_equal(
+        bf_result,
+        pd_result,
+    )
+
+
+def test_df_reindex_nonunique(scalars_df_index):
+    with pytest.raises(ValueError):
+        # int64_too is non-unique
+        scalars_df_index.set_index("int64_too").reindex(
+            index=[5, 1, 3, 99, 1], validate=True
+        )
+
+
+def test_df_reindex_columns(scalars_df_index, scalars_pandas_df_index):
+    bf_result = scalars_df_index.reindex(
+        columns=["not_a_col", "int64_col", "int64_too"]
+    ).to_pandas()
+
+    pd_result = scalars_pandas_df_index.reindex(
+        columns=["not_a_col", "int64_col", "int64_too"]
+    )
+
+    # Pandas uses float64 as default for newly created empty column, bf uses Float64
+    pd_result.not_a_col = pd_result.not_a_col.astype(pandas.Float64Dtype())
+    pd.testing.assert_frame_equal(
+        bf_result,
+        pd_result,
+    )
+
+
+def test_df_reindex_columns_with_same_order(scalars_df_index, scalars_pandas_df_index):
+    # First, make sure the two dataframes have the same columns in order.
+    columns = ["int64_col", "int64_too"]
+    bf = scalars_df_index[columns]
+    pd_df = scalars_pandas_df_index[columns]
+
+    bf_result = bf.reindex(columns=columns).to_pandas()
+    pd_result = pd_df.reindex(columns=columns)
+
+    pd.testing.assert_frame_equal(
+        bf_result,
+        pd_result,
+    )
+
+
+def test_df_equals_identical(scalars_df_index, scalars_pandas_df_index):
+    unsupported = [
+        "geography_col",
+    ]
+    scalars_df_index = scalars_df_index.drop(columns=unsupported)
+    scalars_pandas_df_index = scalars_pandas_df_index.drop(columns=unsupported)
+
+    bf_result = scalars_df_index.equals(scalars_df_index)
+    pd_result = scalars_pandas_df_index.equals(scalars_pandas_df_index)
+
+    assert pd_result == bf_result
+
+
+def test_df_equals_series(scalars_df_index, scalars_pandas_df_index):
+    bf_result = scalars_df_index[["int64_col"]].equals(scalars_df_index["int64_col"])
+    pd_result = scalars_pandas_df_index[["int64_col"]].equals(
+        scalars_pandas_df_index["int64_col"]
+    )
+
+    assert pd_result == bf_result
+
+
+def test_df_equals_different_dtype(scalars_df_index, scalars_pandas_df_index):
+    columns = ["int64_col", "int64_too"]
+    scalars_df_index = scalars_df_index[columns]
+    scalars_pandas_df_index = scalars_pandas_df_index[columns]
+
+    bf_modified = scalars_df_index.copy()
+    bf_modified = bf_modified.astype("Float64")
+
+    pd_modified = scalars_pandas_df_index.copy()
+    pd_modified = pd_modified.astype("Float64")
+
+    bf_result = scalars_df_index.equals(bf_modified)
+    pd_result = scalars_pandas_df_index.equals(pd_modified)
+
+    assert pd_result == bf_result
+
+
+def test_df_equals_different_values(scalars_df_index, scalars_pandas_df_index):
+    columns = ["int64_col", "int64_too"]
+    scalars_df_index = scalars_df_index[columns]
+    scalars_pandas_df_index = scalars_pandas_df_index[columns]
+
+    bf_modified = scalars_df_index.copy()
+    bf_modified["int64_col"] = bf_modified.int64_col + 1
+
+    pd_modified = scalars_pandas_df_index.copy()
+    pd_modified["int64_col"] = pd_modified.int64_col + 1
+
+    bf_result = scalars_df_index.equals(bf_modified)
+    pd_result = scalars_pandas_df_index.equals(pd_modified)
+
+    assert pd_result == bf_result
+
+
+def test_df_equals_extra_column(scalars_df_index, scalars_pandas_df_index):
+    columns = ["int64_col", "int64_too"]
+    more_columns = ["int64_col", "int64_too", "float64_col"]
+
+    bf_result = scalars_df_index[columns].equals(scalars_df_index[more_columns])
+    pd_result = scalars_pandas_df_index[columns].equals(
+        scalars_pandas_df_index[more_columns]
+    )
+
+    assert pd_result == bf_result
+
+
+def test_df_reindex_like(scalars_df_index, scalars_pandas_df_index):
+    reindex_target_bf = scalars_df_index.reindex(
+        columns=["not_a_col", "int64_col", "int64_too"], index=[5, 1, 3, 99, 1]
+    )
+    bf_result = scalars_df_index.reindex_like(reindex_target_bf).to_pandas()
+
+    reindex_target_pd = scalars_pandas_df_index.reindex(
+        columns=["not_a_col", "int64_col", "int64_too"], index=[5, 1, 3, 99, 1]
+    )
+    pd_result = scalars_pandas_df_index.reindex_like(reindex_target_pd)
+
+    # Pandas uses float64 as default for newly created empty column, bf uses Float64
+    # Pandas uses int64 instead of Int64 (nullable) dtype.
+    pd_result.index = pd_result.index.astype(pd.Int64Dtype())
+    # Pandas uses float64 as default for newly created empty column, bf uses Float64
+    pd_result.not_a_col = pd_result.not_a_col.astype(pandas.Float64Dtype())
+    pd.testing.assert_frame_equal(
+        bf_result,
+        pd_result,
+    )
+
+
+def test_df_values(scalars_df_index, scalars_pandas_df_index):
+    bf_result = scalars_df_index.values
+
+    pd_result = scalars_pandas_df_index.values
+    # Numpy isn't equipped to compare non-numeric objects, so convert back to dataframe
+    pd.testing.assert_frame_equal(
+        pd.DataFrame(bf_result), pd.DataFrame(pd_result), check_dtype=False
+    )
+
+
+def test_df_to_numpy(scalars_df_index, scalars_pandas_df_index):
+    bf_result = scalars_df_index.to_numpy()
+
+    pd_result = scalars_pandas_df_index.to_numpy()
+    # Numpy isn't equipped to compare non-numeric objects, so convert back to dataframe
+    pd.testing.assert_frame_equal(
+        pd.DataFrame(bf_result), pd.DataFrame(pd_result), check_dtype=False
+    )
+
+
+def test_df___array__(scalars_df_index, scalars_pandas_df_index):
+    bf_result = scalars_df_index.__array__()
+
+    pd_result = scalars_pandas_df_index.__array__()
+    # Numpy isn't equipped to compare non-numeric objects, so convert back to dataframe
+    pd.testing.assert_frame_equal(
+        pd.DataFrame(bf_result), pd.DataFrame(pd_result), check_dtype=False
+    )
+
+
+@pytest.mark.parametrize(
+    ("key",),
+    [
+        ("hello",),
+        (2,),
+        ("int64_col",),
+        (None,),
+    ],
+)
+def test_df_contains(scalars_df_index, scalars_pandas_df_index, key):
+    bf_result = key in scalars_df_index
+    pd_result = key in scalars_pandas_df_index
+
+    assert bf_result == pd_result
+
+
+def test_df_getattr_attribute_error_when_pandas_has(scalars_df_index):
+    # swapaxes is implemented in pandas but not in bigframes
+    with pytest.raises(AttributeError):
+        scalars_df_index.swapaxes()
+
+
+def test_df_getattr_attribute_error(scalars_df_index):
+    with pytest.raises(AttributeError):
+        scalars_df_index.not_a_method()
+
+
+def test_df_getattr_axes():
+    df = dataframe.DataFrame(
+        [[1, 1, 1], [1, 1, 1]], columns=["index", "columns", "my_column"]
+    )
+    assert isinstance(df.index, bigframes.core.indexes.Index)
+    assert isinstance(df.columns, pandas.Index)
+    assert isinstance(df.my_column, series.Series)
+
+
+def test_df_setattr_index():
+    pd_df = pandas.DataFrame(
+        [[1, 1, 1], [1, 1, 1]], columns=["index", "columns", "my_column"]
+    )
+    bf_df = dataframe.DataFrame(pd_df)
+
+    pd_df.index = pandas.Index([4, 5])
+    bf_df.index = [4, 5]
+
+    assert_pandas_df_equal(
+        pd_df, bf_df.to_pandas(), check_index_type=False, check_dtype=False
+    )
+
+
+def test_df_setattr_columns():
+    pd_df = pandas.DataFrame(
+        [[1, 1, 1], [1, 1, 1]], columns=["index", "columns", "my_column"]
+    )
+    bf_df = dataframe.DataFrame(pd_df)
+
+    pd_df.columns = typing.cast(pandas.Index, pandas.Index([4, 5, 6]))
+
+    bf_df.columns = pandas.Index([4, 5, 6])
+
+    assert_pandas_df_equal(
+        pd_df, bf_df.to_pandas(), check_index_type=False, check_dtype=False
+    )
+
+
+def test_df_setattr_modify_column():
+    pd_df = pandas.DataFrame(
+        [[1, 1, 1], [1, 1, 1]], columns=["index", "columns", "my_column"]
+    )
+    bf_df = dataframe.DataFrame(pd_df)
+    pd_df.my_column = [4, 5]
+    bf_df.my_column = [4, 5]
+
+    assert_pandas_df_equal(
+        pd_df, bf_df.to_pandas(), check_index_type=False, check_dtype=False
+    )
+
+
+def test_loc_list_string_index(scalars_df_index, scalars_pandas_df_index):
+    index_list = scalars_pandas_df_index.string_col.iloc[[0, 1, 1, 5]].values
+
+    scalars_df_index = scalars_df_index.set_index("string_col")
+    scalars_pandas_df_index = scalars_pandas_df_index.set_index("string_col")
+
+    bf_result = scalars_df_index.loc[index_list].to_pandas()
+    pd_result = scalars_pandas_df_index.loc[index_list]
+
+    pd.testing.assert_frame_equal(
+        bf_result,
+        pd_result,
+    )
+
+
+def test_loc_list_integer_index(scalars_df_index, scalars_pandas_df_index):
+    index_list = [3, 2, 1, 3, 2, 1]
+
+    bf_result = scalars_df_index.loc[index_list]
+    pd_result = scalars_pandas_df_index.loc[index_list]
+
+    pd.testing.assert_frame_equal(
+        bf_result.to_pandas(),
+        pd_result,
+    )
+
+
+def test_loc_list_multiindex(scalars_dfs_maybe_ordered):
+    scalars_df_index, scalars_pandas_df_index = scalars_dfs_maybe_ordered
+    scalars_df_multiindex = scalars_df_index.set_index(["string_col", "int64_col"])
+    scalars_pandas_df_multiindex = scalars_pandas_df_index.set_index(
+        ["string_col", "int64_col"]
+    )
+    index_list = [("Hello, World!", -234892), ("Hello, World!", 123456789)]
+
+    bf_result = scalars_df_multiindex.loc[index_list]
+    pd_result = scalars_pandas_df_multiindex.loc[index_list]
+
+    assert_dfs_equivalent(
+        pd_result,
+        bf_result,
+    )
+
+
+@pytest.mark.parametrize(
+    "index_list",
+    [
+        [0, 1, 2, 3, 4, 4],
+        [0, 0, 0, 5, 4, 7, -2, -5, 3],
+        [-1, -2, -3, -4, -5, -5],
+    ],
+)
+def test_iloc_list(scalars_df_index, scalars_pandas_df_index, index_list):
+    bf_result = scalars_df_index.iloc[index_list]
+    pd_result = scalars_pandas_df_index.iloc[index_list]
+
+    pd.testing.assert_frame_equal(
+        bf_result.to_pandas(),
+        pd_result,
+    )
+
+
+@pytest.mark.parametrize(
+    "index_list",
+    [
+        [0, 1, 2, 3, 4, 4],
+        [0, 0, 0, 5, 4, 7, -2, -5, 3],
+        [-1, -2, -3, -4, -5, -5],
+    ],
+)
+def test_iloc_list_partial_ordering(
+    scalars_df_partial_ordering, scalars_pandas_df_index, index_list
+):
+    bf_result = scalars_df_partial_ordering.iloc[index_list]
+    pd_result = scalars_pandas_df_index.iloc[index_list]
+
+    pd.testing.assert_frame_equal(
+        bf_result.to_pandas(),
+        pd_result,
+    )
+
+
+def test_iloc_list_multiindex(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    scalars_df = scalars_df.copy()
+    scalars_pandas_df = scalars_pandas_df.copy()
+    scalars_df = scalars_df.set_index(["bytes_col", "numeric_col"])
+    scalars_pandas_df = scalars_pandas_df.set_index(["bytes_col", "numeric_col"])
+
+    index_list = [0, 0, 0, 5, 4, 7]
+
+    bf_result = scalars_df.iloc[index_list]
+    pd_result = scalars_pandas_df.iloc[index_list]
+
+    pd.testing.assert_frame_equal(
+        bf_result.to_pandas(),
+        pd_result,
+    )
+
+
+def test_iloc_empty_list(scalars_df_index, scalars_pandas_df_index):
+
+    index_list: List[int] = []
+
+    bf_result = scalars_df_index.iloc[index_list]
+    pd_result = scalars_pandas_df_index.iloc[index_list]
+
+    bf_result = bf_result.to_pandas()
+    assert bf_result.shape == pd_result.shape  # types are known to be different
+
+
+def test_rename_axis(scalars_df_index, scalars_pandas_df_index):
+    bf_result = scalars_df_index.rename_axis("newindexname")
+    pd_result = scalars_pandas_df_index.rename_axis("newindexname")
+
+    pd.testing.assert_frame_equal(
+        bf_result.to_pandas(),
+        pd_result,
+    )
+
+
+def test_rename_axis_nonstring(scalars_df_index, scalars_pandas_df_index):
+    bf_result = scalars_df_index.rename_axis((4,))
+    pd_result = scalars_pandas_df_index.rename_axis((4,))
+
+    pd.testing.assert_frame_equal(
+        bf_result.to_pandas(),
+        pd_result,
+    )
+
+
+def test_loc_bf_series_string_index(scalars_df_index, scalars_pandas_df_index):
+    pd_string_series = scalars_pandas_df_index.string_col.iloc[[0, 5, 1, 1, 5]]
+    bf_string_series = scalars_df_index.string_col.iloc[[0, 5, 1, 1, 5]]
+
+    scalars_df_index = scalars_df_index.set_index("string_col")
+    scalars_pandas_df_index = scalars_pandas_df_index.set_index("string_col")
+
+    bf_result = scalars_df_index.loc[bf_string_series]
+    pd_result = scalars_pandas_df_index.loc[pd_string_series]
+
+    pd.testing.assert_frame_equal(
+        bf_result.to_pandas(),
+        pd_result,
+    )
+
+
+def test_loc_bf_series_multiindex(scalars_df_index, scalars_pandas_df_index):
+    pd_string_series = scalars_pandas_df_index.string_col.iloc[[0, 5, 1, 1, 5]]
+    bf_string_series = scalars_df_index.string_col.iloc[[0, 5, 1, 1, 5]]
+
+    scalars_df_multiindex = scalars_df_index.set_index(["string_col", "int64_col"])
+    scalars_pandas_df_multiindex = scalars_pandas_df_index.set_index(
+        ["string_col", "int64_col"]
+    )
+
+    bf_result = scalars_df_multiindex.loc[bf_string_series]
+    pd_result = scalars_pandas_df_multiindex.loc[pd_string_series]
+
+    pd.testing.assert_frame_equal(
+        bf_result.to_pandas(),
+        pd_result,
+    )
+
+
+def test_loc_bf_index_integer_index(scalars_df_index, scalars_pandas_df_index):
+    pd_index = scalars_pandas_df_index.iloc[[0, 5, 1, 1, 5]].index
+    bf_index = scalars_df_index.iloc[[0, 5, 1, 1, 5]].index
+
+    bf_result = scalars_df_index.loc[bf_index]
+    pd_result = scalars_pandas_df_index.loc[pd_index]
+
+    pd.testing.assert_frame_equal(
+        bf_result.to_pandas(),
+        pd_result,
+    )
+
+
+def test_loc_bf_index_integer_index_renamed_col(
+    scalars_df_index, scalars_pandas_df_index
+):
+    scalars_df_index = scalars_df_index.rename(columns={"int64_col": "rename"})
+    scalars_pandas_df_index = scalars_pandas_df_index.rename(
+        columns={"int64_col": "rename"}
+    )
+
+    pd_index = scalars_pandas_df_index.iloc[[0, 5, 1, 1, 5]].index
+    bf_index = scalars_df_index.iloc[[0, 5, 1, 1, 5]].index
+
+    bf_result = scalars_df_index.loc[bf_index]
+    pd_result = scalars_pandas_df_index.loc[pd_index]
+
+    pd.testing.assert_frame_equal(
+        bf_result.to_pandas(),
+        pd_result,
+    )
+
+
+@pytest.mark.parametrize(
+    ("subset"),
+    [
+        None,
+        "bool_col",
+        ["bool_col", "int64_too"],
+    ],
+)
+@pytest.mark.parametrize(
+    ("keep",),
+    [
+        ("first",),
+        ("last",),
+        (False,),
+    ],
+)
+def test_df_drop_duplicates(scalars_df_index, scalars_pandas_df_index, keep, subset):
+    columns = ["bool_col", "int64_too", "int64_col"]
+    bf_df = scalars_df_index[columns].drop_duplicates(subset, keep=keep).to_pandas()
+    pd_df = scalars_pandas_df_index[columns].drop_duplicates(subset, keep=keep)
+    pd.testing.assert_frame_equal(
+        pd_df,
+        bf_df,
+    )
+
+
+@pytest.mark.parametrize(
+    ("keep",),
+    [
+        ("first",),
+        ("last",),
+        (False,),
+    ],
+)
+def test_df_drop_duplicates_w_json(json_df, keep):
+    bf_df = json_df.drop_duplicates(keep=keep).to_pandas()
+
+    # drop_duplicates relies on pa.compute.dictionary_encode, which is incompatible
+    # with Arrow string extension types. Temporary conversion to standard Pandas
+    # strings is required.
+    json_pandas_df = json_df.to_pandas()
+    json_pandas_df["json_col"] = json_pandas_df["json_col"].astype(
+        pd.StringDtype(storage="pyarrow")
+    )
+
+    pd_df = json_pandas_df.drop_duplicates(keep=keep)
+    pd_df["json_col"] = pd_df["json_col"].astype(dtypes.JSON_DTYPE)
+    pd.testing.assert_frame_equal(
+        pd_df,
+        bf_df,
+    )
+
+
+@pytest.mark.parametrize(
+    ("subset"),
+    [
+        None,
+        ["bool_col"],
+    ],
+)
+@pytest.mark.parametrize(
+    ("keep",),
+    [
+        ("first",),
+        ("last",),
+        (False,),
+    ],
+)
+def test_df_duplicated(scalars_df_index, scalars_pandas_df_index, keep, subset):
+    columns = ["bool_col", "int64_too", "int64_col"]
+    bf_series = scalars_df_index[columns].duplicated(subset, keep=keep).to_pandas()
+    pd_series = scalars_pandas_df_index[columns].duplicated(subset, keep=keep)
+    pd.testing.assert_series_equal(pd_series, bf_series, check_dtype=False)
+
+
+def test_df_from_dict_columns_orient():
+    data = {"a": [1, 2], "b": [3.3, 2.4]}
+    bf_result = dataframe.DataFrame.from_dict(data, orient="columns").to_pandas()
+    pd_result = pd.DataFrame.from_dict(data, orient="columns")
+    assert_pandas_df_equal(
+        pd_result, bf_result, check_dtype=False, check_index_type=False
+    )
+
+
+def test_df_from_dict_index_orient():
+    data = {"a": [1, 2], "b": [3.3, 2.4]}
+    bf_result = dataframe.DataFrame.from_dict(
+        data, orient="index", columns=["col1", "col2"]
+    ).to_pandas()
+    pd_result = pd.DataFrame.from_dict(data, orient="index", columns=["col1", "col2"])
+    assert_pandas_df_equal(
+        pd_result, bf_result, check_dtype=False, check_index_type=False
+    )
+
+
+def test_df_from_dict_tight_orient():
+    data = {
+        "index": [("i1", "i2"), ("i3", "i4")],
+        "columns": ["col1", "col2"],
+        "data": [[1, 2.6], [3, 4.5]],
+        "index_names": ["in1", "in2"],
+        "column_names": ["column_axis"],
+    }
+
+    bf_result = dataframe.DataFrame.from_dict(data, orient="tight").to_pandas()
+    pd_result = pd.DataFrame.from_dict(data, orient="tight")
+    assert_pandas_df_equal(
+        pd_result, bf_result, check_dtype=False, check_index_type=False
+    )
+
+
+def test_df_from_records():
+    records = ((1, "a"), (2.5, "b"), (3.3, "c"), (4.9, "d"))
+
+    bf_result = dataframe.DataFrame.from_records(
+        records, columns=["c1", "c2"]
+    ).to_pandas()
+    pd_result = pd.DataFrame.from_records(records, columns=["c1", "c2"])
+    assert_pandas_df_equal(
+        pd_result, bf_result, check_dtype=False, check_index_type=False
+    )
+
+
+def test_df_to_dict(scalars_df_index, scalars_pandas_df_index):
+    unsupported = ["numeric_col"]  # formatted differently
+    bf_result = scalars_df_index.drop(columns=unsupported).to_dict()
+    pd_result = scalars_pandas_df_index.drop(columns=unsupported).to_dict()
+
+    assert bf_result == pd_result
+
+
+def test_df_to_excel(scalars_df_index, scalars_pandas_df_index):
+    unsupported = ["timestamp_col"]
+    with tempfile.TemporaryFile() as bf_result_file, tempfile.TemporaryFile() as pd_result_file:
+        scalars_df_index.drop(columns=unsupported).to_excel(bf_result_file)
+        scalars_pandas_df_index.drop(columns=unsupported).to_excel(pd_result_file)
+        bf_result = bf_result_file.read()
+        pd_result = bf_result_file.read()
+
+    assert bf_result == pd_result
+
+
+def test_df_to_latex(scalars_df_index, scalars_pandas_df_index):
+    unsupported = ["numeric_col"]  # formatted differently
+    bf_result = scalars_df_index.drop(columns=unsupported).to_latex()
+    pd_result = scalars_pandas_df_index.drop(columns=unsupported).to_latex()
+
+    assert bf_result == pd_result
+
+
+def test_df_to_json_local_str(scalars_df_index, scalars_pandas_df_index):
+    bf_result = scalars_df_index.to_json()
+    # default_handler for arrow types that have no default conversion
+    pd_result = scalars_pandas_df_index.to_json(default_handler=str)
+
+    assert bf_result == pd_result
+
+
+def test_df_to_json_local_file(scalars_df_index, scalars_pandas_df_index):
+    # TODO: supply a reason why this isn't compatible with pandas 1.x
+    pytest.importorskip("pandas", minversion="2.0.0")
+    # duration not fully supported at pandas level
+    scalars_df_index = scalars_df_index.drop(columns="duration_col")
+    scalars_pandas_df_index = scalars_pandas_df_index.drop(columns="duration_col")
+    with tempfile.TemporaryFile() as bf_result_file, tempfile.TemporaryFile() as pd_result_file:
+        scalars_df_index.to_json(bf_result_file, orient="table")
+        # default_handler for arrow types that have no default conversion
+        scalars_pandas_df_index.to_json(
+            pd_result_file, orient="table", default_handler=str
+        )
+
+        bf_result = bf_result_file.read()
+        pd_result = pd_result_file.read()
+
+    assert bf_result == pd_result
+
+
+def test_df_to_csv_local_str(scalars_df_index, scalars_pandas_df_index):
+    bf_result = scalars_df_index.to_csv()
+    # default_handler for arrow types that have no default conversion
+    pd_result = scalars_pandas_df_index.to_csv()
+
+    assert bf_result == pd_result
+
+
+def test_df_to_csv_local_file(scalars_df_index, scalars_pandas_df_index):
+    with tempfile.TemporaryFile() as bf_result_file, tempfile.TemporaryFile() as pd_result_file:
+        scalars_df_index.to_csv(bf_result_file)
+        scalars_pandas_df_index.to_csv(pd_result_file)
+
+        bf_result = bf_result_file.read()
+        pd_result = pd_result_file.read()
+
+    assert bf_result == pd_result
+
+
+def test_df_to_parquet_local_bytes(scalars_df_index, scalars_pandas_df_index):
+    # GEOGRAPHY not supported in parquet export.
+    unsupported = ["geography_col"]
+
+    bf_result = scalars_df_index.drop(columns=unsupported).to_parquet()
+    # default_handler for arrow types that have no default conversion
+    pd_result = scalars_pandas_df_index.drop(columns=unsupported).to_parquet()
+
+    assert bf_result == pd_result
+
+
+def test_df_to_parquet_local_file(scalars_df_index, scalars_pandas_df_index):
+    # GEOGRAPHY not supported in parquet export.
+    unsupported = ["geography_col"]
+    with tempfile.TemporaryFile() as bf_result_file, tempfile.TemporaryFile() as pd_result_file:
+        scalars_df_index.drop(columns=unsupported).to_parquet(bf_result_file)
+        scalars_pandas_df_index.drop(columns=unsupported).to_parquet(pd_result_file)
+
+        bf_result = bf_result_file.read()
+        pd_result = pd_result_file.read()
+
+    assert bf_result == pd_result
+
+
+def test_df_to_records(scalars_df_index, scalars_pandas_df_index):
+    unsupported = ["numeric_col"]
+    bf_result = scalars_df_index.drop(columns=unsupported).to_records()
+    pd_result = scalars_pandas_df_index.drop(columns=unsupported).to_records()
+
+    for bfi, pdi in zip(bf_result, pd_result):
+        for bfj, pdj in zip(bfi, pdi):
+            assert pd.isna(bfj) and pd.isna(pdj) or bfj == pdj
+
+
+def test_df_to_string(scalars_df_index, scalars_pandas_df_index):
+    unsupported = ["numeric_col"]  # formatted differently
+
+    bf_result = scalars_df_index.drop(columns=unsupported).to_string()
+    pd_result = scalars_pandas_df_index.drop(columns=unsupported).to_string()
+
+    assert bf_result == pd_result
+
+
+def test_df_to_html(scalars_df_index, scalars_pandas_df_index):
+    unsupported = ["numeric_col"]  # formatted differently
+
+    bf_result = scalars_df_index.drop(columns=unsupported).to_html()
+    pd_result = scalars_pandas_df_index.drop(columns=unsupported).to_html()
+
+    assert bf_result == pd_result
+
+
+def test_df_to_markdown(scalars_df_index, scalars_pandas_df_index):
+    # Nulls have bug from tabulate https://github.com/astanin/python-tabulate/issues/231
+    bf_result = scalars_df_index.dropna().to_markdown()
+    pd_result = scalars_pandas_df_index.dropna().to_markdown()
+
+    assert bf_result == pd_result
+
+
+def test_df_to_pickle(scalars_df_index, scalars_pandas_df_index):
+    with tempfile.TemporaryFile() as bf_result_file, tempfile.TemporaryFile() as pd_result_file:
+        scalars_df_index.to_pickle(bf_result_file)
+        scalars_pandas_df_index.to_pickle(pd_result_file)
+        bf_result = bf_result_file.read()
+        pd_result = pd_result_file.read()
+
+    assert bf_result == pd_result
+
+
+def test_df_to_orc(scalars_df_index, scalars_pandas_df_index):
+    unsupported = [
+        "numeric_col",
+        "bytes_col",
+        "date_col",
+        "datetime_col",
+        "time_col",
+        "timestamp_col",
+        "geography_col",
+        "duration_col",
+    ]
+
+    bf_result_file = tempfile.TemporaryFile()
+    pd_result_file = tempfile.TemporaryFile()
+    scalars_df_index.drop(columns=unsupported).to_orc(bf_result_file)
+    scalars_pandas_df_index.drop(columns=unsupported).reset_index().to_orc(
+        pd_result_file
+    )
+    bf_result = bf_result_file.read()
+    pd_result = bf_result_file.read()
+
+    assert bf_result == pd_result
+
+
+@pytest.mark.parametrize(
+    ("expr",),
+    [
+        ("new_col = int64_col + int64_too",),
+        ("new_col = (rowindex > 3) | bool_col",),
+        ("int64_too = bool_col\nnew_col2 = rowindex",),
+    ],
+)
+def test_df_eval(scalars_dfs, expr):
+    # TODO: supply a reason why this isn't compatible with pandas 1.x
+    pytest.importorskip("pandas", minversion="2.0.0")
+    scalars_df, scalars_pandas_df = scalars_dfs
+
+    bf_result = scalars_df.eval(expr).to_pandas()
+    pd_result = scalars_pandas_df.eval(expr)
+
+    pd.testing.assert_frame_equal(bf_result, pd_result)
+
+
+@pytest.mark.parametrize(
+    ("expr",),
+    [
+        ("int64_col > int64_too",),
+        ("bool_col",),
+        ("((int64_col - int64_too) % @local_var) == 0",),
+    ],
+)
+def test_df_query(scalars_dfs, expr):
+    # TODO: supply a reason why this isn't compatible with pandas 1.x
+    pytest.importorskip("pandas", minversion="2.0.0")
+    # local_var is referenced in expressions
+    local_var = 3  # NOQA
+    scalars_df, scalars_pandas_df = scalars_dfs
+
+    bf_result = scalars_df.query(expr).to_pandas()
+    pd_result = scalars_pandas_df.query(expr)
+
+    pd.testing.assert_frame_equal(bf_result, pd_result)
+
+
+@pytest.mark.parametrize(
+    ("subset", "normalize", "ascending", "dropna"),
+    [
+        (None, False, False, False),
+        (None, True, True, True),
+        ("bool_col", True, False, True),
+    ],
+)
+def test_df_value_counts(scalars_dfs, subset, normalize, ascending, dropna):
+    if pd.__version__.startswith("1."):
+        pytest.skip("pandas 1.x produces different column labels.")
+    scalars_df, scalars_pandas_df = scalars_dfs
+
+    bf_result = (
+        scalars_df[["string_col", "bool_col"]]
+        .value_counts(subset, normalize=normalize, ascending=ascending, dropna=dropna)
+        .to_pandas()
+    )
+    pd_result = scalars_pandas_df[["string_col", "bool_col"]].value_counts(
+        subset, normalize=normalize, ascending=ascending, dropna=dropna
+    )
+
+    pd.testing.assert_series_equal(
+        bf_result, pd_result, check_dtype=False, check_index_type=False
+    )
+
+
+@pytest.mark.parametrize(
+    ("na_option", "method", "ascending", "numeric_only", "pct"),
+    [
+        ("keep", "average", True, True, True),
+        ("top", "min", False, False, False),
+        ("bottom", "max", False, False, True),
+        ("top", "first", False, False, False),
+        ("bottom", "dense", False, False, True),
+    ],
+)
+def test_df_rank_with_nulls(
+    scalars_df_index,
+    scalars_pandas_df_index,
+    na_option,
+    method,
+    ascending,
+    numeric_only,
+    pct,
+):
+    unsupported_columns = ["geography_col"]
+    bf_result = (
+        scalars_df_index.drop(columns=unsupported_columns)
+        .rank(
+            na_option=na_option,
+            method=method,
+            ascending=ascending,
+            numeric_only=numeric_only,
+            pct=pct,
+        )
+        .to_pandas()
+    )
+    pd_result = (
+        scalars_pandas_df_index.drop(columns=unsupported_columns)
+        .rank(
+            na_option=na_option,
+            method=method,
+            ascending=ascending,
+            numeric_only=numeric_only,
+            pct=pct,
+        )
+        .astype(pd.Float64Dtype())
+    )
+
+    pd.testing.assert_frame_equal(
+        bf_result,
+        pd_result,
+    )
+
+
+def test_df_bool_interpretation_error(scalars_df_index):
+    with pytest.raises(ValueError):
+        True if scalars_df_index else False
+
+
+def test_query_job_setters(scalars_df_default_index: dataframe.DataFrame):
+    # if allow_large_results=False, might not create query job
+    with bigframes.option_context("compute.allow_large_results", True):
+        job_ids = set()
+        repr(scalars_df_default_index)
+        assert scalars_df_default_index.query_job is not None
+        job_ids.add(scalars_df_default_index.query_job.job_id)
+        scalars_df_default_index.to_pandas(allow_large_results=True)
+        job_ids.add(scalars_df_default_index.query_job.job_id)
+
+        assert len(job_ids) == 2
+
+
+def test_df_cached(scalars_df_index):
+    df = scalars_df_index.set_index(["int64_too", "int64_col"]).sort_values(
+        "string_col"
+    )
+    df = df[df["rowindex_2"] % 2 == 0]
+
+    df_cached_copy = df.cache()
+    pandas.testing.assert_frame_equal(df.to_pandas(), df_cached_copy.to_pandas())
+
+
+def test_df_cached_many_index_cols(scalars_df_index):
+    index_cols = [
+        "int64_too",
+        "time_col",
+        "int64_col",
+        "bool_col",
+        "date_col",
+        "timestamp_col",
+        "string_col",
+    ]
+    df = scalars_df_index.set_index(index_cols)
+    df = df[df["rowindex_2"] % 2 == 0]
+
+    df_cached_copy = df.cache()
+    pandas.testing.assert_frame_equal(df.to_pandas(), df_cached_copy.to_pandas())
+
+
+def test_assign_after_binop_row_joins():
+    pd_df = pd.DataFrame(
+        {
+            "idx1": [1, 1, 1, 1, 2, 2, 2, 2],
+            "idx2": [10, 10, 20, 20, 10, 10, 20, 20],
+            "metric1": [10, 14, 2, 13, 6, 2, 9, 5],
+            "metric2": [25, -3, 8, 2, -1, 0, 0, -4],
+        },
+        dtype=pd.Int64Dtype(),
+    ).set_index(["idx1", "idx2"])
+    bf_df = dataframe.DataFrame(pd_df)
+
+    # Expect implicit joiner to be used, preserving input cardinality rather than getting relational join
+    bf_df["metric_diff"] = bf_df.metric1 - bf_df.metric2
+    pd_df["metric_diff"] = pd_df.metric1 - pd_df.metric2
+
+    assert_pandas_df_equal(bf_df.to_pandas(), pd_df)
+
+
+def test_df_cache_with_implicit_join(scalars_df_index):
+    """expectation is that cache will be used, but no explicit join will be performed"""
+    df = scalars_df_index[["int64_col", "int64_too"]].sort_index().reset_index() + 3
+    df.cache()
+    bf_result = df + (df * 2)
+    sql = bf_result.sql
+
+    # Very crude asserts, want sql to not use join and not use base table, only reference cached table
+    assert "JOIN" not in sql
+    assert "bigframes_testing" not in sql
+
+
+def test_df_dot_inline(session):
+    df1 = pd.DataFrame([[1, 2, 3], [2, 5, 7]])
+    df2 = pd.DataFrame([[2, 4, 8], [1, 5, 10], [3, 6, 9]])
+
+    bf1 = session.read_pandas(df1)
+    bf2 = session.read_pandas(df2)
+    bf_result = bf1.dot(bf2).to_pandas()
+    pd_result = df1.dot(df2)
+
+    # Patch pandas dtypes for testing parity
+    # Pandas uses int64 instead of Int64 (nullable) dtype.
+    for name in pd_result.columns:
+        pd_result[name] = pd_result[name].astype(pd.Int64Dtype())
+    pd_result.index = pd_result.index.astype(pd.Int64Dtype())
+
+    pd.testing.assert_frame_equal(
+        bf_result,
+        pd_result,
+    )
+
+
+def test_df_dot(
+    matrix_2by3_df, matrix_2by3_pandas_df, matrix_3by4_df, matrix_3by4_pandas_df
+):
+    bf_result = matrix_2by3_df.dot(matrix_3by4_df).to_pandas()
+    pd_result = matrix_2by3_pandas_df.dot(matrix_3by4_pandas_df)
+
+    # Patch pandas dtypes for testing parity
+    # Pandas result is object instead of Int64 (nullable) dtype.
+    for name in pd_result.columns:
+        pd_result[name] = pd_result[name].astype(pd.Int64Dtype())
+
+    pd.testing.assert_frame_equal(
+        bf_result,
+        pd_result,
+    )
+
+
+def test_df_dot_operator(
+    matrix_2by3_df, matrix_2by3_pandas_df, matrix_3by4_df, matrix_3by4_pandas_df
+):
+    bf_result = (matrix_2by3_df @ matrix_3by4_df).to_pandas()
+    pd_result = matrix_2by3_pandas_df @ matrix_3by4_pandas_df
+
+    # Patch pandas dtypes for testing parity
+    # Pandas result is object instead of Int64 (nullable) dtype.
+    for name in pd_result.columns:
+        pd_result[name] = pd_result[name].astype(pd.Int64Dtype())
+
+    pd.testing.assert_frame_equal(
+        bf_result,
+        pd_result,
+    )
+
+
+def test_df_dot_series_inline():
+    left = [[1, 2, 3], [2, 5, 7]]
+    right = [2, 1, 3]
+
+    bf1 = dataframe.DataFrame(left)
+    bf2 = series.Series(right)
+    bf_result = bf1.dot(bf2).to_pandas()
+
+    df1 = pd.DataFrame(left)
+    df2 = pd.Series(right)
+    pd_result = df1.dot(df2)
+
+    # Patch pandas dtypes for testing parity
+    # Pandas result is int64 instead of Int64 (nullable) dtype.
+    pd_result = pd_result.astype(pd.Int64Dtype())
+    pd_result.index = pd_result.index.astype(pd.Int64Dtype())
+
+    pd.testing.assert_series_equal(
+        bf_result,
+        pd_result,
+    )
+
+
+def test_df_dot_series(
+    matrix_2by3_df, matrix_2by3_pandas_df, matrix_3by4_df, matrix_3by4_pandas_df
+):
+    bf_result = matrix_2by3_df.dot(matrix_3by4_df["x"]).to_pandas()
+    pd_result = matrix_2by3_pandas_df.dot(matrix_3by4_pandas_df["x"])
+
+    # Patch pandas dtypes for testing parity
+    # Pandas result is object instead of Int64 (nullable) dtype.
+    pd_result = pd_result.astype(pd.Int64Dtype())
+
+    pd.testing.assert_series_equal(
+        bf_result,
+        pd_result,
+    )
+
+
+def test_df_dot_operator_series(
+    matrix_2by3_df, matrix_2by3_pandas_df, matrix_3by4_df, matrix_3by4_pandas_df
+):
+    bf_result = (matrix_2by3_df @ matrix_3by4_df["x"]).to_pandas()
+    pd_result = matrix_2by3_pandas_df @ matrix_3by4_pandas_df["x"]
+
+    # Patch pandas dtypes for testing parity
+    # Pandas result is object instead of Int64 (nullable) dtype.
+    pd_result = pd_result.astype(pd.Int64Dtype())
+
+    pd.testing.assert_series_equal(
+        bf_result,
+        pd_result,
+    )
+
+
+# TODO(tswast): We may be able to re-enable this test after we break large
+# queries up in https://github.com/googleapis/python-bigquery-dataframes/pull/427
+@pytest.mark.skipif(
+    sys.version_info >= (3, 12),
+    # See: https://github.com/python/cpython/issues/112282
+    reason="setrecursionlimit has no effect on the Python C stack since Python 3.12.",
+)
+def test_recursion_limit(scalars_df_index):
+    scalars_df_index = scalars_df_index[["int64_too", "int64_col", "float64_col"]]
+    for i in range(400):
+        scalars_df_index = scalars_df_index + 4
+    scalars_df_index.to_pandas()
+
+
+@pytest.mark.skipif(
+    reason="b/366477265: Skip until query complexity error can be reliably triggered."
+)
+def test_query_complexity_error(scalars_df_index):
+    # This test requires automatic caching/query decomposition to be turned off
+    bf_df = scalars_df_index
+    for _ in range(8):
+        bf_df = bf_df.merge(bf_df, on="int64_col").head(30)
+        bf_df = bf_df[bf_df.columns[:20]]
+
+    with pytest.raises(
+        bigframes.exceptions.QueryComplexityError, match=r"Try using DataFrame\.cache"
+    ):
+        bf_df.to_pandas()
+
+
+def test_query_complexity_repeated_joins(
+    scalars_df_index, scalars_pandas_df_index, with_multiquery_execution
+):
+    pd_df = scalars_pandas_df_index
+    bf_df = scalars_df_index
+    for _ in range(8):
+        # recursively join, resuling in 2^8 - 1 = 255 joins
+        pd_df = pd_df.merge(pd_df, on="int64_col").head(30)
+        pd_df = pd_df[pd_df.columns[:20]]
+        bf_df = bf_df.merge(bf_df, on="int64_col").head(30)
+        bf_df = bf_df[bf_df.columns[:20]]
+
+    bf_result = bf_df.to_pandas()
+    pd_result = pd_df
+    assert_pandas_df_equal(bf_result, pd_result, check_index_type=False)
+
+
+def test_query_complexity_repeated_subtrees(
+    scalars_df_index, scalars_pandas_df_index, with_multiquery_execution
+):
+    # Recursively union the data, if fully inlined has 10^5 identical root tables.
+    pd_df = scalars_pandas_df_index
+    bf_df = scalars_df_index
+    for _ in range(5):
+        pd_df = pd.concat(10 * [pd_df]).head(5)
+        bf_df = bpd.concat(10 * [bf_df]).head(5)
+    bf_result = bf_df.to_pandas()
+    pd_result = pd_df
+    assert_pandas_df_equal(bf_result, pd_result)
+
+
+@pytest.mark.skipif(
+    sys.version_info >= (3, 12),
+    # See: https://github.com/python/cpython/issues/112282
+    reason="setrecursionlimit has no effect on the Python C stack since Python 3.12.",
+)
+def test_query_complexity_repeated_analytic(scalars_df_index, scalars_pandas_df_index):
+    bf_df = scalars_df_index[["int64_col", "int64_too"]]
+    pd_df = scalars_pandas_df_index[["int64_col", "int64_too"]]
+    # Uses LAG analytic operator, each in a new SELECT
+    for _ in range(50):
+        bf_df = bf_df.diff()
+        pd_df = pd_df.diff()
+    bf_result = bf_df.to_pandas()
+    pd_result = pd_df
+    assert_pandas_df_equal(bf_result, pd_result)
+
+
+def test_to_gbq_and_create_dataset(session, scalars_df_index, dataset_id_not_created):
+    dataset_id = dataset_id_not_created
+    destination_table = f"{dataset_id}.scalars_df"
+
+    result_table = scalars_df_index.to_gbq(destination_table)
+    assert (
+        result_table == destination_table
+        if destination_table
+        else result_table is not None
+    )
+
+    loaded_scalars_df_index = session.read_gbq(result_table)
+    assert not loaded_scalars_df_index.empty
+
+
+def test_read_gbq_to_pandas_no_exec(unordered_session: bigframes.Session):
+    metrics = unordered_session._metrics
+    execs_pre = metrics.execution_count
+    df = unordered_session.read_gbq("bigquery-public-data.ml_datasets.penguins")
+    df.to_pandas()
+    execs_post = metrics.execution_count
+    assert df.shape == (344, 7)
+    assert execs_pre == execs_post
+
+
+def test_to_gbq_table_labels(scalars_df_index):
+    destination_table = "bigframes-dev.bigframes_tests_sys.table_labels"
+    result_table = scalars_df_index.to_gbq(
+        destination_table, labels={"test": "labels"}, if_exists="replace"
+    )
+    client = scalars_df_index._session.bqclient
+    table = client.get_table(result_table)
+    assert table.labels
+    assert table.labels["test"] == "labels"
+
+
+@pytest.mark.parametrize(
+    ("col_names", "ignore_index"),
+    [
+        pytest.param(["A"], False, id="one_array_false"),
+        pytest.param(["A"], True, id="one_array_true"),
+        pytest.param(["B"], False, id="one_float_false"),
+        pytest.param(["B"], True, id="one_float_true"),
+        pytest.param(["A", "C"], False, id="two_arrays_false"),
+        pytest.param(["A", "C"], True, id="two_arrays_true"),
+    ],
+)
+def test_dataframe_explode(col_names, ignore_index, session):
+    data = {
+        "A": [[0, 1, 2], [], [3, 4]],
+        "B": 3,
+        "C": [["a", "b", "c"], np.nan, ["d", "e"]],
+    }
+
+    metrics = session._metrics
+    df = bpd.DataFrame(data, session=session)
+    pd_df = df.to_pandas()
+    pd_result = pd_df.explode(col_names, ignore_index=ignore_index)
+    bf_result = df.explode(col_names, ignore_index=ignore_index)
+
+    # Check that to_pandas() results in at most a single query execution
+    execs_pre = metrics.execution_count
+    bf_materialized = bf_result.to_pandas()
+    execs_post = metrics.execution_count
+
+    pd.testing.assert_frame_equal(
+        bf_materialized,
+        pd_result,
+        check_index_type=False,
+        check_dtype=False,
+    )
+    # we test this property on this method in particular as compilation
+    # is non-deterministic and won't use the query cache as implemented
+    assert execs_post - execs_pre <= 1
+
+
+@pytest.mark.parametrize(
+    ("ignore_index", "ordered"),
+    [
+        pytest.param(True, True, id="include_index_ordered"),
+        pytest.param(True, False, id="include_index_unordered"),
+        pytest.param(False, True, id="ignore_index_ordered"),
+    ],
+)
+def test_dataframe_explode_reserve_order(ignore_index, ordered):
+    data = {
+        "a": [np.random.randint(0, 10, 10) for _ in range(10)],
+        "b": [np.random.randint(0, 10, 10) for _ in range(10)],
+    }
+    df = bpd.DataFrame(data)
+    pd_df = pd.DataFrame(data)
+
+    res = df.explode(["a", "b"], ignore_index=ignore_index).to_pandas(ordered=ordered)
+    pd_res = pd_df.explode(["a", "b"], ignore_index=ignore_index).astype(
+        pd.Int64Dtype()
+    )
+    pd.testing.assert_frame_equal(
+        res if ordered else res.sort_index(),
+        pd_res,
+        check_index_type=False,
+    )
+
+
+@pytest.mark.parametrize(
+    ("col_names"),
+    [
+        pytest.param([], id="empty", marks=pytest.mark.xfail(raises=ValueError)),
+        pytest.param(
+            ["A", "A"], id="duplicate", marks=pytest.mark.xfail(raises=ValueError)
+        ),
+        pytest.param("unknown", id="unknown", marks=pytest.mark.xfail(raises=KeyError)),
+    ],
+)
+def test_dataframe_explode_xfail(col_names):
+    df = bpd.DataFrame({"A": [[0, 1, 2], [], [3, 4]]})
+    df.explode(col_names)
+
+
+@pytest.mark.parametrize(
+    ("on", "rule", "origin"),
+    [
+        pytest.param("datetime_col", "100D", "start"),
+        pytest.param("datetime_col", "30W", "start"),
+        pytest.param("datetime_col", "5M", "epoch"),
+        pytest.param("datetime_col", "3Q", "start_day"),
+        pytest.param("datetime_col", "3YE", "start"),
+        pytest.param(
+            "int64_col", "100D", "start", marks=pytest.mark.xfail(raises=TypeError)
+        ),
+        pytest.param(
+            "datetime_col", "100D", "end", marks=pytest.mark.xfail(raises=ValueError)
+        ),
+    ],
+)
+def test__resample_with_column(
+    scalars_df_index, scalars_pandas_df_index, on, rule, origin
+):
+    # TODO: supply a reason why this isn't compatible with pandas 1.x
+    pytest.importorskip("pandas", minversion="2.0.0")
+    bf_result = (
+        scalars_df_index._resample(rule=rule, on=on, origin=origin)[
+            ["int64_col", "int64_too"]
+        ]
+        .max()
+        .to_pandas()
+    )
+    pd_result = scalars_pandas_df_index.resample(rule=rule, on=on, origin=origin)[
+        ["int64_col", "int64_too"]
+    ].max()
+    pd.testing.assert_frame_equal(
+        bf_result, pd_result, check_dtype=False, check_index_type=False
+    )
+
+
+@pytest.mark.parametrize(
+    ("append", "level", "col", "rule"),
+    [
+        pytest.param(False, None, "timestamp_col", "100d"),
+        pytest.param(True, 1, "timestamp_col", "1200h"),
+        pytest.param(False, None, "datetime_col", "100d"),
+    ],
+)
+def test__resample_with_index(
+    scalars_df_index, scalars_pandas_df_index, append, level, col, rule
+):
+    # TODO: supply a reason why this isn't compatible with pandas 1.x
+    pytest.importorskip("pandas", minversion="2.0.0")
+    scalars_df_index = scalars_df_index.set_index(col, append=append)
+    scalars_pandas_df_index = scalars_pandas_df_index.set_index(col, append=append)
+    bf_result = (
+        scalars_df_index[["int64_col", "int64_too"]]
+        ._resample(rule=rule, level=level)
+        .min()
+        .to_pandas()
+    )
+    pd_result = (
+        scalars_pandas_df_index[["int64_col", "int64_too"]]
+        .resample(rule=rule, level=level)
+        .min()
+    )
+    assert_pandas_df_equal(bf_result, pd_result)
+
+
+@pytest.mark.parametrize(
+    ("rule", "origin", "data"),
+    [
+        (
+            "5h",
+            "epoch",
+            {
+                "timestamp_col": pd.date_range(
+                    start="2021-01-01 13:00:00", periods=30, freq="1h"
+                ),
+                "int64_col": range(30),
+                "int64_too": range(10, 40),
+            },
+        ),
+        (
+            "75min",
+            "start_day",
+            {
+                "timestamp_col": pd.date_range(
+                    start="2021-01-01 13:00:00", periods=30, freq="10min"
+                ),
+                "int64_col": range(30),
+                "int64_too": range(10, 40),
+            },
+        ),
+        (
+            "7s",
+            "epoch",
+            {
+                "timestamp_col": pd.date_range(
+                    start="2021-01-01 13:00:00", periods=30, freq="1s"
+                ),
+                "int64_col": range(30),
+                "int64_too": range(10, 40),
+            },
+        ),
+    ],
+)
+def test__resample_start_time(rule, origin, data):
+    # TODO: supply a reason why this isn't compatible with pandas 1.x
+    pytest.importorskip("pandas", minversion="2.0.0")
+    col = "timestamp_col"
+    scalars_df_index = bpd.DataFrame(data).set_index(col)
+    scalars_pandas_df_index = pd.DataFrame(data).set_index(col)
+    scalars_pandas_df_index.index.name = None
+
+    bf_result = scalars_df_index._resample(rule=rule, origin=origin).min().to_pandas()
+
+    pd_result = scalars_pandas_df_index.resample(rule=rule, origin=origin).min()
+
+    pd.testing.assert_frame_equal(
+        bf_result, pd_result, check_dtype=False, check_index_type=False
+    )
+
+
+@pytest.mark.parametrize(
+    "dtype",
+    [
+        pytest.param("string[pyarrow]", id="type-string"),
+        pytest.param(pd.StringDtype(storage="pyarrow"), id="type-literal"),
+        pytest.param(
+            {"bool_col": "string[pyarrow]", "int64_col": pd.Float64Dtype()},
+            id="multiple-types",
+        ),
+    ],
+)
+def test_df_astype(scalars_dfs, dtype):
+    bf_df, pd_df = scalars_dfs
+    target_cols = ["bool_col", "int64_col"]
+    bf_df = bf_df[target_cols]
+    pd_df = pd_df[target_cols]
+
+    bf_result = bf_df.astype(dtype).to_pandas()
+    pd_result = pd_df.astype(dtype)
+
+    pd.testing.assert_frame_equal(bf_result, pd_result, check_index_type=False)
+
+
+def test_df_astype_python_types(scalars_dfs):
+    bf_df, pd_df = scalars_dfs
+    target_cols = ["bool_col", "int64_col"]
+    bf_df = bf_df[target_cols]
+    pd_df = pd_df[target_cols]
+
+    bf_result = bf_df.astype({"bool_col": str, "int64_col": float}).to_pandas()
+    pd_result = pd_df.astype(
+        {"bool_col": "string[pyarrow]", "int64_col": pd.Float64Dtype()}
+    )
+
+    pd.testing.assert_frame_equal(bf_result, pd_result, check_index_type=False)
+
+
+def test_astype_invalid_type_fail(scalars_dfs):
+    bf_df, _ = scalars_dfs
+
+    with pytest.raises(TypeError, match=r".*Share your use case with.*"):
+        bf_df.astype(123)
+
+
+def test_agg_with_dict_lists_strings(scalars_dfs):
+    bf_df, pd_df = scalars_dfs
+    agg_funcs = {
+        "int64_too": ["min", "max"],
+        "int64_col": ["min", "count"],
+    }
+
+    bf_result = bf_df.agg(agg_funcs).to_pandas()
+    pd_result = pd_df.agg(agg_funcs)
+
+    pd.testing.assert_frame_equal(
+        bf_result, pd_result, check_dtype=False, check_index_type=False
+    )
+
+
+def test_agg_with_dict_lists_callables(scalars_dfs):
+    bf_df, pd_df = scalars_dfs
+    agg_funcs = {
+        "int64_too": [np.min, np.max],
+        "int64_col": [np.min, np.var],
+    }
+
+    bf_result = bf_df.agg(agg_funcs).to_pandas()
+    pd_result = pd_df.agg(agg_funcs)
+
+    pd.testing.assert_frame_equal(
+        bf_result, pd_result, check_dtype=False, check_index_type=False
+    )
+
+
+def test_agg_with_dict_list_and_str(scalars_dfs):
+    bf_df, pd_df = scalars_dfs
+    agg_funcs = {
+        "int64_too": ["min", "max"],
+        "int64_col": "sum",
+    }
+
+    bf_result = bf_df.agg(agg_funcs).to_pandas()
+    pd_result = pd_df.agg(agg_funcs)
+
+    pd.testing.assert_frame_equal(
+        bf_result, pd_result, check_dtype=False, check_index_type=False
+    )
+
+
+def test_agg_with_dict_strs(scalars_dfs):
+    bf_df, pd_df = scalars_dfs
+    agg_funcs = {
+        "int64_too": "min",
+        "int64_col": "sum",
+        "float64_col": "max",
+    }
+
+    bf_result = bf_df.agg(agg_funcs).to_pandas()
+    pd_result = pd_df.agg(agg_funcs)
+    pd_result.index = pd_result.index.astype("string[pyarrow]")
+
+    pd.testing.assert_series_equal(
+        bf_result, pd_result, check_dtype=False, check_index_type=False
+    )
+
+
+def test_agg_with_dict_containing_non_existing_col_raise_key_error(scalars_dfs):
+    bf_df, _ = scalars_dfs
+    agg_funcs = {
+        "int64_too": ["min", "max"],
+        "nonexisting_col": ["count"],
+    }
+
+    with pytest.raises(KeyError):
+        bf_df.agg(agg_funcs)

From 6801ca4dfef8928e8a056df46dcade5e55859f4c Mon Sep 17 00:00:00 2001
From: Shuowei Li <shuowei@google.com>
Date: Mon, 27 Oct 2025 23:49:05 +0000
Subject: [PATCH 33/37] notebook update

---
 notebooks/dataframes/anywidget_mode.ipynb | 66 +++++++----------------
 1 file changed, 19 insertions(+), 47 deletions(-)

diff --git a/notebooks/dataframes/anywidget_mode.ipynb b/notebooks/dataframes/anywidget_mode.ipynb
index 154afea7e1..62caa4c7ee 100644
--- a/notebooks/dataframes/anywidget_mode.ipynb
+++ b/notebooks/dataframes/anywidget_mode.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 1,
    "id": "d10bfca4",
    "metadata": {},
    "outputs": [],
@@ -32,7 +32,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 2,
    "id": "ca22f059",
    "metadata": {},
    "outputs": [],
@@ -50,7 +50,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 3,
    "id": "1bc5aaf3",
    "metadata": {},
    "outputs": [],
@@ -69,7 +69,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 4,
    "id": "f289d250",
    "metadata": {},
    "outputs": [
@@ -96,7 +96,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 5,
    "id": "42bb02ab",
    "metadata": {},
    "outputs": [
@@ -123,7 +123,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 6,
    "id": "ce250157",
    "metadata": {},
    "outputs": [
@@ -142,7 +142,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "6e46f6d1352043a4baee57fa089f2b0c",
+       "model_id": "1d718cdbafcb42898120637cdb3fa267",
        "version_major": 2,
        "version_minor": 0
       },
@@ -160,7 +160,7 @@
        "Computation deferred. Computation will process 171.4 MB"
       ]
      },
-     "execution_count": 7,
+     "execution_count": 6,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -179,22 +179,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 7,
    "id": "6920d49b",
    "metadata": {},
    "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "✅ Completed. "
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
     {
      "data": {
       "text/html": [
@@ -217,7 +205,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "88d370b617b545809eb7bb8e5c66ea0e",
+       "model_id": "519297c3ad19403aa844cbeabcd5eb44",
        "version_major": 2,
        "version_minor": 0
       },
@@ -251,7 +239,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 8,
    "id": "12b68f15",
    "metadata": {},
    "outputs": [
@@ -288,24 +276,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 9,
    "id": "a9d5d13a",
    "metadata": {},
    "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "✅ Completed. \n",
-       "    Query processed 171.4 MB in a moment of slot time.\n",
-       "    "
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
     {
      "data": {
       "text/html": [
@@ -330,7 +304,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "dec19e8788b74219b88bccfc65e3b9c0",
+       "model_id": "37ba207603aa40a38c9786a210e712fd",
        "version_major": 2,
        "version_minor": 0
       },
@@ -361,7 +335,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 10,
    "id": "added-cell-1",
    "metadata": {},
    "outputs": [
@@ -369,7 +343,7 @@
      "data": {
       "text/html": [
        "✅ Completed. \n",
-       "    Query processed 85.9 kB in 21 seconds of slot time.\n",
+       "    Query processed 85.9 kB in 23 seconds of slot time.\n",
        "    "
       ],
       "text/plain": [
@@ -383,11 +357,11 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n",
+      "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:969: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n",
       "instead of using `db_dtypes` in the future when available in pandas\n",
       "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n",
       "  warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n",
-      "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dataframe.py:869: UserWarning: Converting JSON columns to strings for display. This is temporary and will be removed when the frontend supports JSON types.\n",
+      "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dataframe.py:867: UserWarning: Converting JSON columns to strings for display. This is temporary and will be removed when the frontend supports JSON types.\n",
       "  warnings.warn(\n"
      ]
     },
@@ -408,7 +382,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "774357b4083c47c8a5e1fd33bb6af188",
+       "model_id": "379998ea9a744e7b8afd9c1bcb36548d",
        "version_major": 2,
        "version_minor": 0
       },
@@ -426,7 +400,7 @@
        "Computation deferred. Computation will process 0 Bytes"
       ]
      },
-     "execution_count": 11,
+     "execution_count": 10,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -447,7 +421,6 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "3.10.18",
    "display_name": "3.10.18",
    "language": "python",
    "name": "python3"
@@ -463,7 +436,6 @@
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
    "version": "3.10.18"
-   "version": "3.10.18"
   }
  },
  "nbformat": 4,

From 6c3567b7d573dc36e136841c0a2fac6453a3fa76 Mon Sep 17 00:00:00 2001
From: Shuowei Li <shuowei@google.com>
Date: Tue, 28 Oct 2025 00:07:05 +0000
Subject: [PATCH 34/37] call API on local data for complier.py

---
 bigframes/core/compile/polars/compiler.py | 31 ++++-------------------
 1 file changed, 5 insertions(+), 26 deletions(-)

diff --git a/bigframes/core/compile/polars/compiler.py b/bigframes/core/compile/polars/compiler.py
index 0a6605b222..754294ec2f 100644
--- a/bigframes/core/compile/polars/compiler.py
+++ b/bigframes/core/compile/polars/compiler.py
@@ -622,32 +622,11 @@ def compile_readlocal(self, node: nodes.ReadLocalNode):
                 for scan_item in node.scan_list.items
             }
 
-            # Workaround for PyArrow bug https://github.com/apache/arrow/issues/45262
-            # Convert JSON columns to strings before Polars processing
-            arrow_data = node.local_data_source.data
-            schema = arrow_data.schema
-
-            # Check if any columns are JSON type
-            json_field_indices = [
-                i
-                for i, field in enumerate(schema)
-                if pa.types.is_extension_type(field.type)
-                and field.type.extension_name == "google:sqlType:json"
-            ]
-
-            if json_field_indices:
-                # Convert JSON columns to string columns
-                new_arrays = []
-                new_fields = []
-                for i, field in enumerate(schema):
-                    if i in json_field_indices:
-                        # Cast JSON to string
-                        new_arrays.append(arrow_data.column(i).cast(pa.string()))
-                        new_fields.append(pa.field(field.name, pa.string()))
-                    else:
-                        new_arrays.append(arrow_data.column(i))
-                        new_fields.append(field)
-                arrow_data = pa.table(new_arrays, schema=pa.schema(new_fields))
+            if hasattr(node.local_data_source, "to_arrow"):
+                schema, batches = node.local_data_source.to_arrow(json_type="string")
+                arrow_data = pa.Table.from_batches(batches, schema)
+            else:
+                arrow_data = node.local_data_source.data
 
             lazy_frame = cast(pl.DataFrame, pl.from_arrow(arrow_data)).lazy()
             lazy_frame = lazy_frame.select(cols_to_read.keys()).rename(cols_to_read)

From dba9051306312ced3b05ca253f189e73ad688021 Mon Sep 17 00:00:00 2001
From: Shuowei Li <shuowei@google.com>
Date: Tue, 28 Oct 2025 00:32:06 +0000
Subject: [PATCH 35/37] add more testcase

---
 bigframes/display/anywidget.py            |   2 +
 notebooks/dataframes/anywidget_mode.ipynb |  18 ++-
 tests/system/small/test_anywidget.py      | 131 ++++++++++++++++++++++
 tests/unit/test_dataframe.py              |  23 ++++
 tests/unit/test_polars_compiler.py        |  86 ++++++++++++++
 5 files changed, 255 insertions(+), 5 deletions(-)
 create mode 100644 tests/unit/test_polars_compiler.py

diff --git a/bigframes/display/anywidget.py b/bigframes/display/anywidget.py
index cf5d4e6310..8930c611e9 100644
--- a/bigframes/display/anywidget.py
+++ b/bigframes/display/anywidget.py
@@ -231,6 +231,8 @@ def _set_table_html(self) -> None:
                 cached_data = self._cached_data
             else:
                 break
+
+        # Get the data for the current page
         page_data = cached_data.iloc[start:end]
 
         # Generate HTML table
diff --git a/notebooks/dataframes/anywidget_mode.ipynb b/notebooks/dataframes/anywidget_mode.ipynb
index 62caa4c7ee..744971f69e 100644
--- a/notebooks/dataframes/anywidget_mode.ipynb
+++ b/notebooks/dataframes/anywidget_mode.ipynb
@@ -142,7 +142,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "1d718cdbafcb42898120637cdb3fa267",
+       "model_id": "93dd10072d564a02a0278817d14855a9",
        "version_major": 2,
        "version_minor": 0
       },
@@ -205,7 +205,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "519297c3ad19403aa844cbeabcd5eb44",
+       "model_id": "6e2538d446e344ac8505e4706730243e",
        "version_major": 2,
        "version_minor": 0
       },
@@ -304,7 +304,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "37ba207603aa40a38c9786a210e712fd",
+       "model_id": "d6faf367ea5d44ad9d275506d870557a",
        "version_major": 2,
        "version_minor": 0
       },
@@ -333,6 +333,14 @@
     "The `AI.GENERATE` function in BigQuery returns results in a JSON column. While BigQuery's JSON type is not natively supported by the underlying Arrow `to_pandas_batches()` method used in anywidget mode ([Apache Arrow issue #45262](https://github.com/apache/arrow/issues/45262)), BigQuery Dataframes automatically converts JSON columns to strings for display. This allows you to view the results of generative AI functions seamlessly."
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fdadcad6",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
   {
    "cell_type": "code",
    "execution_count": 10,
@@ -343,7 +351,7 @@
      "data": {
       "text/html": [
        "✅ Completed. \n",
-       "    Query processed 85.9 kB in 23 seconds of slot time.\n",
+       "    Query processed 85.9 kB in 24 seconds of slot time.\n",
        "    "
       ],
       "text/plain": [
@@ -382,7 +390,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "379998ea9a744e7b8afd9c1bcb36548d",
+       "model_id": "b6d6f3bacc2c43fc9a335e6039db12a5",
        "version_major": 2,
        "version_minor": 0
       },
diff --git a/tests/system/small/test_anywidget.py b/tests/system/small/test_anywidget.py
index 890d591de5..0587e13916 100644
--- a/tests/system/small/test_anywidget.py
+++ b/tests/system/small/test_anywidget.py
@@ -527,6 +527,137 @@ def test_json_column_anywidget_mode(mock_display, json_df: bf.dataframe.DataFram
             assert result == ""
 
 
+def mock_execute_result_with_params(
+    self, schema, total_rows_val, arrow_batches_val, *args, **kwargs
+):
+    """
+    Mocks an execution result with configurable total_rows and arrow_batches.
+    """
+    from bigframes.session.executor import ExecuteResult
+
+    return ExecuteResult(
+        iter(arrow_batches_val),
+        schema=schema,
+        query_job=None,
+        total_bytes=None,
+        total_rows=total_rows_val,
+    )
+
+
+def test_widget_row_count_should_be_immutable_after_creation(
+    paginated_bf_df: bf.dataframe.DataFrame,
+):
+    """
+    Given a widget created with a specific configuration when global display
+    options are changed later, the widget's original row_count should remain
+    unchanged.
+    """
+    from bigframes.display.anywidget import TableWidget
+
+    # Use a context manager to ensure the option is reset
+    with bf.option_context("display.repr_mode", "anywidget", "display.max_rows", 2):
+        widget = TableWidget(paginated_bf_df)
+        initial_row_count = widget.row_count
+
+    # Change a global option that could influence row count
+    bf.options.display.max_rows = 10
+
+    # Verify the row count remains immutable.
+    assert widget.row_count == initial_row_count
+
+
+class FaultyIterator:
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        raise ValueError("Simulated read error")
+
+
+def test_widget_should_fallback_to_zero_rows_with_invalid_total_rows(
+    paginated_bf_df: bf.dataframe.DataFrame,
+    monkeypatch: pytest.MonkeyPatch,
+):
+    """
+    Given an internal component fails to return valid execution data,
+    when the TableWidget is created, its error_message should be set and displayed.
+    """
+    # Patch the executor's 'execute' method to simulate an error.
+    monkeypatch.setattr(
+        "bigframes.session.bq_caching_executor.BigQueryCachingExecutor.execute",
+        lambda self, *args, **kwargs: mock_execute_result_with_params(
+            self, paginated_bf_df._block.expr.schema, None, [], *args, **kwargs
+        ),
+    )
+
+    # Create the TableWidget under the error condition.
+    with bf.option_context("display.repr_mode", "anywidget"):
+        from bigframes.display.anywidget import TableWidget
+
+        # The widget should handle the faulty data from the mock without crashing.
+        widget = TableWidget(paginated_bf_df)
+
+    # The widget should have an error message and display it in the HTML.
+    assert widget.row_count == 0
+    assert widget._error_message is not None
+    assert "Could not determine total row count" in widget._error_message
+    assert widget._error_message in widget.table_html
+
+
+def test_widget_row_count_reflects_actual_data_available(
+    paginated_bf_df: bf.dataframe.DataFrame,
+):
+    """
+    Test that widget row_count reflects the actual data available,
+    regardless of theoretical limits.
+    """
+    from bigframes.display.anywidget import TableWidget
+
+    # Set up display options that define a page size.
+    with bf.option_context("display.repr_mode", "anywidget", "display.max_rows", 2):
+        widget = TableWidget(paginated_bf_df)
+
+        # The widget should report the total rows in the DataFrame,
+        # not limited by page_size (which only affects pagination)
+        assert widget.row_count == EXPECTED_ROW_COUNT
+        assert widget.page_size == 2  # Respects the display option
+
+
 # TODO(shuowei): Add tests for custom index and multiindex
 # This may not be necessary for the SQL Cell use case but should be
 # considered for completeness.
+
+
+@pytest.fixture(scope="module")
+def empty_json_df(session: bf.Session) -> bf.dataframe.DataFrame:
+    """Create an empty DataFrame with a JSON column for testing."""
+    import bigframes.dtypes
+
+    pandas_df = pd.DataFrame(
+        {
+            "a": pd.Series(dtype="int64"),
+            "b": pd.Series(dtype=bigframes.dtypes.JSON_DTYPE),
+        }
+    )
+    return session.read_pandas(pandas_df)
+
+
+def test_empty_widget_with_json_column(empty_json_df: bf.dataframe.DataFrame):
+    """Given an empty DataFrame with a JSON column, the widget should render table headers."""
+    with bf.option_context("display.repr_mode", "anywidget"):
+        from bigframes.display.anywidget import TableWidget
+
+        widget = TableWidget(empty_json_df)
+        html = widget.table_html
+
+        assert widget.row_count == 0
+        assert "<table" in html
+        assert "a" in html
+        assert "b" in html
+
+
+def test_json_column_conversion_warning(json_df: bf.dataframe.DataFrame):
+    """Test that a warning is shown when converting JSON columns."""
+    with bf.option_context("display.repr_mode", "anywidget"):
+        with pytest.warns(UserWarning, match="Converting JSON columns to strings"):
+            json_df._repr_html_()
diff --git a/tests/unit/test_dataframe.py b/tests/unit/test_dataframe.py
index 2326f2595b..f223d69a76 100644
--- a/tests/unit/test_dataframe.py
+++ b/tests/unit/test_dataframe.py
@@ -181,3 +181,26 @@ def test_dataframe_ai_property_future_warning(
 
     with pytest.warns(FutureWarning):
         dataframe.ai
+
+
+@pytest.fixture()
+def json_df(polars_session: bigframes.session.Session) -> bigframes.dataframe.DataFrame:
+    """Create a DataFrame with a JSON column for testing."""
+    import bigframes.dtypes
+
+    pandas_df = pd.DataFrame(
+        {
+            "a": [1],
+            "b": ['{"c": 2, "d": 3}'],
+        }
+    )
+    pandas_df["b"] = pandas_df["b"].astype(bigframes.dtypes.JSON_DTYPE)
+    return polars_session.read_pandas(pandas_df)
+
+
+def test_to_pandas_batches_with_json_column(json_df: bigframes.dataframe.DataFrame):
+    """Test that JSON columns are converted to strings in to_pandas_batches."""
+    batches = list(json_df._to_pandas_batches(page_size=10))
+    assert len(batches) > 0
+    # Verify the JSON column is now string type
+    assert batches[0]["b"].dtype == pd.StringDtype(storage="pyarrow")
diff --git a/tests/unit/test_polars_compiler.py b/tests/unit/test_polars_compiler.py
new file mode 100644
index 0000000000..fd620825cc
--- /dev/null
+++ b/tests/unit/test_polars_compiler.py
@@ -0,0 +1,86 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pandas as pd
+import polars as pl
+import pytest
+
+import bigframes as bf
+import bigframes.core.compile.polars.compiler as polars_compiler
+import bigframes.core.nodes as nodes
+import bigframes.operations.json_ops as json_ops
+
+
+def test_polars_to_json_string():
+    """Test ToJSONString operation in Polars compiler."""
+    compiler = polars_compiler.PolarsExpressionCompiler()
+    op = json_ops.ToJSONString()
+    # Polars doesn't have a native JSON type, it uses strings.
+    # The operation is a cast to string.
+    input_expr = pl.lit('{"b": 2}', dtype=pl.String)
+    result = compiler.compile_op(op, input_expr)
+
+    df = pl.DataFrame({"a": ['{"b": 2}']}).lazy()
+    result_df = df.with_columns(result.alias("b")).collect()
+    assert result_df["b"][0] == '{"b": 2}'
+    assert result_df["b"].dtype == pl.String
+
+
+def test_polars_parse_json():
+    """Test ParseJSON operation in Polars compiler."""
+    compiler = polars_compiler.PolarsExpressionCompiler()
+    op = json_ops.ParseJSON()
+    input_expr = pl.lit('{"b": 2}', dtype=pl.String)
+    result = compiler.compile_op(op, input_expr)
+
+    df = pl.DataFrame({"a": ['{"b": 2}']}).lazy()
+    result_df = df.with_columns(result.alias("b")).collect()
+    # The result of json_decode is a struct
+    assert isinstance(result_df["b"][0], dict)
+    assert result_df["b"][0] == {"b": 2}
+
+
+@pytest.mark.skip(reason="Polars does not have json_extract on string expressions")
+def test_polars_json_extract():
+    """Test JSONExtract operation in Polars compiler."""
+    compiler = polars_compiler.PolarsExpressionCompiler()
+    op = json_ops.JSONExtract(json_path="$.b")
+    input_expr = pl.lit('{"a": 1, "b": "hello"}', dtype=pl.String)
+    result = compiler.compile_op(op, input_expr)
+
+    df = pl.DataFrame({"a": ['{"b": "world"}']}).lazy()
+    result_df = df.with_columns(result.alias("b")).collect()
+    # json_extract returns a JSON encoded string
+    assert result_df["b"][0] == '"world"'
+
+
+def test_readlocal_with_json_column(polars_session):
+    """Test ReadLocalNode compilation with JSON columns."""
+    pandas_df = pd.DataFrame({"data": ['{"key": "value"}']})
+    pandas_df["data"] = pandas_df["data"].astype(bf.dtypes.JSON_DTYPE)
+    bf_df = polars_session.read_pandas(pandas_df)
+
+    node = bf_df._block.expr.node
+    # Traverse the node tree to find the ReadLocalNode
+    while not isinstance(node, nodes.ReadLocalNode):
+        node = node.child
+    assert isinstance(node, nodes.ReadLocalNode)
+
+    compiler = polars_compiler.PolarsCompiler()
+    lazy_frame = compiler.compile_node(node)
+    result_df = lazy_frame.collect()
+
+    # The compiler should have converted the JSON column to string.
+    assert result_df.schema["column_0"] == pl.String
+    assert result_df["column_0"][0] == '{"key":"value"}'

From 0420c64a9020ab4f97fc8c471176507e93b7173b Mon Sep 17 00:00:00 2001
From: Shuowei Li <shuowei@google.com>
Date: Tue, 28 Oct 2025 00:51:33 +0000
Subject: [PATCH 36/37] modfiy polars import

---
 tests/unit/test_polars_compiler.py | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/tests/unit/test_polars_compiler.py b/tests/unit/test_polars_compiler.py
index fd620825cc..95be7d5d00 100644
--- a/tests/unit/test_polars_compiler.py
+++ b/tests/unit/test_polars_compiler.py
@@ -13,9 +13,19 @@
 # limitations under the License.
 
 import pandas as pd
-import polars as pl
 import pytest
 
+try:
+    import polars as pl
+
+    POLARS_INSTALLED = True
+except ImportError:
+    POLARS_INSTALLED = False
+
+if not POLARS_INSTALLED:
+    pytest.skip("polars is not installed", allow_module_level=True)
+
+
 import bigframes as bf
 import bigframes.core.compile.polars.compiler as polars_compiler
 import bigframes.core.nodes as nodes
@@ -48,10 +58,9 @@ def test_polars_parse_json():
     result_df = df.with_columns(result.alias("b")).collect()
     # The result of json_decode is a struct
     assert isinstance(result_df["b"][0], dict)
-    assert result_df["b"][0] == {"b": 2}
+    assert result_df["b"][0]["b"] == 2
 
 
-@pytest.mark.skip(reason="Polars does not have json_extract on string expressions")
 def test_polars_json_extract():
     """Test JSONExtract operation in Polars compiler."""
     compiler = polars_compiler.PolarsExpressionCompiler()
@@ -59,10 +68,10 @@ def test_polars_json_extract():
     input_expr = pl.lit('{"a": 1, "b": "hello"}', dtype=pl.String)
     result = compiler.compile_op(op, input_expr)
 
-    df = pl.DataFrame({"a": ['{"b": "world"}']}).lazy()
+    df = pl.DataFrame({"a": ['{"a": 1, "b": "hello"}']}).lazy()
     result_df = df.with_columns(result.alias("b")).collect()
-    # json_extract returns a JSON encoded string
-    assert result_df["b"][0] == '"world"'
+    # json_path_match returns the raw string value
+    assert result_df["b"][0] == "hello"
 
 
 def test_readlocal_with_json_column(polars_session):

From 907cf2c1728a95ddf3dd5b05e2b7917dbbd21ff1 Mon Sep 17 00:00:00 2001
From: Shuowei Li <shuowei@google.com>
Date: Wed, 29 Oct 2025 07:07:46 +0000
Subject: [PATCH 37/37] fix failed tests

---
 bigframes/bigquery/_operations/ai.py      |  7 +++++++
 bigframes/core/compile/polars/compiler.py |  6 +++---
 bigframes/ml/llm.py                       | 11 ++++++++++-
 bigframes/series.py                       |  8 --------
 tests/system/small/test_dataframe.py      | 12 ++++++++++++
 5 files changed, 32 insertions(+), 12 deletions(-)

diff --git a/bigframes/bigquery/_operations/ai.py b/bigframes/bigquery/_operations/ai.py
index 8579f7f298..07f81d87f5 100644
--- a/bigframes/bigquery/_operations/ai.py
+++ b/bigframes/bigquery/_operations/ai.py
@@ -123,6 +123,13 @@ def generate(
     if output_schema is None:
         output_schema_str = None
     else:
+        # Validate output schema types
+        for col_name, col_type in output_schema.items():
+            if col_type.upper() == "JSON":
+                raise ValueError(
+                    "JSON type is not supported in output_schema. "
+                    "Supported types are: STRING, INT64, FLOAT64, BOOL, ARRAY, and STRUCT."
+                )
         output_schema_str = ", ".join(
             [f"{name} {sql_type}" for name, sql_type in output_schema.items()]
         )
diff --git a/bigframes/core/compile/polars/compiler.py b/bigframes/core/compile/polars/compiler.py
index 754294ec2f..e939f80120 100644
--- a/bigframes/core/compile/polars/compiler.py
+++ b/bigframes/core/compile/polars/compiler.py
@@ -434,13 +434,13 @@ def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
 
         @compile_op.register(json_ops.ParseJSON)
         def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
-            # Parse string as JSON - this should decode, not encode
-            return input.str.json_decode()
+            # In Polars, JSON is stored as string, so no decoding needed
+            return input
 
         @compile_op.register(json_ops.JSONExtract)
         def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
             assert isinstance(op, json_ops.JSONExtract)
-            return input.str.json_extract(json_path=op.json_path)
+            return input.str.json_path_match(op.json_path)
 
         @compile_op.register(arr_ops.ToArrayOp)
         def _(self, op: ops.ToArrayOp, *inputs: pl.Expr) -> pl.Expr:
diff --git a/bigframes/ml/llm.py b/bigframes/ml/llm.py
index 531a043c45..edede34e8f 100644
--- a/bigframes/ml/llm.py
+++ b/bigframes/ml/llm.py
@@ -731,8 +731,17 @@ def predict(
             "ground_with_google_search": ground_with_google_search,
         }
         if output_schema:
+            supported_dtypes = (
+                "int64",
+                "float64",
+                "bool",
+                "string",
+                "array<type>",
+                "struct<column type>",
+            )
             output_schema = {
-                k: utils.standardize_type(v) for k, v in output_schema.items()
+                k: utils.standardize_type(v, supported_dtypes=supported_dtypes)
+                for k, v in output_schema.items()
             }
             options["output_schema"] = output_schema
             return self._predict_and_retry(
diff --git a/bigframes/series.py b/bigframes/series.py
index 5448045092..5177bd0f33 100644
--- a/bigframes/series.py
+++ b/bigframes/series.py
@@ -611,14 +611,6 @@ def astype(
             raise ValueError("Argument 'errors' must be one of 'raise' or 'null'")
         dtype = bigframes.dtypes.bigframes_type(dtype)
 
-        # BigQuery doesn't support CAST(json_col AS STRING), but it does support
-        # TO_JSON_STRING(json_col).
-        if (
-            self.dtype == bigframes.dtypes.JSON_DTYPE
-            and dtype == bigframes.dtypes.STRING_DTYPE
-        ):
-            return self._apply_unary_op(ops.json_ops.ToJSONString())
-
         return self._apply_unary_op(
             bigframes.operations.AsTypeOp(to_type=dtype, safe=(errors == "null"))
         )
diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
index 79f8efd00f..a0c0e41a1b 100644
--- a/tests/system/small/test_dataframe.py
+++ b/tests/system/small/test_dataframe.py
@@ -6142,3 +6142,15 @@ def test_agg_with_dict_containing_non_existing_col_raise_key_error(scalars_dfs):
 
     with pytest.raises(KeyError):
         bf_df.agg(agg_funcs)
+
+
+def test_to_pandas_batches_with_json_columns(session):
+    """Test that JSON columns are properly handled in to_pandas_batches."""
+    # Create a DataFrame with JSON column
+    df = session.read_gbq('SELECT JSON \'{"key": "value"}\' as json_col')
+
+    # This should not raise an error
+    batches = df._to_pandas_batches(page_size=10)
+    next(batches)
+
+    # TODO