diff --git a/.asf.yaml b/.asf.yaml
index d71e7def36ad..99fd6fac22c7 100644
--- a/.asf.yaml
+++ b/.asf.yaml
@@ -41,6 +41,7 @@ github:
     - sql
   enabled_merge_buttons:
     squash: true
+    squash_commit_message: PR_TITLE_AND_DESC
     merge: false
     rebase: false
   features:
diff --git a/.github/workflows/audit.yml b/.github/workflows/audit.yml
index 5d5e9e270a65..3685bb2f9a78 100644
--- a/.github/workflows/audit.yml
+++ b/.github/workflows/audit.yml
@@ -42,8 +42,13 @@ jobs:
     steps:
       - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
       - name: Install cargo-audit
-        uses: taiki-e/install-action@5b5de1b4da26ad411330c0454bdd72929bfcbeb2 # v2.62.29
+        uses: taiki-e/install-action@c5b1b6f479c32f356cc6f4ba672a47f63853b13b # v2.62.38
        with:
          tool: cargo-audit
      - name: Run audit check
-       run: cargo audit
+       # RUSTSEC-2025-0111: tokio-tar is used by testcontainers for orchestration
+       # of testing, so it does not impact DataFusion's security
+       # See https://github.com/apache/datafusion/issues/18288
+       # NOTE: can remove this once testcontainers releases a version that includes
+       # https://github.com/testcontainers/testcontainers-rs/pull/852
+       run: cargo audit --ignore RUSTSEC-2025-0111
diff --git a/.github/workflows/extended.yml b/.github/workflows/extended.yml
index 9343997e0568..23bd66a0cf35 100644
--- a/.github/workflows/extended.yml
+++ b/.github/workflows/extended.yml
@@ -36,6 +36,14 @@ on:
      # it is not expected to have many changes in these branches,
      # so running extended tests is not a burden
      - 'branch-*'
+  # Also run for changes to some critical areas that are most likely
+  # to trigger errors in extended tests
+  pull_request:
+    branches: [ '**' ]
+    paths:
+      - 'datafusion/physical*/**/*.rs'
+      - 'datafusion/expr*/**/*.rs'
+      - 'datafusion/optimizer/**/*.rs'
   workflow_dispatch:
     inputs:
       pr_number:
diff --git a/.github/workflows/labeler/labeler-config.yml b/.github/workflows/labeler/labeler-config.yml
index e40813072521..38d88059dab7 100644
--- a/.github/workflows/labeler/labeler-config.yml
+++ b/.github/workflows/labeler/labeler-config.yml
@@ -58,7 +58,7 @@ execution:

 datasource:
   - changed-files:
-      - any-glob-to-any-file: ['datafusion/datasource/**/*', 'datafusion/datasource-avro/**/*', 'datafusion/datasource-csv/**/*', 'datafusion/datasource-json/**/*', 'datafusion/datasource-parquet/**/*']
+      - any-glob-to-any-file: ['datafusion/datasource/**/*', 'datafusion/datasource-avro/**/*', 'datafusion/datasource-arrow/**/*', 'datafusion/datasource-csv/**/*', 'datafusion/datasource-json/**/*', 'datafusion/datasource-parquet/**/*']

 functions:
   - changed-files:
diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index ecdbf031b45b..4b61a04bfb14 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -353,6 +353,19 @@ jobs:
        with:
          save-if: ${{ github.ref_name == 'main' }}
          shared-key: "amd-ci-linux-test-example"
+      - name: Remove unnecessary preinstalled software
+        run: |
+          echo "Disk space before cleanup:"
+          df -h
+          apt-get clean
+          rm -rf /__t/CodeQL
+          rm -rf /__t/PyPy
+          rm -rf /__t/Java_Temurin-Hotspot_jdk
+          rm -rf /__t/Python
+          rm -rf /__t/go
+          rm -rf /__t/Ruby
+          echo "Disk space after cleanup:"
+          df -h
      - name: Run examples
        run: |
          # test datafusion-sql examples
@@ -412,7 +425,7 @@ jobs:
          sudo apt-get update -qq
          sudo apt-get install -y -qq clang
      - name: Setup wasm-pack
-       uses: taiki-e/install-action@5b5de1b4da26ad411330c0454bdd72929bfcbeb2 # v2.62.29
+       uses: taiki-e/install-action@c5b1b6f479c32f356cc6f4ba672a47f63853b13b # v2.62.38
        with:
          tool: wasm-pack
      - name: Run tests with headless mode
@@ -739,7 +752,7 @@ jobs: - name: Setup Rust toolchain uses: ./.github/actions/setup-builder - name: Install cargo-msrv - uses: taiki-e/install-action@5b5de1b4da26ad411330c0454bdd72929bfcbeb2 # v2.62.29 + uses: taiki-e/install-action@c5b1b6f479c32f356cc6f4ba672a47f63853b13b # v2.62.38 with: tool: cargo-msrv diff --git a/Cargo.lock b/Cargo.lock index 00bd64f21eb1..aaa75ecf3247 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -50,15 +50,6 @@ dependencies = [ "core_extensions", ] -[[package]] -name = "addr2line" -version = "0.24.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" -dependencies = [ - "gimli", -] - [[package]] name = "adler2" version = "2.0.1" @@ -84,7 +75,7 @@ checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" dependencies = [ "cfg-if", "const-random", - "getrandom 0.3.3", + "getrandom 0.3.4", "once_cell", "version_check", "zerocopy", @@ -199,7 +190,7 @@ checksum = "3a033b4ced7c585199fb78ef50fca7fe2f444369ec48080c5fd072efa1a03cc7" dependencies = [ "bigdecimal", "bon", - "bzip2 0.6.0", + "bzip2 0.6.1", "crc32fast", "digest", "log", @@ -234,9 +225,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e833808ff2d94ed40d9379848a950d995043c7fb3e81a30b383f4c6033821cc" +checksum = "4df8bb5b0bd64c0b9bc61317fcc480bad0f00e56d3bc32c69a4c8dada4786bae" dependencies = [ "arrow-arith", "arrow-array", @@ -258,23 +249,23 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad08897b81588f60ba983e3ca39bda2b179bdd84dced378e7df81a5313802ef8" +checksum = "a1a640186d3bd30a24cb42264c2dafb30e236a6f50d510e56d40b708c9582491" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "chrono", - "num", + "num-traits", ] [[package]] name = "arrow-array" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8548ca7c070d8db9ce7aa43f37393e4bfcf3f2d3681df278490772fd1673d08d" +checksum = "219fe420e6800979744c8393b687afb0252b3f8a89b91027d27887b72aa36d31" dependencies = [ "ahash 0.8.12", "arrow-buffer", @@ -284,25 +275,28 @@ dependencies = [ "chrono-tz", "half", "hashbrown 0.16.0", - "num", + "num-complex", + "num-integer", + "num-traits", ] [[package]] name = "arrow-buffer" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e003216336f70446457e280807a73899dd822feaf02087d31febca1363e2fccc" +checksum = "76885a2697a7edf6b59577f568b456afc94ce0e2edc15b784ce3685b6c3c5c27" dependencies = [ "bytes", "half", - "num", + "num-bigint", + "num-traits", ] [[package]] name = "arrow-cast" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "919418a0681298d3a77d1a315f625916cb5678ad0d74b9c60108eb15fd083023" +checksum = "9c9ebb4c987e6b3b236fb4a14b20b34835abfdd80acead3ccf1f9bf399e1f168" dependencies = [ "arrow-array", "arrow-buffer", @@ -315,15 +309,15 @@ dependencies = [ "comfy-table", "half", "lexical-core", - "num", + "num-traits", "ryu", ] [[package]] name = "arrow-csv" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"bfa9bf02705b5cf762b6f764c65f04ae9082c7cfc4e96e0c33548ee3f67012eb" +checksum = "92386159c8d4bce96f8bd396b0642a0d544d471bdc2ef34d631aec80db40a09c" dependencies = [ "arrow-array", "arrow-cast", @@ -336,21 +330,22 @@ dependencies = [ [[package]] name = "arrow-data" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5c64fff1d142f833d78897a772f2e5b55b36cb3e6320376f0961ab0db7bd6d0" +checksum = "727681b95de313b600eddc2a37e736dcb21980a40f640314dcf360e2f36bc89b" dependencies = [ "arrow-buffer", "arrow-schema", "half", - "num", + "num-integer", + "num-traits", ] [[package]] name = "arrow-flight" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c8b0ba0784d56bc6266b79f5de7a24b47024e7b3a0045d2ad4df3d9b686099f" +checksum = "f70bb56412a007b0cfc116d15f24dda6adeed9611a213852a004cda20085a3b9" dependencies = [ "arrow-arith", "arrow-array", @@ -368,16 +363,17 @@ dependencies = [ "futures", "once_cell", "paste", - "prost 0.13.5", - "prost-types 0.13.5", + "prost", + "prost-types", "tonic", + "tonic-prost", ] [[package]] name = "arrow-ipc" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d3594dcddccc7f20fd069bc8e9828ce37220372680ff638c5e00dea427d88f5" +checksum = "da9ba92e3de170295c98a84e5af22e2b037f0c7b32449445e6c493b5fca27f27" dependencies = [ "arrow-array", "arrow-buffer", @@ -391,9 +387,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88cf36502b64a127dc659e3b305f1d993a544eab0d48cce704424e62074dc04b" +checksum = "b969b4a421ae83828591c6bf5450bd52e6d489584142845ad6a861f42fe35df8" dependencies = [ "arrow-array", "arrow-buffer", @@ -402,20 +398,22 @@ dependencies = [ "arrow-schema", "chrono", "half", - "indexmap 2.11.4", + "indexmap 2.12.0", + "itoa", "lexical-core", "memchr", - "num", - "serde", + "num-traits", + "ryu", + "serde_core", "serde_json", "simdutf8", ] [[package]] name = "arrow-ord" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c8f82583eb4f8d84d4ee55fd1cb306720cddead7596edce95b50ee418edf66f" +checksum = "141c05298b21d03e88062317a1f1a73f5ba7b6eb041b350015b1cd6aabc0519b" dependencies = [ "arrow-array", "arrow-buffer", @@ -426,9 +424,9 @@ dependencies = [ [[package]] name = "arrow-pyarrow" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d924b32e96f8bb74d94cd82bd97b313c432fcb0ea331689ef9e7c6b8be4b258" +checksum = "cfcfb2be2e9096236f449c11f425cddde18c4cc540f516d90f066f10a29ed515" dependencies = [ "arrow-array", "arrow-data", @@ -438,9 +436,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d07ba24522229d9085031df6b94605e0f4b26e099fb7cdeec37abd941a73753" +checksum = "c5f3c06a6abad6164508ed283c7a02151515cef3de4b4ff2cebbcaeb85533db2" dependencies = [ "arrow-array", "arrow-buffer", @@ -451,34 +449,35 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3aa9e59c611ebc291c28582077ef25c97f1975383f1479b12f3b9ffee2ffabe" +checksum = "9cfa7a03d1eee2a4d061476e1840ad5c9867a544ca6c4c59256496af5d0a8be5" dependencies 
= [ "bitflags 2.9.4", "serde", + "serde_core", "serde_json", ] [[package]] name = "arrow-select" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c41dbbd1e97bfcaee4fcb30e29105fb2c75e4d82ae4de70b792a5d3f66b2e7a" +checksum = "bafa595babaad59f2455f4957d0f26448fb472722c186739f4fac0823a1bdb47" dependencies = [ "ahash 0.8.12", "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", - "num", + "num-traits", ] [[package]] name = "arrow-string" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53f5183c150fbc619eede22b861ea7c0eebed8eaac0333eaa7f6da5205fd504d" +checksum = "32f46457dbbb99f2650ff3ac23e46a929e0ab81db809b02aa5511c258348bef2" dependencies = [ "arrow-array", "arrow-buffer", @@ -486,7 +485,7 @@ dependencies = [ "arrow-schema", "arrow-select", "memchr", - "num", + "num-traits", "regex", "regex-syntax", ] @@ -537,7 +536,7 @@ checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -548,7 +547,7 @@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -965,21 +964,6 @@ dependencies = [ "tower-service", ] -[[package]] -name = "backtrace" -version = "0.3.75" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002" -dependencies = [ - "addr2line", - "cfg-if", - "libc", - "miniz_oxide", - "object", - "rustc-demangle", - "windows-targets 0.52.6", -] - [[package]] name = "base64" version = "0.21.7" @@ -1033,7 +1017,7 @@ dependencies = [ "regex", "rustc-hash", "shlex", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -1163,7 +1147,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -1186,7 +1170,7 @@ dependencies = [ "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -1281,9 +1265,9 @@ dependencies = [ [[package]] name = "bzip2" -version = "0.6.0" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bea8dcd42434048e4f7a304411d9273a411f647446c1234a65ce0554923f4cff" +checksum = "f3a53fac24f34a81bc9954b5d6cfce0c21e18ec6959f44f56e8e90e4bb7c346c" dependencies = [ "libbz2-rs-sys", ] @@ -1412,9 +1396,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.48" +version = "4.5.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2134bb3ea021b78629caa971416385309e0131b351b25e01dc16fb54e1b5fae" +checksum = "0c2cfd7bf8a6017ddaa4e32ffe7403d547790db06bd171c1c53926faab501623" dependencies = [ "clap_builder", "clap_derive", @@ -1422,9 +1406,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.48" +version = "4.5.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2ba64afa3c0a6df7fa517765e31314e983f51dda798ffba27b988194fb65dc9" +checksum = "0a4c05b9e80c5ccd3a7ef080ad7b6ba7d6fc00a985b8b157197075677c82c7a0" dependencies = [ "anstream", "anstyle", @@ -1434,14 +1418,14 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.47" +version = "4.5.49" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbfd7eae0b0f1a6e63d4b13c9c478de77c2eb546fba158ad50b4203dc24b9f9c" +checksum = 
"2a0b5487afeab2deb2ff4e03a807ad1a03ac532ff5a2cee5d86884440c7f7671" dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -1613,7 +1597,7 @@ dependencies = [ "anes", "cast", "ciborium", - "clap 4.5.48", + "clap 4.5.50", "criterion-plot", "futures", "is-terminal", @@ -1756,7 +1740,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -1767,7 +1751,7 @@ checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" dependencies = [ "darling_core", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -1786,14 +1770,13 @@ dependencies = [ [[package]] name = "datafusion" -version = "50.2.0" +version = "50.3.0" dependencies = [ "arrow", - "arrow-ipc", "arrow-schema", "async-trait", "bytes", - "bzip2 0.6.0", + "bzip2 0.6.1", "chrono", "criterion", "ctor", @@ -1803,6 +1786,7 @@ dependencies = [ "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", + "datafusion-datasource-arrow", "datafusion-datasource-avro", "datafusion-datasource-csv", "datafusion-datasource-json", @@ -1858,7 +1842,7 @@ dependencies = [ [[package]] name = "datafusion-benchmarks" -version = "50.2.0" +version = "50.3.0" dependencies = [ "arrow", "datafusion", @@ -1883,7 +1867,7 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "50.2.0" +version = "50.3.0" dependencies = [ "arrow", "async-trait", @@ -1906,19 +1890,22 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "50.2.0" +version = "50.3.0" dependencies = [ "arrow", "async-trait", "datafusion-catalog", "datafusion-common", "datafusion-datasource", + "datafusion-datasource-parquet", "datafusion-execution", "datafusion-expr", "datafusion-physical-expr", + "datafusion-physical-expr-adapter", "datafusion-physical-expr-common", "datafusion-physical-plan", "futures", + "itertools 0.14.0", "log", "object_store", "tokio", @@ -1926,16 +1913,17 @@ dependencies = [ [[package]] name = "datafusion-cli" -version = "50.2.0" +version = "50.3.0" dependencies = [ "arrow", "async-trait", "aws-config", "aws-credential-types", "chrono", - "clap 4.5.48", + "clap 4.5.50", "ctor", "datafusion", + "datafusion-common", "dirs", "env_logger", "futures", @@ -1957,7 +1945,7 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "50.2.0" +version = "50.3.0" dependencies = [ "ahash 0.8.12", "apache-avro", @@ -1967,7 +1955,7 @@ dependencies = [ "half", "hashbrown 0.14.5", "hex", - "indexmap 2.11.4", + "indexmap 2.12.0", "insta", "libc", "log", @@ -1984,7 +1972,7 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "50.2.0" +version = "50.3.0" dependencies = [ "futures", "log", @@ -1993,13 +1981,13 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "50.2.0" +version = "50.3.0" dependencies = [ "arrow", "async-compression", "async-trait", "bytes", - "bzip2 0.6.0", + "bzip2 0.6.1", "chrono", "criterion", "datafusion-common", @@ -2026,9 +2014,32 @@ dependencies = [ "zstd", ] +[[package]] +name = "datafusion-datasource-arrow" +version = "50.3.0" +dependencies = [ + "arrow", + "arrow-ipc", + "async-trait", + "bytes", + "chrono", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "itertools 0.14.0", + "object_store", + "tokio", +] + [[package]] name = "datafusion-datasource-avro" 
-version = "50.2.0" +version = "50.3.0" dependencies = [ "apache-avro", "arrow", @@ -2047,7 +2058,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-csv" -version = "50.2.0" +version = "50.3.0" dependencies = [ "arrow", "async-trait", @@ -2068,7 +2079,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version = "50.2.0" +version = "50.3.0" dependencies = [ "arrow", "async-trait", @@ -2088,7 +2099,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-parquet" -version = "50.2.0" +version = "50.3.0" dependencies = [ "arrow", "async-trait", @@ -2117,11 +2128,11 @@ dependencies = [ [[package]] name = "datafusion-doc" -version = "50.2.0" +version = "50.3.0" [[package]] name = "datafusion-examples" -version = "50.2.0" +version = "50.3.0" dependencies = [ "arrow", "arrow-flight", @@ -2140,7 +2151,7 @@ dependencies = [ "mimalloc", "nix", "object_store", - "prost 0.13.5", + "prost", "rand 0.9.2", "serde_json", "tempfile", @@ -2155,7 +2166,7 @@ dependencies = [ [[package]] name = "datafusion-execution" -version = "50.2.0" +version = "50.3.0" dependencies = [ "arrow", "async-trait", @@ -2176,7 +2187,7 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "50.2.0" +version = "50.3.0" dependencies = [ "arrow", "async-trait", @@ -2189,7 +2200,7 @@ dependencies = [ "datafusion-functions-window-common", "datafusion-physical-expr-common", "env_logger", - "indexmap 2.11.4", + "indexmap 2.12.0", "insta", "itertools 0.14.0", "paste", @@ -2200,18 +2211,18 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "50.2.0" +version = "50.3.0" dependencies = [ "arrow", "datafusion-common", - "indexmap 2.11.4", + "indexmap 2.12.0", "itertools 0.14.0", "paste", ] [[package]] name = "datafusion-ffi" -version = "50.2.0" +version = "50.3.0" dependencies = [ "abi_stable", "arrow", @@ -2226,14 +2237,14 @@ dependencies = [ "doc-comment", "futures", "log", - "prost 0.13.5", + "prost", "semver", "tokio", ] [[package]] name = "datafusion-functions" -version = "50.2.0" +version = "50.3.0" dependencies = [ "arrow", "arrow-buffer", @@ -2264,7 +2275,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "50.2.0" +version = "50.3.0" dependencies = [ "ahash 0.8.12", "arrow", @@ -2285,7 +2296,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "50.2.0" +version = "50.3.0" dependencies = [ "ahash 0.8.12", "arrow", @@ -2298,7 +2309,7 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "50.2.0" +version = "50.3.0" dependencies = [ "arrow", "arrow-ord", @@ -2321,7 +2332,7 @@ dependencies = [ [[package]] name = "datafusion-functions-table" -version = "50.2.0" +version = "50.3.0" dependencies = [ "arrow", "async-trait", @@ -2335,7 +2346,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "50.2.0" +version = "50.3.0" dependencies = [ "arrow", "datafusion-common", @@ -2351,7 +2362,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "50.2.0" +version = "50.3.0" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -2359,16 +2370,16 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "50.2.0" +version = "50.3.0" dependencies = [ "datafusion-doc", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] name = "datafusion-optimizer" -version = "50.2.0" +version = "50.3.0" dependencies = [ "arrow", "async-trait", @@ -2384,7 +2395,7 @@ dependencies = [ 
"datafusion-physical-expr", "datafusion-sql", "env_logger", - "indexmap 2.11.4", + "indexmap 2.12.0", "insta", "itertools 0.14.0", "log", @@ -2395,7 +2406,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "50.2.0" +version = "50.3.0" dependencies = [ "ahash 0.8.12", "arrow", @@ -2408,7 +2419,7 @@ dependencies = [ "datafusion-physical-expr-common", "half", "hashbrown 0.14.5", - "indexmap 2.11.4", + "indexmap 2.12.0", "insta", "itertools 0.14.0", "parking_lot", @@ -2420,7 +2431,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-adapter" -version = "50.2.0" +version = "50.3.0" dependencies = [ "arrow", "datafusion-common", @@ -2433,7 +2444,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "50.2.0" +version = "50.3.0" dependencies = [ "ahash 0.8.12", "arrow", @@ -2445,13 +2456,14 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "50.2.0" +version = "50.3.0" dependencies = [ "arrow", "datafusion-common", "datafusion-execution", "datafusion-expr", "datafusion-expr-common", + "datafusion-functions", "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", @@ -2464,7 +2476,7 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "50.2.0" +version = "50.3.0" dependencies = [ "ahash 0.8.12", "arrow", @@ -2486,7 +2498,7 @@ dependencies = [ "futures", "half", "hashbrown 0.14.5", - "indexmap 2.11.4", + "indexmap 2.12.0", "insta", "itertools 0.14.0", "log", @@ -2500,22 +2512,35 @@ dependencies = [ [[package]] name = "datafusion-proto" -version = "50.2.0" +version = "50.3.0" dependencies = [ "arrow", "chrono", "datafusion", + "datafusion-catalog", + "datafusion-catalog-listing", "datafusion-common", + "datafusion-datasource", + "datafusion-datasource-arrow", + "datafusion-datasource-avro", + "datafusion-datasource-csv", + "datafusion-datasource-json", + "datafusion-datasource-parquet", + "datafusion-execution", "datafusion-expr", "datafusion-functions", "datafusion-functions-aggregate", + "datafusion-functions-table", "datafusion-functions-window-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", "datafusion-proto-common", "doc-comment", "object_store", "pbjson", "pretty_assertions", - "prost 0.13.5", + "prost", "serde", "serde_json", "tokio", @@ -2523,19 +2548,19 @@ dependencies = [ [[package]] name = "datafusion-proto-common" -version = "50.2.0" +version = "50.3.0" dependencies = [ "arrow", "datafusion-common", "doc-comment", "pbjson", - "prost 0.13.5", + "prost", "serde", ] [[package]] name = "datafusion-pruning" -version = "50.2.0" +version = "50.3.0" dependencies = [ "arrow", "datafusion-common", @@ -2553,7 +2578,7 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "50.2.0" +version = "50.3.0" dependencies = [ "async-trait", "datafusion-common", @@ -2565,7 +2590,7 @@ dependencies = [ [[package]] name = "datafusion-spark" -version = "50.2.0" +version = "50.3.0" dependencies = [ "arrow", "bigdecimal", @@ -2585,7 +2610,7 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "50.2.0" +version = "50.3.0" dependencies = [ "arrow", "bigdecimal", @@ -2598,7 +2623,7 @@ dependencies = [ "datafusion-functions-nested", "datafusion-functions-window", "env_logger", - "indexmap 2.11.4", + "indexmap 2.12.0", "insta", "itertools 0.14.0", "log", @@ -2611,14 +2636,14 @@ dependencies = [ [[package]] name = "datafusion-sqllogictest" -version = "50.2.0" +version = 
"50.3.0" dependencies = [ "arrow", "async-trait", "bigdecimal", "bytes", "chrono", - "clap 4.5.48", + "clap 4.5.50", "datafusion", "datafusion-spark", "datafusion-substrait", @@ -2645,18 +2670,19 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "50.2.0" +version = "50.3.0" dependencies = [ "async-recursion", "async-trait", "chrono", "datafusion", "datafusion-functions-aggregate", + "half", "insta", "itertools 0.14.0", "object_store", "pbjson-types", - "prost 0.13.5", + "prost", "serde_json", "substrait", "tokio", @@ -2666,7 +2692,7 @@ dependencies = [ [[package]] name = "datafusion-wasmtest" -version = "50.2.0" +version = "50.3.0" dependencies = [ "chrono", "console_error_panic_hook", @@ -2677,7 +2703,7 @@ dependencies = [ "datafusion-optimizer", "datafusion-physical-plan", "datafusion-sql", - "getrandom 0.3.3", + "getrandom 0.3.4", "object_store", "tokio", "url", @@ -2741,7 +2767,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -2797,7 +2823,7 @@ dependencies = [ "enum-ordinalize", "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -2835,7 +2861,7 @@ checksum = "0d28318a75d4aead5c4db25382e8ef717932d0346600cacae6357eb5941bc5ff" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -3096,7 +3122,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -3139,16 +3165,16 @@ dependencies = [ name = "gen" version = "0.1.0" dependencies = [ - "pbjson-build 0.8.0", - "prost-build 0.14.1", + "pbjson-build", + "prost-build", ] [[package]] name = "gen-common" version = "0.1.0" dependencies = [ - "pbjson-build 0.8.0", - "prost-build 0.14.1", + "pbjson-build", + "prost-build", ] [[package]] @@ -3179,30 +3205,24 @@ dependencies = [ "cfg-if", "js-sys", "libc", - "wasi 0.11.1+wasi-snapshot-preview1", + "wasi", "wasm-bindgen", ] [[package]] name = "getrandom" -version = "0.3.3" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" dependencies = [ "cfg-if", "js-sys", "libc", "r-efi", - "wasi 0.14.7+wasi-0.2.4", + "wasip2", "wasm-bindgen", ] -[[package]] -name = "gimli" -version = "0.31.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" - [[package]] name = "glob" version = "0.3.3" @@ -3234,7 +3254,7 @@ dependencies = [ "futures-core", "futures-sink", "http 1.3.1", - "indexmap 2.11.4", + "indexmap 2.12.0", "slab", "tokio", "tokio-util", @@ -3243,9 +3263,9 @@ dependencies = [ [[package]] name = "half" -version = "2.7.0" +version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e54c115d4f30f52c67202f079c5f9d8b49db4691f460fdb0b4c2e838261b2ba5" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" dependencies = [ "cfg-if", "crunchy", @@ -3494,7 +3514,7 @@ dependencies = [ "libc", "percent-encoding", "pin-project-lite", - "socket2 0.6.0", + "socket2", "tokio", "tower-service", "tracing", @@ -3665,9 +3685,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.11.4" +version = "2.12.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b0f83760fb341a774ed326568e19f5a863af4a952def8c39f9ab92fd95b88e5" +checksum = "6717a8d2a5a929a1a2eb43a12812498ed141a0bcfb7e8f7844fbdbe4303bba9f" dependencies = [ "equivalent", "hashbrown 0.16.0", @@ -3726,17 +3746,6 @@ version = "3.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" -[[package]] -name = "io-uring" -version = "0.7.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "046fa2d4d00aea763528b4950358d0ead425372445dc8ff86312b3c69ff7727b" -dependencies = [ - "bitflags 2.9.4", - "cfg-if", - "libc", -] - [[package]] name = "ipnet" version = "2.11.0" @@ -3824,7 +3833,7 @@ checksum = "03343451ff899767262ec32146f6d559dd759fdadf42ff0e227c7c48f72594b4" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -3833,7 +3842,7 @@ version = "0.1.34" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" dependencies = [ - "getrandom 0.3.3", + "getrandom 0.3.4", "libc", ] @@ -3978,7 +3987,7 @@ checksum = "5297962ef19edda4ce33aaa484386e0a5b3d7f2f4e037cbeee00503ef6b29d33" dependencies = [ "anstream", "anstyle", - "clap 4.5.48", + "clap 4.5.50", "escape8259", ] @@ -4124,7 +4133,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" dependencies = [ "libc", - "wasi 0.11.1+wasi-snapshot-preview1", + "wasi", "windows-sys 0.59.0", ] @@ -4183,20 +4192,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "num" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" -dependencies = [ - "num-bigint", - "num-complex", - "num-integer", - "num-iter", - "num-rational", - "num-traits", -] - [[package]] name = "num-bigint" version = "0.4.6" @@ -4232,28 +4227,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "num-iter" -version = "0.1.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" -dependencies = [ - "autocfg", - "num-integer", - "num-traits", -] - -[[package]] -name = "num-rational" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" -dependencies = [ - "num-bigint", - "num-integer", - "num-traits", -] - [[package]] name = "num-traits" version = "0.2.19" @@ -4283,15 +4256,6 @@ dependencies = [ "objc2-core-foundation", ] -[[package]] -name = "object" -version = "0.36.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" -dependencies = [ - "memchr", -] - [[package]] name = "object_store" version = "0.12.4" @@ -4405,9 +4369,9 @@ dependencies = [ [[package]] name = "parquet" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0dbd48ad52d7dccf8ea1b90a3ddbfaea4f69878dd7683e51c507d4bc52b5b27" +checksum = "7a0f31027ef1af7549f7cec603a9a21dce706d3f8d7c2060a68f43c1773be95a" dependencies = [ "ahash 0.8.12", "arrow-array", @@ -4426,8 +4390,9 @@ dependencies = [ "half", "hashbrown 0.16.0", "lz4_flex", - "num", "num-bigint", 
+ "num-integer", + "num-traits", "object_store", "paste", "ring", @@ -4462,7 +4427,7 @@ dependencies = [ "regex", "regex-syntax", "structmeta", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -4473,26 +4438,14 @@ checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" [[package]] name = "pbjson" -version = "0.7.0" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7e6349fa080353f4a597daffd05cb81572a9c031a6d4fff7e504947496fcc68" +checksum = "898bac3fa00d0ba57a4e8289837e965baa2dee8c3749f3b11d45a64b4223d9c3" dependencies = [ - "base64 0.21.7", + "base64 0.22.1", "serde", ] -[[package]] -name = "pbjson-build" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6eea3058763d6e656105d1403cb04e0a41b7bbac6362d413e7c33be0c32279c9" -dependencies = [ - "heck 0.5.0", - "itertools 0.13.0", - "prost 0.13.5", - "prost-types 0.13.5", -] - [[package]] name = "pbjson-build" version = "0.8.0" @@ -4501,22 +4454,22 @@ checksum = "af22d08a625a2213a78dbb0ffa253318c5c79ce3133d32d296655a7bdfb02095" dependencies = [ "heck 0.5.0", "itertools 0.14.0", - "prost 0.14.1", - "prost-types 0.14.1", + "prost", + "prost-types", ] [[package]] name = "pbjson-types" -version = "0.7.0" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e54e5e7bfb1652f95bc361d76f3c780d8e526b134b85417e774166ee941f0887" +checksum = "8e748e28374f10a330ee3bb9f29b828c0ac79831a32bab65015ad9b661ead526" dependencies = [ "bytes", "chrono", "pbjson", - "pbjson-build 0.7.0", - "prost 0.13.5", - "prost-build 0.13.5", + "pbjson-build", + "prost", + "prost-build", "serde", ] @@ -4533,7 +4486,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" dependencies = [ "fixedbitset", - "indexmap 2.11.4", + "indexmap 2.12.0", ] [[package]] @@ -4544,7 +4497,7 @@ checksum = "8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455" dependencies = [ "fixedbitset", "hashbrown 0.15.5", - "indexmap 2.11.4", + "indexmap 2.12.0", "serde", ] @@ -4602,7 +4555,7 @@ checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -4675,7 +4628,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -4750,7 +4703,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" dependencies = [ "proc-macro2", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -4795,16 +4748,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "prost" -version = "0.13.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" -dependencies = [ - "bytes", - "prost-derive 0.13.5", -] - [[package]] name = "prost" version = "0.14.1" @@ -4812,27 +4755,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7231bd9b3d3d33c86b58adbac74b5ec0ad9f496b19d22801d773636feaa95f3d" dependencies = [ "bytes", - "prost-derive 0.14.1", -] - -[[package]] -name = "prost-build" -version = "0.13.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf" -dependencies = [ - "heck 0.5.0", - "itertools 0.14.0", - "log", 
- "multimap", - "once_cell", - "petgraph 0.7.1", - "prettyplease", - "prost 0.13.5", - "prost-types 0.13.5", - "regex", - "syn 2.0.106", - "tempfile", + "prost-derive", ] [[package]] @@ -4848,26 +4771,13 @@ dependencies = [ "once_cell", "petgraph 0.7.1", "prettyplease", - "prost 0.14.1", - "prost-types 0.14.1", + "prost", + "prost-types", "regex", - "syn 2.0.106", + "syn 2.0.108", "tempfile", ] -[[package]] -name = "prost-derive" -version = "0.13.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" -dependencies = [ - "anyhow", - "itertools 0.14.0", - "proc-macro2", - "quote", - "syn 2.0.106", -] - [[package]] name = "prost-derive" version = "0.14.1" @@ -4878,16 +4788,7 @@ dependencies = [ "itertools 0.14.0", "proc-macro2", "quote", - "syn 2.0.106", -] - -[[package]] -name = "prost-types" -version = "0.13.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16" -dependencies = [ - "prost 0.13.5", + "syn 2.0.108", ] [[package]] @@ -4896,7 +4797,7 @@ version = "0.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9b4db3d6da204ed77bb26ba83b6122a73aeb2e87e25fbf7ad2e84c4ccbf8f72" dependencies = [ - "prost 0.14.1", + "prost", ] [[package]] @@ -4939,9 +4840,9 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.25.1" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8970a78afe0628a3e3430376fc5fd76b6b45c4d43360ffd6cdd40bdde72b682a" +checksum = "7ba0117f4212101ee6544044dae45abe1083d30ce7b29c4b5cbdfa2354e07383" dependencies = [ "indoc", "libc", @@ -4956,19 +4857,18 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.25.1" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "458eb0c55e7ece017adeba38f2248ff3ac615e53660d7c71a238d7d2a01c7598" +checksum = "4fc6ddaf24947d12a9aa31ac65431fb1b851b8f4365426e182901eabfb87df5f" dependencies = [ - "once_cell", "target-lexicon", ] [[package]] name = "pyo3-ffi" -version = "0.25.1" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7114fe5457c61b276ab77c5055f206295b812608083644a5c5b2640c3102565c" +checksum = "025474d3928738efb38ac36d4744a74a400c901c7596199e20e45d98eb194105" dependencies = [ "libc", "pyo3-build-config", @@ -4976,27 +4876,27 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.25.1" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8725c0a622b374d6cb051d11a0983786448f7785336139c3c94f5aa6bef7e50" +checksum = "2e64eb489f22fe1c95911b77c44cc41e7c19f3082fc81cce90f657cdc42ffded" dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] name = "pyo3-macros-backend" -version = "0.25.1" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4109984c22491085343c05b0dbc54ddc405c3cf7b4374fc533f5c3313a572ccc" +checksum = "100246c0ecf400b475341b8455a9213344569af29a3c841d29270e53102e0fcf" dependencies = [ "heck 0.5.0", "proc-macro2", "pyo3-build-config", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -5028,7 +4928,7 @@ dependencies = [ "quinn-udp", "rustc-hash", "rustls", - "socket2 0.6.0", + "socket2", "thiserror", "tokio", "tracing", @@ -5042,7 +4942,7 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "f1906b49b0c3bc04b5fe5d86a77925ae6524a19b816ae38ce1e426255f1d8a31" dependencies = [ "bytes", - "getrandom 0.3.3", + "getrandom 0.3.4", "lru-slab", "rand 0.9.2", "ring", @@ -5065,7 +4965,7 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2 0.6.0", + "socket2", "tracing", "windows-sys 0.60.2", ] @@ -5157,7 +5057,7 @@ version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" dependencies = [ - "getrandom 0.3.3", + "getrandom 0.3.4", ] [[package]] @@ -5207,7 +5107,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" dependencies = [ "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -5256,14 +5156,14 @@ checksum = "1165225c21bff1f3bbce98f5a1f889949bc902d3575308cc7b0de30b4f6d27c7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] name = "regex" -version = "1.11.3" +version = "1.12.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b5288124840bee7b386bc413c487869b360b2b4ec421ea56425128692f2a82c" +checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" dependencies = [ "aho-corasick", "memchr", @@ -5273,9 +5173,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.11" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "833eb9ce86d40ef33cb1306d8accf7bc8ec2bfea4355cbdebb3df68b40925cad" +checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" dependencies = [ "aho-corasick", "memchr", @@ -5439,7 +5339,7 @@ dependencies = [ "regex", "relative-path", "rustc_version", - "syn 2.0.106", + "syn 2.0.108", "unicode-ident", ] @@ -5451,7 +5351,7 @@ checksum = "b3a8fb4672e840a587a66fc577a5491375df51ddb88f2a2c2a792598c326fe14" dependencies = [ "quote", "rand 0.8.5", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -5471,12 +5371,6 @@ dependencies = [ "serde_json", ] -[[package]] -name = "rustc-demangle" -version = "0.1.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace" - [[package]] name = "rustc-hash" version = "2.1.1" @@ -5660,7 +5554,7 @@ dependencies = [ "proc-macro2", "quote", "serde_derive_internals", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -5751,7 +5645,7 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -5762,7 +5656,7 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -5786,7 +5680,7 @@ checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -5798,7 +5692,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -5823,7 +5717,7 @@ dependencies = [ "chrono", "hex", "indexmap 1.9.3", - "indexmap 2.11.4", + "indexmap 2.12.0", "schemars 0.9.0", "schemars 1.0.4", "serde", @@ -5842,7 +5736,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -5851,7 +5745,7 @@ version = 
"0.9.34+deprecated" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" dependencies = [ - "indexmap 2.11.4", + "indexmap 2.12.0", "itoa", "ryu", "serde", @@ -5964,16 +5858,6 @@ dependencies = [ "cmake", ] -[[package]] -name = "socket2" -version = "0.5.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" -dependencies = [ - "libc", - "windows-sys 0.52.0", -] - [[package]] name = "socket2" version = "0.6.0" @@ -6028,7 +5912,7 @@ checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -6076,7 +5960,7 @@ dependencies = [ "proc-macro2", "quote", "structmeta-derive", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -6087,7 +5971,7 @@ checksum = "152a0b65a590ff6c3da95cabe2353ee04e6167c896b28e3b14478c2636c922fc" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -6136,7 +6020,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -6148,7 +6032,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -6163,18 +6047,18 @@ dependencies = [ [[package]] name = "substrait" -version = "0.58.0" +version = "0.59.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de6d24c270c6c672a86c183c3a8439ba46c1936f93cf7296aa692de3b0ff0228" +checksum = "540683f325ab9ab1a2008bc24588f3e76f63b6a3f52bc47e121122376a063639" dependencies = [ "heck 0.5.0", "pbjson", - "pbjson-build 0.7.0", + "pbjson-build", "pbjson-types", "prettyplease", - "prost 0.13.5", - "prost-build 0.13.5", - "prost-types 0.13.5", + "prost", + "prost-build", + "prost-types", "protobuf-src", "regress", "schemars 0.8.22", @@ -6182,7 +6066,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "syn 2.0.106", + "syn 2.0.108", "typify", "walkdir", ] @@ -6206,9 +6090,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.106" +version = "2.0.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" +checksum = "da58917d35242480a05c2897064da0a80589a2a0476c9a3f2fdc83b53502e917" dependencies = [ "proc-macro2", "quote", @@ -6232,7 +6116,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -6268,7 +6152,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" dependencies = [ "fastrand", - "getrandom 0.3.3", + "getrandom 0.3.4", "once_cell", "rustix", "windows-sys 0.61.0", @@ -6349,7 +6233,7 @@ checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -6449,33 +6333,30 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.47.1" +version = "1.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89e49afdadebb872d3145a5638b59eb0691ea23e46ca484037cfab3b76b95038" +checksum = "ff360e02eab121e0bc37a2d3b4d4dc622e6eda3a8e5253d5435ecf5bd4c68408" dependencies = [ - "backtrace", 
"bytes", - "io-uring", "libc", "mio", "parking_lot", "pin-project-lite", "signal-hook-registry", - "slab", - "socket2 0.6.0", + "socket2", "tokio-macros", - "windows-sys 0.59.0", + "windows-sys 0.61.0", ] [[package]] name = "tokio-macros" -version = "2.5.0" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" +checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -6498,7 +6379,7 @@ dependencies = [ "postgres-protocol", "postgres-types", "rand 0.9.2", - "socket2 0.6.0", + "socket2", "tokio", "tokio-util", "whoami", @@ -6568,7 +6449,7 @@ version = "0.23.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f3effe7c0e86fdff4f69cdd2ccc1b96f933e24811c5441d44904e8683e27184b" dependencies = [ - "indexmap 2.11.4", + "indexmap 2.12.0", "toml_datetime", "toml_parser", "winnow", @@ -6585,9 +6466,9 @@ dependencies = [ [[package]] name = "tonic" -version = "0.13.1" +version = "0.14.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e581ba15a835f4d9ea06c55ab1bd4dce26fc53752c69a04aac00703bfb49ba9" +checksum = "eb7613188ce9f7df5bfe185db26c5814347d110db17920415cf2fbcad85e7203" dependencies = [ "async-trait", "axum", @@ -6602,8 +6483,8 @@ dependencies = [ "hyper-util", "percent-encoding", "pin-project", - "prost 0.13.5", - "socket2 0.5.10", + "socket2", + "sync_wrapper", "tokio", "tokio-stream", "tower", @@ -6612,6 +6493,17 @@ dependencies = [ "tracing", ] +[[package]] +name = "tonic-prost" +version = "0.14.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66bd50ad6ce1252d87ef024b3d64fe4c3cf54a86fb9ef4c631fdd0ded7aeaa67" +dependencies = [ + "bytes", + "prost", + "tonic", +] + [[package]] name = "tower" version = "0.5.2" @@ -6620,7 +6512,7 @@ checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9" dependencies = [ "futures-core", "futures-util", - "indexmap 2.11.4", + "indexmap 2.12.0", "pin-project-lite", "slab", "sync_wrapper", @@ -6680,7 +6572,7 @@ checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -6788,7 +6680,7 @@ dependencies = [ "semver", "serde", "serde_json", - "syn 2.0.106", + "syn 2.0.108", "thiserror", "unicode-ident", ] @@ -6806,7 +6698,7 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream", - "syn 2.0.106", + "syn 2.0.108", "typify-impl", ] @@ -6915,7 +6807,7 @@ version = "1.18.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2f87b8aa10b915a06587d0dec516c282ff295b475d94abf425d62b57710070a2" dependencies = [ - "getrandom 0.3.3", + "getrandom 0.3.4", "js-sys", "serde", "wasm-bindgen", @@ -6964,15 +6856,6 @@ version = "0.11.1+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" -[[package]] -name = "wasi" -version = "0.14.7+wasi-0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "883478de20367e224c0090af9cf5f9fa85bed63a95c1abf3afc5c083ebc06e8c" -dependencies = [ - "wasip2", -] - [[package]] name = "wasip2" version = "1.0.1+wasi-0.2.4" @@ -7011,7 +6894,7 @@ dependencies = [ "log", "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", "wasm-bindgen-shared", ] @@ 
-7046,7 +6929,7 @@ checksum = "9f07d2f20d4da7b26400c9f4a0511e6e0345b040694e8a75bd41d578fa4421d7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -7081,7 +6964,7 @@ checksum = "b673bca3298fe582aeef8352330ecbad91849f85090805582400850f8270a2e8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -7226,7 +7109,7 @@ checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -7237,7 +7120,7 @@ checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -7553,7 +7436,7 @@ checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", "synstructure", ] @@ -7574,7 +7457,7 @@ checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -7594,7 +7477,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", "synstructure", ] @@ -7634,7 +7517,7 @@ checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index dd0b20de528a..1cfb23bb183d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,6 +22,7 @@ members = [ "datafusion/catalog", "datafusion/catalog-listing", "datafusion/datasource", + "datafusion/datasource-arrow", "datafusion/datasource-avro", "datafusion/datasource-csv", "datafusion/datasource-json", @@ -78,7 +79,7 @@ repository = "https://github.com/apache/datafusion" # Define Minimum Supported Rust Version (MSRV) rust-version = "1.87.0" # Define DataFusion version -version = "50.2.0" +version = "50.3.0" [workspace.dependencies] # We turn off default-features for some dependencies here so the workspaces which inherit them can @@ -90,19 +91,19 @@ ahash = { version = "0.8", default-features = false, features = [ "runtime-rng", ] } apache-avro = { version = "0.20", default-features = false } -arrow = { version = "56.2.0", features = [ +arrow = { version = "57.0.0", features = [ "prettyprint", "chrono-tz", ] } -arrow-buffer = { version = "56.2.0", default-features = false } -arrow-flight = { version = "56.2.0", features = [ +arrow-buffer = { version = "57.0.0", default-features = false } +arrow-flight = { version = "57.0.0", features = [ "flight-sql-experimental", ] } -arrow-ipc = { version = "56.2.0", default-features = false, features = [ +arrow-ipc = { version = "57.0.0", default-features = false, features = [ "lz4", ] } -arrow-ord = { version = "56.2.0", default-features = false } -arrow-schema = { version = "56.2.0", default-features = false } +arrow-ord = { version = "57.0.0", default-features = false } +arrow-schema = { version = "57.0.0", default-features = false } async-trait = "0.1.89" bigdecimal = "0.4.8" bytes = "1.10" @@ -110,73 +111,75 @@ chrono = { version = "0.4.42", default-features = false } criterion = "0.5.1" ctor = "0.4.3" dashmap = "6.0.1" -datafusion = { path = "datafusion/core", version = "50.2.0", default-features = false } -datafusion-catalog = { path = "datafusion/catalog", version = "50.2.0" } 
-datafusion-catalog-listing = { path = "datafusion/catalog-listing", version = "50.2.0" } -datafusion-common = { path = "datafusion/common", version = "50.2.0", default-features = false } -datafusion-common-runtime = { path = "datafusion/common-runtime", version = "50.2.0" } -datafusion-datasource = { path = "datafusion/datasource", version = "50.2.0", default-features = false } -datafusion-datasource-avro = { path = "datafusion/datasource-avro", version = "50.2.0", default-features = false } -datafusion-datasource-csv = { path = "datafusion/datasource-csv", version = "50.2.0", default-features = false } -datafusion-datasource-json = { path = "datafusion/datasource-json", version = "50.2.0", default-features = false } -datafusion-datasource-parquet = { path = "datafusion/datasource-parquet", version = "50.2.0", default-features = false } -datafusion-doc = { path = "datafusion/doc", version = "50.2.0" } -datafusion-execution = { path = "datafusion/execution", version = "50.2.0", default-features = false } -datafusion-expr = { path = "datafusion/expr", version = "50.2.0", default-features = false } -datafusion-expr-common = { path = "datafusion/expr-common", version = "50.2.0" } -datafusion-ffi = { path = "datafusion/ffi", version = "50.2.0" } -datafusion-functions = { path = "datafusion/functions", version = "50.2.0" } -datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "50.2.0" } -datafusion-functions-aggregate-common = { path = "datafusion/functions-aggregate-common", version = "50.2.0" } -datafusion-functions-nested = { path = "datafusion/functions-nested", version = "50.2.0", default-features = false } -datafusion-functions-table = { path = "datafusion/functions-table", version = "50.2.0" } -datafusion-functions-window = { path = "datafusion/functions-window", version = "50.2.0" } -datafusion-functions-window-common = { path = "datafusion/functions-window-common", version = "50.2.0" } -datafusion-macros = { path = "datafusion/macros", version = "50.2.0" } -datafusion-optimizer = { path = "datafusion/optimizer", version = "50.2.0", default-features = false } -datafusion-physical-expr = { path = "datafusion/physical-expr", version = "50.2.0", default-features = false } -datafusion-physical-expr-adapter = { path = "datafusion/physical-expr-adapter", version = "50.2.0", default-features = false } -datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "50.2.0", default-features = false } -datafusion-physical-optimizer = { path = "datafusion/physical-optimizer", version = "50.2.0" } -datafusion-physical-plan = { path = "datafusion/physical-plan", version = "50.2.0" } -datafusion-proto = { path = "datafusion/proto", version = "50.2.0" } -datafusion-proto-common = { path = "datafusion/proto-common", version = "50.2.0" } -datafusion-pruning = { path = "datafusion/pruning", version = "50.2.0" } -datafusion-session = { path = "datafusion/session", version = "50.2.0" } -datafusion-spark = { path = "datafusion/spark", version = "50.2.0" } -datafusion-sql = { path = "datafusion/sql", version = "50.2.0" } -datafusion-substrait = { path = "datafusion/substrait", version = "50.2.0" } +datafusion = { path = "datafusion/core", version = "50.3.0", default-features = false } +datafusion-catalog = { path = "datafusion/catalog", version = "50.3.0" } +datafusion-catalog-listing = { path = "datafusion/catalog-listing", version = "50.3.0" } +datafusion-common = { path = "datafusion/common", version = "50.3.0", default-features = false } 
+datafusion-common-runtime = { path = "datafusion/common-runtime", version = "50.3.0" } +datafusion-datasource = { path = "datafusion/datasource", version = "50.3.0", default-features = false } +datafusion-datasource-arrow = { path = "datafusion/datasource-arrow", version = "50.3.0", default-features = false } +datafusion-datasource-avro = { path = "datafusion/datasource-avro", version = "50.3.0", default-features = false } +datafusion-datasource-csv = { path = "datafusion/datasource-csv", version = "50.3.0", default-features = false } +datafusion-datasource-json = { path = "datafusion/datasource-json", version = "50.3.0", default-features = false } +datafusion-datasource-parquet = { path = "datafusion/datasource-parquet", version = "50.3.0", default-features = false } +datafusion-doc = { path = "datafusion/doc", version = "50.3.0" } +datafusion-execution = { path = "datafusion/execution", version = "50.3.0", default-features = false } +datafusion-expr = { path = "datafusion/expr", version = "50.3.0", default-features = false } +datafusion-expr-common = { path = "datafusion/expr-common", version = "50.3.0" } +datafusion-ffi = { path = "datafusion/ffi", version = "50.3.0" } +datafusion-functions = { path = "datafusion/functions", version = "50.3.0" } +datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "50.3.0" } +datafusion-functions-aggregate-common = { path = "datafusion/functions-aggregate-common", version = "50.3.0" } +datafusion-functions-nested = { path = "datafusion/functions-nested", version = "50.3.0", default-features = false } +datafusion-functions-table = { path = "datafusion/functions-table", version = "50.3.0" } +datafusion-functions-window = { path = "datafusion/functions-window", version = "50.3.0" } +datafusion-functions-window-common = { path = "datafusion/functions-window-common", version = "50.3.0" } +datafusion-macros = { path = "datafusion/macros", version = "50.3.0" } +datafusion-optimizer = { path = "datafusion/optimizer", version = "50.3.0", default-features = false } +datafusion-physical-expr = { path = "datafusion/physical-expr", version = "50.3.0", default-features = false } +datafusion-physical-expr-adapter = { path = "datafusion/physical-expr-adapter", version = "50.3.0", default-features = false } +datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "50.3.0", default-features = false } +datafusion-physical-optimizer = { path = "datafusion/physical-optimizer", version = "50.3.0" } +datafusion-physical-plan = { path = "datafusion/physical-plan", version = "50.3.0" } +datafusion-proto = { path = "datafusion/proto", version = "50.3.0" } +datafusion-proto-common = { path = "datafusion/proto-common", version = "50.3.0" } +datafusion-pruning = { path = "datafusion/pruning", version = "50.3.0" } +datafusion-session = { path = "datafusion/session", version = "50.3.0" } +datafusion-spark = { path = "datafusion/spark", version = "50.3.0" } +datafusion-sql = { path = "datafusion/sql", version = "50.3.0" } +datafusion-substrait = { path = "datafusion/substrait", version = "50.3.0" } + doc-comment = "0.3" env_logger = "0.11" futures = "0.3" half = { version = "2.7.0", default-features = false } hashbrown = { version = "0.14.5", features = ["raw"] } hex = { version = "0.4.3" } -indexmap = "2.11.4" +indexmap = "2.12.0" +insta = { version = "1.43.2", features = ["glob", "filters"] } itertools = "0.14" log = "^0.4" object_store = { version = "0.12.4", default-features = false } parking_lot = "0.12" -parquet 
= { version = "56.2.0", default-features = false, features = [ +parquet = { version = "57.0.0", default-features = false, features = [ "arrow", "async", "object_store", ] } -pbjson = { version = "0.7.0" } -pbjson-types = "0.7" +pbjson = { version = "0.8.0" } +pbjson-types = "0.8" # Should match arrow-flight's version of prost. -insta = { version = "1.43.2", features = ["glob", "filters"] } -prost = "0.13.1" +prost = "0.14.1" rand = "0.9" recursive = "0.1.1" -regex = "1.11" +regex = "1.12" rstest = "0.25.0" serde_json = "1" sqlparser = { version = "0.59.0", default-features = false, features = ["std", "visitor"] } tempfile = "3" testcontainers = { version = "0.24", features = ["default"] } testcontainers-modules = { version = "0.12" } -tokio = { version = "1.47", features = ["macros", "rt", "sync"] } +tokio = { version = "1.48", features = ["macros", "rt", "sync"] } url = "2.5.7" [workspace.lints.clippy] diff --git a/README.md b/README.md index 4c4b955176b2..5191496eaafe 100644 --- a/README.md +++ b/README.md @@ -64,7 +64,7 @@ See [use cases] for examples. The following related subprojects target end users DataFusion. "Out of the box," -DataFusion offers [SQL] and [`Dataframe`] APIs, excellent [performance], +DataFusion offers [SQL](https://datafusion.apache.org/user-guide/sql/index.html) and [Dataframe](https://datafusion.apache.org/user-guide/dataframe.html) APIs, excellent [performance], built-in support for CSV, Parquet, JSON, and Avro, extensive customization, and a great community. diff --git a/benchmarks/src/imdb/run.rs b/benchmarks/src/imdb/run.rs index 3d58d5f54d4b..11bd424ba686 100644 --- a/benchmarks/src/imdb/run.rs +++ b/benchmarks/src/imdb/run.rs @@ -534,7 +534,7 @@ mod tests { let plan = ctx.sql(&query).await?; let plan = plan.into_optimized_plan()?; let bytes = logical_plan_to_bytes(&plan)?; - let plan2 = logical_plan_from_bytes(&bytes, &ctx)?; + let plan2 = logical_plan_from_bytes(&bytes, &ctx.task_ctx())?; let plan_formatted = format!("{}", plan.display_indent()); let plan2_formatted = format!("{}", plan2.display_indent()); assert_eq!(plan_formatted, plan2_formatted); diff --git a/benchmarks/src/nlj.rs b/benchmarks/src/nlj.rs index e412c0ade8a8..7d1e14f69439 100644 --- a/benchmarks/src/nlj.rs +++ b/benchmarks/src/nlj.rs @@ -146,6 +146,45 @@ const NLJ_QUERIES: &[&str] = &[ FULL JOIN range(30000) AS t2 ON (t1.value > t2.value); "#, + // Q13: LEFT SEMI 30K x 30K | HIGH 99.9% + r#" + SELECT t1.* + FROM range(30000) AS t1 + LEFT SEMI JOIN range(30000) AS t2 + ON t1.value < t2.value; + "#, + // Q14: LEFT ANTI 30K x 30K | LOW 0.003% + r#" + SELECT t1.* + FROM range(30000) AS t1 + LEFT ANTI JOIN range(30000) AS t2 + ON t1.value < t2.value; + "#, + // Q15: RIGHT SEMI 30K x 30K | HIGH 99.9% + r#" + SELECT t1.* + FROM range(30000) AS t2 + RIGHT SEMI JOIN range(30000) AS t1 + ON t2.value < t1.value; + "#, + // Q16: RIGHT ANTI 30K x 30K | LOW 0.003% + r#" + SELECT t1.* + FROM range(30000) AS t2 + RIGHT ANTI JOIN range(30000) AS t1 + ON t2.value < t1.value; + "#, + // Q17: LEFT MARK | HIGH 99.9% + r#" + SELECT * + FROM range(30000) AS t2(k2) + WHERE k2 > 0 + OR EXISTS ( + SELECT 1 + FROM range(30000) AS t1(k1) + WHERE t2.k2 > t1.k1 + ); + "#, ]; impl RunOpt { diff --git a/benchmarks/src/tpch/run.rs b/benchmarks/src/tpch/run.rs index b93bdf254a27..cc59b7803036 100644 --- a/benchmarks/src/tpch/run.rs +++ b/benchmarks/src/tpch/run.rs @@ -92,6 +92,15 @@ pub struct RunOpt { #[structopt(short = "j", long = "prefer_hash_join", default_value = "true")] prefer_hash_join: BoolDefaultTrue, + /// If 
true, Piecewise Merge Join can be used; otherwise Nested Loop Join is used. + /// False by default. + #[structopt( + long = "enable_piecewise_merge_join", + default_value = "false" + )] + enable_piecewise_merge_join: BoolDefaultTrue, + /// Mark the first column of each table as sorted in ascending order. /// The tables should have been created with the `--sort` option for this to have any effect. #[structopt(short = "t", long = "sorted")] @@ -112,6 +121,8 @@ impl RunOpt { .config()? .with_collect_statistics(!self.disable_statistics); config.options_mut().optimizer.prefer_hash_join = self.prefer_hash_join; + config.options_mut().optimizer.enable_piecewise_merge_join = + self.enable_piecewise_merge_join; let rt_builder = self.common.runtime_env_builder()?; let ctx = SessionContext::new_with_config_rt(config, rt_builder.build_arc()?); // register tables @@ -379,6 +390,7 @@ mod tests { output_path: None, disable_statistics: false, prefer_hash_join: true, + enable_piecewise_merge_join: false, sorted: false, }; opt.register_tables(&ctx).await?; @@ -387,7 +399,7 @@ mod tests { let plan = ctx.sql(&query).await?; let plan = plan.into_optimized_plan()?; let bytes = logical_plan_to_bytes(&plan)?; - let plan2 = logical_plan_from_bytes(&bytes, &ctx)?; + let plan2 = logical_plan_from_bytes(&bytes, &ctx.task_ctx())?; let plan_formatted = format!("{}", plan.display_indent()); let plan2_formatted = format!("{}", plan2.display_indent()); assert_eq!(plan_formatted, plan2_formatted); @@ -416,6 +428,7 @@ mod tests { output_path: None, disable_statistics: false, prefer_hash_join: true, + enable_piecewise_merge_join: false, sorted: false, }; opt.register_tables(&ctx).await?; diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index d186cd711945..f3069b492352 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -40,7 +40,7 @@ async-trait = { workspace = true } aws-config = "1.8.7" aws-credential-types = "1.2.7" chrono = { workspace = true } -clap = { version = "4.5.47", features = ["derive", "cargo"] } +clap = { version = "4.5.50", features = ["cargo", "derive"] } datafusion = { workspace = true, features = [ "avro", "compression", @@ -55,6 +55,7 @@ datafusion = { workspace = true, features = [ "sql", "unicode_expressions", ] } +datafusion-common = { workspace = true } dirs = "6.0.0" env_logger = { workspace = true } futures = { workspace = true } @@ -65,7 +66,7 @@ parking_lot = { workspace = true } parquet = { workspace = true, default-features = false } regex = { workspace = true } rustyline = "17.0" -tokio = { workspace = true, features = ["macros", "rt", "rt-multi-thread", "sync", "parking_lot", "signal"] } +tokio = { workspace = true, features = ["macros", "parking_lot", "rt", "rt-multi-thread", "signal", "sync"] } url = { workspace = true } [dev-dependencies] diff --git a/datafusion-cli/src/command.rs b/datafusion-cli/src/command.rs index 48fb37e8a888..3fbfe5680cfc 100644 --- a/datafusion-cli/src/command.rs +++ b/datafusion-cli/src/command.rs @@ -128,7 +128,7 @@ impl Command { let profile_mode = mode .parse() .map_err(|_| - exec_datafusion_err!("Failed to parse input: {mode}. 
Valid options are disabled, summary, trace") )?; print_options .instrumented_registry @@ -165,7 +165,7 @@ impl Command { ("\\pset [NAME [VALUE]]", "set table output option\n(format)") } Self::ObjectStoreProfileMode(_) => ( - "\\object_store_profiling (disabled|enabled)", + "\\object_store_profiling (disabled|summary|trace)", "print or set object store profile mode", ), } @@ -312,13 +312,22 @@ mod tests { InstrumentedObjectStoreMode::default() ); - cmd = "object_store_profiling enabled" + cmd = "object_store_profiling summary" .parse() .expect("expected parse to succeed"); assert!(cmd.execute(&ctx, &mut print_options).await.is_ok()); assert_eq!( print_options.instrumented_registry.instrument_mode(), - InstrumentedObjectStoreMode::Enabled + InstrumentedObjectStoreMode::Summary + ); + + cmd = "object_store_profiling trace" + .parse() + .expect("expected parse to succeed"); + assert!(cmd.execute(&ctx, &mut print_options).await.is_ok()); + assert_eq!( + print_options.instrumented_registry.instrument_mode(), + InstrumentedObjectStoreMode::Trace ); cmd = "object_store_profiling does_not_exist" diff --git a/datafusion-cli/src/functions.rs b/datafusion-cli/src/functions.rs index 3ec446c51583..d23b12469e38 100644 --- a/datafusion-cli/src/functions.rs +++ b/datafusion-cli/src/functions.rs @@ -419,7 +419,9 @@ impl TableFunctionImpl for ParquetMetadataFunc { stats_max_value_arr.push(None); }; compression_arr.push(format!("{:?}", column.compression())); - encodings_arr.push(format!("{:?}", column.encodings())); + // need to collect into Vec to format + let encodings: Vec<_> = column.encodings().collect(); + encodings_arr.push(format!("{:?}", encodings)); index_page_offset_arr.push(column.index_page_offset()); dictionary_page_offset_arr.push(column.dictionary_page_offset()); data_page_offset_arr.push(column.data_page_offset()); diff --git a/datafusion-cli/src/helper.rs b/datafusion-cli/src/helper.rs index 64c34c473736..219637b3460e 100644 --- a/datafusion-cli/src/helper.rs +++ b/datafusion-cli/src/helper.rs @@ -24,6 +24,7 @@ use crate::highlighter::{NoSyntaxHighlighter, SyntaxHighlighter}; use datafusion::sql::parser::{DFParser, Statement}; use datafusion::sql::sqlparser::dialect::dialect_from_str; +use datafusion_common::config::Dialect; use rustyline::completion::{Completer, FilenameCompleter, Pair}; use rustyline::error::ReadlineError; @@ -34,12 +35,12 @@ use rustyline::{Context, Helper, Result}; pub struct CliHelper { completer: FilenameCompleter, - dialect: String, + dialect: Dialect, highlighter: Box, } impl CliHelper { - pub fn new(dialect: &str, color: bool) -> Self { + pub fn new(dialect: &Dialect, color: bool) -> Self { let highlighter: Box = if !color { Box::new(NoSyntaxHighlighter {}) } else { @@ -47,20 +48,20 @@ impl CliHelper { }; Self { completer: FilenameCompleter::new(), - dialect: dialect.into(), + dialect: *dialect, highlighter, } } - pub fn set_dialect(&mut self, dialect: &str) { - if dialect != self.dialect { - self.dialect = dialect.to_string(); + pub fn set_dialect(&mut self, dialect: &Dialect) { + if *dialect != self.dialect { + self.dialect = *dialect; } } fn validate_input(&self, input: &str) -> Result { if let Some(sql) = input.strip_suffix(';') { - let dialect = match dialect_from_str(&self.dialect) { + let dialect = match dialect_from_str(self.dialect) { Some(dialect) => dialect, None => { return Ok(ValidationResult::Invalid(Some(format!( @@ -97,7 +98,7 @@ impl CliHelper { impl Default for CliHelper { fn default() -> Self { - Self::new("generic", false) + 
Self::new(&Dialect::Generic, false) } } @@ -289,7 +290,7 @@ mod tests { ); // valid in postgresql dialect - validator.set_dialect("postgresql"); + validator.set_dialect(&Dialect::PostgreSQL); let result = readline_direct(Cursor::new(r"select 1 # 2;".as_bytes()), &validator)?; assert!(matches!(result, ValidationResult::Valid(None))); diff --git a/datafusion-cli/src/highlighter.rs b/datafusion-cli/src/highlighter.rs index 7a886b94740b..f4e57a2e3593 100644 --- a/datafusion-cli/src/highlighter.rs +++ b/datafusion-cli/src/highlighter.rs @@ -27,6 +27,7 @@ use datafusion::sql::sqlparser::{ keywords::Keyword, tokenizer::{Token, Tokenizer}, }; +use datafusion_common::config; use rustyline::highlight::{CmdKind, Highlighter}; /// The syntax highlighter. @@ -36,7 +37,7 @@ pub struct SyntaxHighlighter { } impl SyntaxHighlighter { - pub fn new(dialect: &str) -> Self { + pub fn new(dialect: &config::Dialect) -> Self { let dialect = dialect_from_str(dialect).unwrap_or(Box::new(GenericDialect {})); Self { dialect } } @@ -93,13 +94,14 @@ impl Color { #[cfg(test)] mod tests { + use super::config::Dialect; use super::SyntaxHighlighter; use rustyline::highlight::Highlighter; #[test] fn highlighter_valid() { let s = "SElect col_a from tab_1;"; - let highlighter = SyntaxHighlighter::new("generic"); + let highlighter = SyntaxHighlighter::new(&Dialect::Generic); let out = highlighter.highlight(s, s.len()); assert_eq!( "\u{1b}[91mSElect\u{1b}[0m col_a \u{1b}[91mfrom\u{1b}[0m tab_1;", @@ -110,7 +112,7 @@ mod tests { #[test] fn highlighter_valid_with_new_line() { let s = "SElect col_a from tab_1\n WHERE col_b = 'なにか';"; - let highlighter = SyntaxHighlighter::new("generic"); + let highlighter = SyntaxHighlighter::new(&Dialect::Generic); let out = highlighter.highlight(s, s.len()); assert_eq!( "\u{1b}[91mSElect\u{1b}[0m col_a \u{1b}[91mfrom\u{1b}[0m tab_1\n \u{1b}[91mWHERE\u{1b}[0m col_b = \u{1b}[92m'なにか'\u{1b}[0m;", @@ -121,7 +123,7 @@ mod tests { #[test] fn highlighter_invalid() { let s = "SElect col_a from tab_1 WHERE col_b = ';"; - let highlighter = SyntaxHighlighter::new("generic"); + let highlighter = SyntaxHighlighter::new(&Dialect::Generic); let out = highlighter.highlight(s, s.len()); assert_eq!("SElect col_a from tab_1 WHERE col_b = ';", out); } diff --git a/datafusion-cli/src/main.rs b/datafusion-cli/src/main.rs index 3dbe839d3c9b..09fa8ef15af8 100644 --- a/datafusion-cli/src/main.rs +++ b/datafusion-cli/src/main.rs @@ -151,7 +151,7 @@ struct Args { #[clap( long, - help = "Specify the default object_store_profiling mode, defaults to 'disabled'.\n[possible values: disabled, enabled]", + help = "Specify the default object_store_profiling mode, defaults to 'disabled'.\n[possible values: disabled, summary, trace]", default_value_t = InstrumentedObjectStoreMode::Disabled )] object_store_profiling: InstrumentedObjectStoreMode, @@ -497,7 +497,7 @@ mod tests { +-------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+-------+-----------+-----------+------------------+----------------------+-----------------+-----------------+-------------+------------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+ | filename | row_group_id | row_group_num_rows | row_group_num_columns | row_group_bytes | column_id | file_offset | num_values | path_in_schema | type | stats_min | stats_max | stats_null_count | 
stats_distinct_count | stats_min_value | stats_max_value | compression | encodings | index_page_offset | dictionary_page_offset | data_page_offset | total_compressed_size | total_uncompressed_size | +-------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+-------+-----------+-----------+------------------+----------------------+-----------------+-----------------+-------------+------------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+ - | ../datafusion/core/tests/data/fixed_size_list_array.parquet | 0 | 2 | 1 | 123 | 0 | 125 | 4 | "f0.list.item" | INT64 | 1 | 4 | 0 | | 1 | 4 | SNAPPY | [RLE_DICTIONARY, PLAIN, RLE] | | 4 | 46 | 121 | 123 | + | ../datafusion/core/tests/data/fixed_size_list_array.parquet | 0 | 2 | 1 | 123 | 0 | 125 | 4 | "f0.list.item" | INT64 | 1 | 4 | 0 | | 1 | 4 | SNAPPY | [PLAIN, RLE, RLE_DICTIONARY] | | 4 | 46 | 121 | 123 | +-------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+-------+-----------+-----------+------------------+----------------------+-----------------+-----------------+-------------+------------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+ "#); @@ -510,7 +510,7 @@ mod tests { +-------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+-------+-----------+-----------+------------------+----------------------+-----------------+-----------------+-------------+------------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+ | filename | row_group_id | row_group_num_rows | row_group_num_columns | row_group_bytes | column_id | file_offset | num_values | path_in_schema | type | stats_min | stats_max | stats_null_count | stats_distinct_count | stats_min_value | stats_max_value | compression | encodings | index_page_offset | dictionary_page_offset | data_page_offset | total_compressed_size | total_uncompressed_size | +-------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+-------+-----------+-----------+------------------+----------------------+-----------------+-----------------+-------------+------------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+ - | ../datafusion/core/tests/data/fixed_size_list_array.parquet | 0 | 2 | 1 | 123 | 0 | 125 | 4 | "f0.list.item" | INT64 | 1 | 4 | 0 | | 1 | 4 | SNAPPY | [RLE_DICTIONARY, PLAIN, RLE] | | 4 | 46 | 121 | 123 | + | ../datafusion/core/tests/data/fixed_size_list_array.parquet | 0 | 2 | 1 | 123 | 0 | 125 | 4 | "f0.list.item" | INT64 | 1 | 4 | 0 | | 1 | 4 | SNAPPY | [PLAIN, RLE, RLE_DICTIONARY] | | 4 | 46 | 121 | 123 | 
+-------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+-------+-----------+-----------+------------------+----------------------+-----------------+-----------------+-------------+------------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+ "#); @@ -532,7 +532,7 @@ mod tests { +-----------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+------------+-----------+-----------+------------------+----------------------+-----------------+-----------------+--------------------+--------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+ | filename | row_group_id | row_group_num_rows | row_group_num_columns | row_group_bytes | column_id | file_offset | num_values | path_in_schema | type | stats_min | stats_max | stats_null_count | stats_distinct_count | stats_min_value | stats_max_value | compression | encodings | index_page_offset | dictionary_page_offset | data_page_offset | total_compressed_size | total_uncompressed_size | +-----------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+------------+-----------+-----------+------------------+----------------------+-----------------+-----------------+--------------------+--------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+ - | ../parquet-testing/data/data_index_bloom_encoding_stats.parquet | 0 | 14 | 1 | 163 | 0 | 4 | 14 | "String" | BYTE_ARRAY | Hello | today | 0 | | Hello | today | GZIP(GzipLevel(6)) | [BIT_PACKED, RLE, PLAIN] | | | 4 | 152 | 163 | + | ../parquet-testing/data/data_index_bloom_encoding_stats.parquet | 0 | 14 | 1 | 163 | 0 | 4 | 14 | "String" | BYTE_ARRAY | Hello | today | 0 | | Hello | today | GZIP(GzipLevel(6)) | [PLAIN, RLE, BIT_PACKED] | | | 4 | 152 | 163 | +-----------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+------------+-----------+-----------+------------------+----------------------+-----------------+-----------------+--------------------+--------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+ "#); @@ -592,9 +592,9 @@ mod tests { +-----------------------------------+-----------------+---------------------+------+------------------+ | filename | file_size_bytes | metadata_size_bytes | hits | extra | +-----------------------------------+-----------------+---------------------+------+------------------+ - | alltypes_plain.parquet | 1851 | 10181 | 2 | page_index=false | - | alltypes_tiny_pages.parquet | 454233 | 881418 | 2 | page_index=true | - | lz4_raw_compressed_larger.parquet | 380836 | 2939 | 2 | page_index=false | + | alltypes_plain.parquet | 1851 | 6957 | 2 | page_index=false | + | alltypes_tiny_pages.parquet | 454233 | 267014 | 2 | page_index=true | + | lz4_raw_compressed_larger.parquet | 380836 | 996 | 2 | page_index=false | 
+-----------------------------------+-----------------+---------------------+------+------------------+ "); @@ -623,9 +623,9 @@ mod tests { +-----------------------------------+-----------------+---------------------+------+------------------+ | filename | file_size_bytes | metadata_size_bytes | hits | extra | +-----------------------------------+-----------------+---------------------+------+------------------+ - | alltypes_plain.parquet | 1851 | 10181 | 5 | page_index=false | - | alltypes_tiny_pages.parquet | 454233 | 881418 | 2 | page_index=true | - | lz4_raw_compressed_larger.parquet | 380836 | 2939 | 3 | page_index=false | + | alltypes_plain.parquet | 1851 | 6957 | 5 | page_index=false | + | alltypes_tiny_pages.parquet | 454233 | 267014 | 2 | page_index=true | + | lz4_raw_compressed_larger.parquet | 380836 | 996 | 3 | page_index=false | +-----------------------------------+-----------------+---------------------+------+------------------+ "); diff --git a/datafusion-cli/src/object_storage/instrumented.rs b/datafusion-cli/src/object_storage/instrumented.rs index 9252e0688c35..c4b63b417fe4 100644 --- a/datafusion-cli/src/object_storage/instrumented.rs +++ b/datafusion-cli/src/object_storage/instrumented.rs @@ -26,6 +26,8 @@ use std::{ time::Duration, }; +use arrow::array::{ArrayRef, RecordBatch, StringArray}; +use arrow::util::pretty::pretty_format_batches; use async_trait::async_trait; use chrono::Utc; use datafusion::{ @@ -48,13 +50,15 @@ pub enum InstrumentedObjectStoreMode { /// Disable collection of profiling data #[default] Disabled, - /// Enable collection of profiling data - Enabled, + /// Enable collection of profiling data and output a summary + Summary, + /// Enable collection of profiling data and output a summary and all details + Trace, } impl fmt::Display for InstrumentedObjectStoreMode { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{:?}", self) + write!(f, "{self:?}") } } @@ -64,7 +68,8 @@ impl FromStr for InstrumentedObjectStoreMode { fn from_str(s: &str) -> std::result::Result { match s.to_lowercase().as_str() { "disabled" => Ok(Self::Disabled), - "enabled" => Ok(Self::Enabled), + "summary" => Ok(Self::Summary), + "trace" => Ok(Self::Trace), _ => Err(DataFusionError::Execution(format!("Unrecognized mode {s}"))), } } @@ -73,7 +78,8 @@ impl FromStr for InstrumentedObjectStoreMode { impl From for InstrumentedObjectStoreMode { fn from(value: u8) -> Self { match value { - 1 => InstrumentedObjectStoreMode::Enabled, + 1 => InstrumentedObjectStoreMode::Summary, + 2 => InstrumentedObjectStoreMode::Trace, _ => InstrumentedObjectStoreMode::Disabled, } } @@ -110,6 +116,59 @@ impl InstrumentedObjectStore { req.drain(..).collect() } + fn enabled(&self) -> bool { + self.instrument_mode.load(Ordering::Relaxed) + != InstrumentedObjectStoreMode::Disabled as u8 + } + + async fn instrumented_put_opts( + &self, + location: &Path, + payload: PutPayload, + opts: PutOptions, + ) -> Result { + let timestamp = Utc::now(); + let start = Instant::now(); + let size = payload.content_length(); + let ret = self.inner.put_opts(location, payload, opts).await?; + let elapsed = start.elapsed(); + + self.requests.lock().push(RequestDetails { + op: Operation::Put, + path: location.clone(), + timestamp, + duration: Some(elapsed), + size: Some(size), + range: None, + extra_display: None, + }); + + Ok(ret) + } + + async fn instrumented_put_multipart( + &self, + location: &Path, + opts: PutMultipartOptions, + ) -> Result> { + let timestamp = Utc::now(); + let start = 
Instant::now(); + let ret = self.inner.put_multipart_opts(location, opts).await?; + let elapsed = start.elapsed(); + + self.requests.lock().push(RequestDetails { + op: Operation::Put, + path: location.clone(), + timestamp, + duration: Some(elapsed), + size: None, + range: None, + extra_display: None, + }); + + Ok(ret) + } + async fn instrumented_get_opts( &self, location: &Path, @@ -134,6 +193,128 @@ impl InstrumentedObjectStore { Ok(ret) } + + async fn instrumented_delete(&self, location: &Path) -> Result<()> { + let timestamp = Utc::now(); + let start = Instant::now(); + self.inner.delete(location).await?; + let elapsed = start.elapsed(); + + self.requests.lock().push(RequestDetails { + op: Operation::Delete, + path: location.clone(), + timestamp, + duration: Some(elapsed), + size: None, + range: None, + extra_display: None, + }); + + Ok(()) + } + + fn instrumented_list( + &self, + prefix: Option<&Path>, + ) -> BoxStream<'static, Result> { + let timestamp = Utc::now(); + let ret = self.inner.list(prefix); + + self.requests.lock().push(RequestDetails { + op: Operation::List, + path: prefix.cloned().unwrap_or_else(|| Path::from("")), + timestamp, + duration: None, // list returns a stream, so the duration isn't meaningful + size: None, + range: None, + extra_display: None, + }); + + ret + } + + async fn instrumented_list_with_delimiter( + &self, + prefix: Option<&Path>, + ) -> Result { + let timestamp = Utc::now(); + let start = Instant::now(); + let ret = self.inner.list_with_delimiter(prefix).await?; + let elapsed = start.elapsed(); + + self.requests.lock().push(RequestDetails { + op: Operation::List, + path: prefix.cloned().unwrap_or_else(|| Path::from("")), + timestamp, + duration: Some(elapsed), + size: None, + range: None, + extra_display: None, + }); + + Ok(ret) + } + + async fn instrumented_copy(&self, from: &Path, to: &Path) -> Result<()> { + let timestamp = Utc::now(); + let start = Instant::now(); + self.inner.copy(from, to).await?; + let elapsed = start.elapsed(); + + self.requests.lock().push(RequestDetails { + op: Operation::Copy, + path: from.clone(), + timestamp, + duration: Some(elapsed), + size: None, + range: None, + extra_display: Some(format!("copy_to: {to}")), + }); + + Ok(()) + } + + async fn instrumented_copy_if_not_exists( + &self, + from: &Path, + to: &Path, + ) -> Result<()> { + let timestamp = Utc::now(); + let start = Instant::now(); + self.inner.copy_if_not_exists(from, to).await?; + let elapsed = start.elapsed(); + + self.requests.lock().push(RequestDetails { + op: Operation::Copy, + path: from.clone(), + timestamp, + duration: Some(elapsed), + size: None, + range: None, + extra_display: Some(format!("copy_to: {to}")), + }); + + Ok(()) + } + + async fn instrumented_head(&self, location: &Path) -> Result { + let timestamp = Utc::now(); + let start = Instant::now(); + let ret = self.inner.head(location).await?; + let elapsed = start.elapsed(); + + self.requests.lock().push(RequestDetails { + op: Operation::Head, + path: location.clone(), + timestamp, + duration: Some(elapsed), + size: None, + range: None, + extra_display: None, + }); + + Ok(ret) + } } impl fmt::Display for InstrumentedObjectStore { @@ -156,6 +337,10 @@ impl ObjectStore for InstrumentedObjectStore { payload: PutPayload, opts: PutOptions, ) -> Result { + if self.enabled() { + return self.instrumented_put_opts(location, payload, opts).await; + } + self.inner.put_opts(location, payload, opts).await } @@ -164,13 +349,15 @@ impl ObjectStore for InstrumentedObjectStore { location: &Path, opts: 
PutMultipartOptions, ) -> Result> { + if self.enabled() { + return self.instrumented_put_multipart(location, opts).await; + } + self.inner.put_multipart_opts(location, opts).await } async fn get_opts(&self, location: &Path, options: GetOptions) -> Result { - if self.instrument_mode.load(Ordering::Relaxed) - != InstrumentedObjectStoreMode::Disabled as u8 - { + if self.enabled() { return self.instrumented_get_opts(location, options).await; } @@ -178,39 +365,69 @@ impl ObjectStore for InstrumentedObjectStore { } async fn delete(&self, location: &Path) -> Result<()> { + if self.enabled() { + return self.instrumented_delete(location).await; + } + self.inner.delete(location).await } fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result> { + if self.enabled() { + return self.instrumented_list(prefix); + } + self.inner.list(prefix) } async fn list_with_delimiter(&self, prefix: Option<&Path>) -> Result { + if self.enabled() { + return self.instrumented_list_with_delimiter(prefix).await; + } + self.inner.list_with_delimiter(prefix).await } async fn copy(&self, from: &Path, to: &Path) -> Result<()> { + if self.enabled() { + return self.instrumented_copy(from, to).await; + } + self.inner.copy(from, to).await } async fn copy_if_not_exists(&self, from: &Path, to: &Path) -> Result<()> { + if self.enabled() { + return self.instrumented_copy_if_not_exists(from, to).await; + } + self.inner.copy_if_not_exists(from, to).await } async fn head(&self, location: &Path) -> Result { + if self.enabled() { + return self.instrumented_head(location).await; + } + self.inner.head(location).await } } /// Object store operation types tracked by [`InstrumentedObjectStore`] -#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] pub enum Operation { - _Copy, - _Delete, + Copy, + Delete, Get, - _Head, - _List, - _Put, + Head, + List, + Put, +} + +impl fmt::Display for Operation { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{self:?}") + } } /// Holds profiling details about individual requests made through an [`InstrumentedObjectStore`] @@ -252,35 +469,172 @@ impl fmt::Display for RequestDetails { } } -/// Summary statistics for an [`InstrumentedObjectStore`]'s [`RequestDetails`] +/// Summary statistics for all requests recorded in an [`InstrumentedObjectStore`] #[derive(Default)] -pub struct RequestSummary { - count: usize, - duration_stats: Option>, - size_stats: Option>, +pub struct RequestSummaries { + summaries: Vec, } -impl RequestSummary { - /// Generates a set of [RequestSummaries](RequestSummary) from the input [`RequestDetails`] - /// grouped by the input's [`Operation`] - pub fn summarize_by_operation( - requests: &[RequestDetails], - ) -> HashMap { - let mut summaries: HashMap = HashMap::new(); +/// Display the summary as a table +impl fmt::Display for RequestSummaries { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // Don't expect an error, but avoid panicking if it happens + match pretty_format_batches(&[self.to_batch()]) { + Err(e) => { + write!(f, "Error formatting summary: {e}") + } + Ok(displayable) => { + write!(f, "{displayable}") + } + } + } +} + +impl RequestSummaries { + /// Summarizes input [`RequestDetails`] + pub fn new(requests: &[RequestDetails]) -> Self { + let mut summaries: HashMap = HashMap::new(); for rd in requests { match summaries.get_mut(&rd.op) { Some(rs) => rs.push(rd), None => { - let mut rs = RequestSummary::default(); + let mut rs = 
RequestSummary::new(rd.op); rs.push(rd); summaries.insert(rd.op, rs); } } } + // Convert to a Vec with consistent ordering + let mut summaries: Vec = summaries.into_values().collect(); + summaries.sort_by_key(|s| s.operation); + Self { summaries } + } + + /// Convert the summaries into a `RecordBatch` for display + /// + /// Results in a table like: + /// ```text + /// +-----------+----------+-----------+-----------+-----------+-----------+-----------+ + /// | Operation | Metric | min | max | avg | sum | count | + /// +-----------+----------+-----------+-----------+-----------+-----------+-----------+ + /// | Get | duration | 5.000000s | 5.000000s | 5.000000s | | 1 | + /// | Get | size | 100 B | 100 B | 100 B | 100 B | 1 | + /// +-----------+----------+-----------+-----------+-----------+-----------+-----------+ + /// ``` + pub fn to_batch(&self) -> RecordBatch { + let operations: StringArray = self + .iter() + .flat_map(|s| std::iter::repeat_n(Some(s.operation.to_string()), 2)) + .collect(); + let metrics: StringArray = self + .iter() + .flat_map(|_s| [Some("duration"), Some("size")]) + .collect(); + let mins: StringArray = self + .stats_iter() + .flat_map(|(duration_stats, size_stats)| { + let dur_min = + duration_stats.map(|d| format!("{:.6}s", d.min.as_secs_f32())); + let size_min = size_stats.map(|s| format!("{} B", s.min)); + [dur_min, size_min] + }) + .collect(); + let maxs: StringArray = self + .stats_iter() + .flat_map(|(duration_stats, size_stats)| { + let dur_max = + duration_stats.map(|d| format!("{:.6}s", d.max.as_secs_f32())); + let size_max = size_stats.map(|s| format!("{} B", s.max)); + [dur_max, size_max] + }) + .collect(); + let avgs: StringArray = self + .iter() + .flat_map(|s| { + let count = s.count as f32; + let duration_stats = s.duration_stats.as_ref(); + let size_stats = s.size_stats.as_ref(); + let dur_avg = duration_stats.map(|d| { + let avg = d.sum.as_secs_f32() / count; + format!("{avg:.6}s") + }); + let size_avg = size_stats.map(|s| { + let avg = s.sum as f32 / count; + format!("{avg} B") + }); + [dur_avg, size_avg] + }) + .collect(); + let sums: StringArray = self + .stats_iter() + .flat_map(|(duration_stats, size_stats)| { + // Omit a sum stat for duration in the initial + // implementation because it can be a bit misleading (at least + // at first glance). For example, particularly large queries the + // sum of the durations was often larger than the total time of + // the query itself, can be confusing without additional + // explanation (e.g. that the sum is of individual requests, + // which may be concurrent). 
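For reference, the summary table that `to_batch` produces is an ordinary Arrow `RecordBatch` of string columns rendered through `arrow::util::pretty::pretty_format_batches`. A minimal standalone sketch of that pattern (illustrative column values only, not the PR's code):

```rust
use std::sync::Arc;

use arrow::array::{ArrayRef, RecordBatch, StringArray};
use arrow::util::pretty::pretty_format_batches;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Two string columns, one row per (operation, metric) pair.
    let operations: StringArray = [Some("Get"), Some("Get")].into_iter().collect();
    let metrics: StringArray = [Some("duration"), Some("size")].into_iter().collect();

    let batch = RecordBatch::try_from_iter(vec![
        ("Operation", Arc::new(operations) as ArrayRef),
        ("Metric", Arc::new(metrics) as ArrayRef),
    ])?;

    // pretty_format_batches returns a Display-able ASCII table,
    // like the example shown in the doc comment above.
    println!("{}", pretty_format_batches(&[batch])?);
    Ok(())
}
```

The `to_batch` implementation here fills the same kind of string columns, emitting one duration row and one size row per summarized operation.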
+ let dur_sum = + duration_stats.map(|d| format!("{:.6}s", d.sum.as_secs_f32())); + let size_sum = size_stats.map(|s| format!("{} B", s.sum)); + [dur_sum, size_sum] + }) + .collect(); + let counts: StringArray = self + .iter() + .flat_map(|s| { + let count = s.count.to_string(); + [Some(count.clone()), Some(count)] + }) + .collect(); + + RecordBatch::try_from_iter(vec![ + ("Operation", Arc::new(operations) as ArrayRef), + ("Metric", Arc::new(metrics) as ArrayRef), + ("min", Arc::new(mins) as ArrayRef), + ("max", Arc::new(maxs) as ArrayRef), + ("avg", Arc::new(avgs) as ArrayRef), + ("sum", Arc::new(sums) as ArrayRef), + ("count", Arc::new(counts) as ArrayRef), + ]) + .expect("Created the batch correctly") + } - summaries + /// Return an iterator over the summaries + fn iter(&self) -> impl Iterator { + self.summaries.iter() + } + + /// Return an iterator over (duration_stats, size_stats) tuples + /// for each summary + fn stats_iter( + &self, + ) -> impl Iterator>, Option<&Stats>)> { + self.summaries + .iter() + .map(|s| (s.duration_stats.as_ref(), s.size_stats.as_ref())) } +} +/// Summary statistics for a particular type of [`Operation`] (e.g. `GET` or `PUT`) +/// in an [`InstrumentedObjectStore`]'s [`RequestDetails`] +pub struct RequestSummary { + operation: Operation, + count: usize, + duration_stats: Option>, + size_stats: Option>, +} + +impl RequestSummary { + fn new(operation: Operation) -> Self { + Self { + operation, + count: 0, + duration_stats: None, + size_stats: None, + } + } fn push(&mut self, request: &RequestDetails) { self.count += 1; if let Some(dur) = request.duration { @@ -292,29 +646,6 @@ impl RequestSummary { } } -impl fmt::Display for RequestSummary { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - writeln!(f, "count: {}", self.count)?; - - if let Some(dur_stats) = &self.duration_stats { - writeln!(f, "duration min: {:.6}s", dur_stats.min.as_secs_f32())?; - writeln!(f, "duration max: {:.6}s", dur_stats.max.as_secs_f32())?; - let avg = dur_stats.sum.as_secs_f32() / (self.count as f32); - writeln!(f, "duration avg: {:.6}s", avg)?; - } - - if let Some(size_stats) = &self.size_stats { - writeln!(f, "size min: {} B", size_stats.min)?; - writeln!(f, "size max: {} B", size_stats.max)?; - let avg = size_stats.sum / self.count; - writeln!(f, "size avg: {} B", avg)?; - writeln!(f, "size sum: {} B", size_stats.sum)?; - } - - Ok(()) - } -} - struct Stats> { min: T, max: T, @@ -413,6 +744,13 @@ impl ObjectStoreRegistry for InstrumentedObjectStoreRegistry { self.inner.register_store(url, instrumented) } + fn deregister_store( + &self, + url: &Url, + ) -> datafusion::common::Result> { + self.inner.deregister_store(url) + } + fn get_store(&self, url: &Url) -> datafusion::common::Result> { self.inner.get_store(url) } @@ -420,7 +758,10 @@ impl ObjectStoreRegistry for InstrumentedObjectStoreRegistry { #[cfg(test)] mod tests { + use object_store::WriteMultipart; + use super::*; + use insta::assert_snapshot; #[test] fn instrumented_mode() { @@ -434,16 +775,21 @@ mod tests { InstrumentedObjectStoreMode::Disabled )); assert!(matches!( - "EnABlEd".parse().unwrap(), - InstrumentedObjectStoreMode::Enabled + "SUmMaRy".parse().unwrap(), + InstrumentedObjectStoreMode::Summary + )); + assert!(matches!( + "TRaCe".parse().unwrap(), + InstrumentedObjectStoreMode::Trace )); assert!("does_not_exist" .parse::() .is_err()); assert!(matches!(0.into(), InstrumentedObjectStoreMode::Disabled)); - assert!(matches!(1.into(), InstrumentedObjectStoreMode::Enabled)); - 
assert!(matches!(2.into(), InstrumentedObjectStoreMode::Disabled)); + assert!(matches!(1.into(), InstrumentedObjectStoreMode::Summary)); + assert!(matches!(2.into(), InstrumentedObjectStoreMode::Trace)); + assert!(matches!(3.into(), InstrumentedObjectStoreMode::Disabled)); } #[test] @@ -455,8 +801,8 @@ mod tests { InstrumentedObjectStoreMode::default() ); - reg = reg.with_profile_mode(InstrumentedObjectStoreMode::Enabled); - assert_eq!(reg.instrument_mode(), InstrumentedObjectStoreMode::Enabled); + reg = reg.with_profile_mode(InstrumentedObjectStoreMode::Trace); + assert_eq!(reg.instrument_mode(), InstrumentedObjectStoreMode::Trace); let store = object_store::memory::InMemory::new(); let url = "mem://test".parse().unwrap(); @@ -468,8 +814,9 @@ mod tests { assert_eq!(reg.stores().len(), 1); } - #[tokio::test] - async fn instrumented_store() { + // Returns an `InstrumentedObjectStore` with some data loaded for testing and the path to + // access the data + async fn setup_test_store() -> (InstrumentedObjectStore, Path) { let store = Arc::new(object_store::memory::InMemory::new()); let mode = AtomicU8::new(InstrumentedObjectStoreMode::default() as u8); let instrumented = InstrumentedObjectStore::new(store, mode); @@ -479,12 +826,19 @@ mod tests { let payload = PutPayload::from_static(b"test_data"); instrumented.put(&path, payload).await.unwrap(); + (instrumented, path) + } + + #[tokio::test] + async fn instrumented_store_get() { + let (instrumented, path) = setup_test_store().await; + // By default no requests should be instrumented/stored assert!(instrumented.requests.lock().is_empty()); let _ = instrumented.get(&path).await.unwrap(); assert!(instrumented.requests.lock().is_empty()); - instrumented.set_instrument_mode(InstrumentedObjectStoreMode::Enabled); + instrumented.set_instrument_mode(InstrumentedObjectStoreMode::Trace); assert!(instrumented.requests.lock().is_empty()); let _ = instrumented.get(&path).await.unwrap(); assert_eq!(instrumented.requests.lock().len(), 1); @@ -502,6 +856,244 @@ mod tests { assert!(request.extra_display.is_none()); } + #[tokio::test] + async fn instrumented_store_delete() { + let (instrumented, path) = setup_test_store().await; + + // By default no requests should be instrumented/stored + assert!(instrumented.requests.lock().is_empty()); + instrumented.delete(&path).await.unwrap(); + assert!(instrumented.requests.lock().is_empty()); + + // We need a new store so we have data to delete again + let (instrumented, path) = setup_test_store().await; + instrumented.set_instrument_mode(InstrumentedObjectStoreMode::Trace); + assert!(instrumented.requests.lock().is_empty()); + instrumented.delete(&path).await.unwrap(); + assert_eq!(instrumented.requests.lock().len(), 1); + + let mut requests = instrumented.take_requests(); + assert_eq!(requests.len(), 1); + assert!(instrumented.requests.lock().is_empty()); + + let request = requests.pop().unwrap(); + assert_eq!(request.op, Operation::Delete); + assert_eq!(request.path, path); + assert!(request.duration.is_some()); + assert!(request.size.is_none()); + assert!(request.range.is_none()); + assert!(request.extra_display.is_none()); + } + + #[tokio::test] + async fn instrumented_store_list() { + let (instrumented, path) = setup_test_store().await; + + // By default no requests should be instrumented/stored + assert!(instrumented.requests.lock().is_empty()); + let _ = instrumented.list(Some(&path)); + assert!(instrumented.requests.lock().is_empty()); + + instrumented.set_instrument_mode(InstrumentedObjectStoreMode::Trace); + 
assert!(instrumented.requests.lock().is_empty()); + let _ = instrumented.list(Some(&path)); + assert_eq!(instrumented.requests.lock().len(), 1); + + let request = instrumented.take_requests().pop().unwrap(); + assert_eq!(request.op, Operation::List); + assert_eq!(request.path, path); + assert!(request.duration.is_none()); + assert!(request.size.is_none()); + assert!(request.range.is_none()); + assert!(request.extra_display.is_none()); + } + + #[tokio::test] + async fn instrumented_store_list_with_delimiter() { + let (instrumented, path) = setup_test_store().await; + + // By default no requests should be instrumented/stored + assert!(instrumented.requests.lock().is_empty()); + let _ = instrumented.list_with_delimiter(Some(&path)).await.unwrap(); + assert!(instrumented.requests.lock().is_empty()); + + instrumented.set_instrument_mode(InstrumentedObjectStoreMode::Trace); + assert!(instrumented.requests.lock().is_empty()); + let _ = instrumented.list_with_delimiter(Some(&path)).await.unwrap(); + assert_eq!(instrumented.requests.lock().len(), 1); + + let request = instrumented.take_requests().pop().unwrap(); + assert_eq!(request.op, Operation::List); + assert_eq!(request.path, path); + assert!(request.duration.is_some()); + assert!(request.size.is_none()); + assert!(request.range.is_none()); + assert!(request.extra_display.is_none()); + } + + #[tokio::test] + async fn instrumented_store_put_opts() { + // The `setup_test_store()` method comes with data already `put` into it, so we'll setup + // manually for this test + let store = Arc::new(object_store::memory::InMemory::new()); + let mode = AtomicU8::new(InstrumentedObjectStoreMode::default() as u8); + let instrumented = InstrumentedObjectStore::new(store, mode); + + let path = Path::from("test/data"); + let payload = PutPayload::from_static(b"test_data"); + let size = payload.content_length(); + + // By default no requests should be instrumented/stored + assert!(instrumented.requests.lock().is_empty()); + instrumented.put(&path, payload.clone()).await.unwrap(); + assert!(instrumented.requests.lock().is_empty()); + + instrumented.set_instrument_mode(InstrumentedObjectStoreMode::Trace); + assert!(instrumented.requests.lock().is_empty()); + instrumented.put(&path, payload).await.unwrap(); + assert_eq!(instrumented.requests.lock().len(), 1); + + let request = instrumented.take_requests().pop().unwrap(); + assert_eq!(request.op, Operation::Put); + assert_eq!(request.path, path); + assert!(request.duration.is_some()); + assert_eq!(request.size.unwrap(), size); + assert!(request.range.is_none()); + assert!(request.extra_display.is_none()); + } + + #[tokio::test] + async fn instrumented_store_put_multipart() { + // The `setup_test_store()` method comes with data already `put` into it, so we'll setup + // manually for this test + let store = Arc::new(object_store::memory::InMemory::new()); + let mode = AtomicU8::new(InstrumentedObjectStoreMode::default() as u8); + let instrumented = InstrumentedObjectStore::new(store, mode); + + let path = Path::from("test/data"); + + // By default no requests should be instrumented/stored + assert!(instrumented.requests.lock().is_empty()); + let mp = instrumented.put_multipart(&path).await.unwrap(); + let mut write = WriteMultipart::new(mp); + write.write(b"test_data"); + write.finish().await.unwrap(); + assert!(instrumented.requests.lock().is_empty()); + + instrumented.set_instrument_mode(InstrumentedObjectStoreMode::Trace); + assert!(instrumented.requests.lock().is_empty()); + let mp = 
instrumented.put_multipart(&path).await.unwrap(); + let mut write = WriteMultipart::new(mp); + write.write(b"test_data"); + write.finish().await.unwrap(); + assert_eq!(instrumented.requests.lock().len(), 1); + + let request = instrumented.take_requests().pop().unwrap(); + assert_eq!(request.op, Operation::Put); + assert_eq!(request.path, path); + assert!(request.duration.is_some()); + assert!(request.size.is_none()); + assert!(request.range.is_none()); + assert!(request.extra_display.is_none()); + } + + #[tokio::test] + async fn instrumented_store_copy() { + let (instrumented, path) = setup_test_store().await; + let copy_to = Path::from("test/copied"); + + // By default no requests should be instrumented/stored + assert!(instrumented.requests.lock().is_empty()); + instrumented.copy(&path, ©_to).await.unwrap(); + assert!(instrumented.requests.lock().is_empty()); + + instrumented.set_instrument_mode(InstrumentedObjectStoreMode::Trace); + assert!(instrumented.requests.lock().is_empty()); + instrumented.copy(&path, ©_to).await.unwrap(); + assert_eq!(instrumented.requests.lock().len(), 1); + + let mut requests = instrumented.take_requests(); + assert_eq!(requests.len(), 1); + assert!(instrumented.requests.lock().is_empty()); + + let request = requests.pop().unwrap(); + assert_eq!(request.op, Operation::Copy); + assert_eq!(request.path, path); + assert!(request.duration.is_some()); + assert!(request.size.is_none()); + assert!(request.range.is_none()); + assert_eq!( + request.extra_display.unwrap(), + format!("copy_to: {copy_to}") + ); + } + + #[tokio::test] + async fn instrumented_store_copy_if_not_exists() { + let (instrumented, path) = setup_test_store().await; + let mut copy_to = Path::from("test/copied"); + + // By default no requests should be instrumented/stored + assert!(instrumented.requests.lock().is_empty()); + instrumented + .copy_if_not_exists(&path, ©_to) + .await + .unwrap(); + assert!(instrumented.requests.lock().is_empty()); + + // Use a new destination since the previous one already exists + copy_to = Path::from("test/copied_again"); + instrumented.set_instrument_mode(InstrumentedObjectStoreMode::Trace); + assert!(instrumented.requests.lock().is_empty()); + instrumented + .copy_if_not_exists(&path, ©_to) + .await + .unwrap(); + assert_eq!(instrumented.requests.lock().len(), 1); + + let mut requests = instrumented.take_requests(); + assert_eq!(requests.len(), 1); + assert!(instrumented.requests.lock().is_empty()); + + let request = requests.pop().unwrap(); + assert_eq!(request.op, Operation::Copy); + assert_eq!(request.path, path); + assert!(request.duration.is_some()); + assert!(request.size.is_none()); + assert!(request.range.is_none()); + assert_eq!( + request.extra_display.unwrap(), + format!("copy_to: {copy_to}") + ); + } + + #[tokio::test] + async fn instrumented_store_head() { + let (instrumented, path) = setup_test_store().await; + + // By default no requests should be instrumented/stored + assert!(instrumented.requests.lock().is_empty()); + let _ = instrumented.head(&path).await.unwrap(); + assert!(instrumented.requests.lock().is_empty()); + + instrumented.set_instrument_mode(InstrumentedObjectStoreMode::Trace); + assert!(instrumented.requests.lock().is_empty()); + let _ = instrumented.head(&path).await.unwrap(); + assert_eq!(instrumented.requests.lock().len(), 1); + + let mut requests = instrumented.take_requests(); + assert_eq!(requests.len(), 1); + assert!(instrumented.requests.lock().is_empty()); + + let request = requests.pop().unwrap(); + assert_eq!(request.op, 
Operation::Head); + assert_eq!(request.path, path); + assert!(request.duration.is_some()); + assert!(request.size.is_none()); + assert!(request.range.is_none()); + assert!(request.extra_display.is_none()); + } + #[test] fn request_details() { let rd = RequestDetails { @@ -524,8 +1116,12 @@ mod tests { fn request_summary() { // Test empty request list let mut requests = Vec::new(); - let summaries = RequestSummary::summarize_by_operation(&requests); - assert!(summaries.is_empty()); + assert_snapshot!(RequestSummaries::new(&requests), @r" + +-----------+--------+-----+-----+-----+-----+-------+ + | Operation | Metric | min | max | avg | sum | count | + +-----------+--------+-----+-----+-----+-----+-------+ + +-----------+--------+-----+-----+-----+-----+-------+ + "); requests.push(RequestDetails { op: Operation::Get, @@ -537,26 +1133,14 @@ mod tests { extra_display: None, }); - let summaries = RequestSummary::summarize_by_operation(&requests); - assert_eq!(summaries.len(), 1); - - let summary = summaries.get(&Operation::Get).unwrap(); - assert_eq!(summary.count, 1); - assert_eq!( - summary.duration_stats.as_ref().unwrap().min, - Duration::from_secs(5) - ); - assert_eq!( - summary.duration_stats.as_ref().unwrap().max, - Duration::from_secs(5) - ); - assert_eq!( - summary.duration_stats.as_ref().unwrap().sum, - Duration::from_secs(5) - ); - assert_eq!(summary.size_stats.as_ref().unwrap().min, 100); - assert_eq!(summary.size_stats.as_ref().unwrap().max, 100); - assert_eq!(summary.size_stats.as_ref().unwrap().sum, 100); + assert_snapshot!(RequestSummaries::new(&requests), @r" + +-----------+----------+-----------+-----------+-----------+-----------+-------+ + | Operation | Metric | min | max | avg | sum | count | + +-----------+----------+-----------+-----------+-----------+-----------+-------+ + | Get | duration | 5.000000s | 5.000000s | 5.000000s | 5.000000s | 1 | + | Get | size | 100 B | 100 B | 100 B | 100 B | 1 | + +-----------+----------+-----------+-----------+-----------+-----------+-------+ + "); // Add more Get requests to test aggregation requests.push(RequestDetails { @@ -577,31 +1161,18 @@ mod tests { range: None, extra_display: None, }); - - let summaries = RequestSummary::summarize_by_operation(&requests); - assert_eq!(summaries.len(), 1); - - let summary = summaries.get(&Operation::Get).unwrap(); - assert_eq!(summary.count, 3); - assert_eq!( - summary.duration_stats.as_ref().unwrap().min, - Duration::from_secs(2) - ); - assert_eq!( - summary.duration_stats.as_ref().unwrap().max, - Duration::from_secs(8) - ); - assert_eq!( - summary.duration_stats.as_ref().unwrap().sum, - Duration::from_secs(15) - ); - assert_eq!(summary.size_stats.as_ref().unwrap().min, 50); - assert_eq!(summary.size_stats.as_ref().unwrap().max, 150); - assert_eq!(summary.size_stats.as_ref().unwrap().sum, 300); + assert_snapshot!(RequestSummaries::new(&requests), @r" + +-----------+----------+-----------+-----------+-----------+------------+-------+ + | Operation | Metric | min | max | avg | sum | count | + +-----------+----------+-----------+-----------+-----------+------------+-------+ + | Get | duration | 2.000000s | 8.000000s | 5.000000s | 15.000000s | 3 | + | Get | size | 50 B | 150 B | 100 B | 300 B | 3 | + +-----------+----------+-----------+-----------+-----------+------------+-------+ + "); // Add Put requests to test grouping requests.push(RequestDetails { - op: Operation::_Put, + op: Operation::Put, path: Path::from("test4"), timestamp: chrono::DateTime::from_timestamp(3, 0).unwrap(), duration: 
Some(Duration::from_millis(200)), @@ -610,20 +1181,20 @@ mod tests { extra_display: None, }); - let summaries = RequestSummary::summarize_by_operation(&requests); - assert_eq!(summaries.len(), 2); - - let get_summary = summaries.get(&Operation::Get).unwrap(); - assert_eq!(get_summary.count, 3); - - let put_summary = summaries.get(&Operation::_Put).unwrap(); - assert_eq!(put_summary.count, 1); - assert_eq!( - put_summary.duration_stats.as_ref().unwrap().min, - Duration::from_millis(200) - ); - assert_eq!(put_summary.size_stats.as_ref().unwrap().sum, 75); + assert_snapshot!(RequestSummaries::new(&requests), @r" + +-----------+----------+-----------+-----------+-----------+------------+-------+ + | Operation | Metric | min | max | avg | sum | count | + +-----------+----------+-----------+-----------+-----------+------------+-------+ + | Get | duration | 2.000000s | 8.000000s | 5.000000s | 15.000000s | 3 | + | Get | size | 50 B | 150 B | 100 B | 300 B | 3 | + | Put | duration | 0.200000s | 0.200000s | 0.200000s | 0.200000s | 1 | + | Put | size | 75 B | 75 B | 75 B | 75 B | 1 | + +-----------+----------+-----------+-----------+-----------+------------+-------+ + "); + } + #[test] + fn request_summary_only_duration() { // Test request with only duration (no size) let only_duration = vec![RequestDetails { op: Operation::Get, @@ -634,12 +1205,18 @@ mod tests { range: None, extra_display: None, }]; - let summaries = RequestSummary::summarize_by_operation(&only_duration); - let summary = summaries.get(&Operation::Get).unwrap(); - assert_eq!(summary.count, 1); - assert!(summary.duration_stats.is_some()); - assert!(summary.size_stats.is_none()); + assert_snapshot!(RequestSummaries::new(&only_duration), @r" + +-----------+----------+-----------+-----------+-----------+-----------+-------+ + | Operation | Metric | min | max | avg | sum | count | + +-----------+----------+-----------+-----------+-----------+-----------+-------+ + | Get | duration | 3.000000s | 3.000000s | 3.000000s | 3.000000s | 1 | + | Get | size | | | | | 1 | + +-----------+----------+-----------+-----------+-----------+-----------+-------+ + "); + } + #[test] + fn request_summary_only_size() { // Test request with only size (no duration) let only_size = vec![RequestDetails { op: Operation::Get, @@ -650,13 +1227,18 @@ mod tests { range: None, extra_display: None, }]; - let summaries = RequestSummary::summarize_by_operation(&only_size); - let summary = summaries.get(&Operation::Get).unwrap(); - assert_eq!(summary.count, 1); - assert!(summary.duration_stats.is_none()); - assert!(summary.size_stats.is_some()); - assert_eq!(summary.size_stats.as_ref().unwrap().sum, 200); + assert_snapshot!(RequestSummaries::new(&only_size), @r" + +-----------+----------+-------+-------+-------+-------+-------+ + | Operation | Metric | min | max | avg | sum | count | + +-----------+----------+-------+-------+-------+-------+-------+ + | Get | duration | | | | | 1 | + | Get | size | 200 B | 200 B | 200 B | 200 B | 1 | + +-----------+----------+-------+-------+-------+-------+-------+ + "); + } + #[test] + fn request_summary_neither_duration_or_size() { // Test request with neither duration nor size let no_stats = vec![RequestDetails { op: Operation::Get, @@ -667,10 +1249,13 @@ mod tests { range: None, extra_display: None, }]; - let summaries = RequestSummary::summarize_by_operation(&no_stats); - let summary = summaries.get(&Operation::Get).unwrap(); - assert_eq!(summary.count, 1); - assert!(summary.duration_stats.is_none()); - 
assert!(summary.size_stats.is_none()); + assert_snapshot!(RequestSummaries::new(&no_stats), @r" + +-----------+----------+-----+-----+-----+-----+-------+ + | Operation | Metric | min | max | avg | sum | count | + +-----------+----------+-----+-----+-----+-----+-------+ + | Get | duration | | | | | 1 | + | Get | size | | | | | 1 | + +-----------+----------+-----+-----+-----+-----+-------+ + "); } } diff --git a/datafusion-cli/src/print_options.rs b/datafusion-cli/src/print_options.rs index f54de189b4ef..93d1d450fd82 100644 --- a/datafusion-cli/src/print_options.rs +++ b/datafusion-cli/src/print_options.rs @@ -22,7 +22,7 @@ use std::str::FromStr; use std::sync::Arc; use crate::object_storage::instrumented::{ - InstrumentedObjectStoreMode, InstrumentedObjectStoreRegistry, RequestSummary, + InstrumentedObjectStoreMode, InstrumentedObjectStoreRegistry, RequestSummaries, }; use crate::print_format::PrintFormat; @@ -188,27 +188,25 @@ impl PrintOptions { if !self.quiet { writeln!(writer, "{formatted_exec_details}")?; - if self.instrumented_registry.instrument_mode() - != InstrumentedObjectStoreMode::Disabled - { + let instrument_mode = self.instrumented_registry.instrument_mode(); + if instrument_mode != InstrumentedObjectStoreMode::Disabled { writeln!(writer, "{OBJECT_STORE_PROFILING_HEADER}")?; for store in self.instrumented_registry.stores() { let requests = store.take_requests(); if !requests.is_empty() { writeln!(writer, "{store}")?; - for req in requests.iter() { - writeln!(writer, "{req}")?; + if instrument_mode == InstrumentedObjectStoreMode::Trace { + for req in requests.iter() { + writeln!(writer, "{req}")?; + } + // Add an extra blank line to help visually organize the output + writeln!(writer)?; } - // Add an extra blank line to help visually organize the output - writeln!(writer)?; writeln!(writer, "Summaries:")?; - let summaries = RequestSummary::summarize_by_operation(&requests); - for (op, summary) in summaries { - writeln!(writer, "{op:?}")?; - writeln!(writer, "{summary}")?; - } + let summaries = RequestSummaries::new(&requests); + writeln!(writer, "{summaries}")?; } } } @@ -252,7 +250,7 @@ mod tests { print_output.clear(); print_options .instrumented_registry - .set_instrument_mode(InstrumentedObjectStoreMode::Enabled); + .set_instrument_mode(InstrumentedObjectStoreMode::Trace); print_options.write_output(&mut print_output, exec_out.clone())?; let out_str: String = print_output .clone() diff --git a/datafusion-cli/tests/cli_integration.rs b/datafusion-cli/tests/cli_integration.rs index a67924fef253..c1395aa4f562 100644 --- a/datafusion-cli/tests/cli_integration.rs +++ b/datafusion-cli/tests/cli_integration.rs @@ -402,7 +402,6 @@ async fn test_object_store_profiling() { let container = setup_minio_container().await; let mut settings = make_settings(); - settings.set_snapshot_suffix("s3_url_fallback"); // as the object store profiling contains timestamps and durations, we must // filter them out to have stable snapshots @@ -412,18 +411,17 @@ async fn test_object_store_profiling() { // Output: // operation=Get duration=[DURATION] size=1006 path=cars.csv settings.add_filter( - r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?[+-]\d{2}:\d{2} operation=(Get|Put|Delete|List|Head) duration=\d+\.\d{6}s size=(\d+) path=(.*)", - " operation=$1 duration=[DURATION] size=$2 path=$3", + r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?[+-]\d{2}:\d{2} operation=(Get|Put|Delete|List|Head) duration=\d+\.\d{6}s (size=\d+\s+)?path=(.*)", + " operation=$1 duration=[DURATION] ${2}path=$3", ); - // We 
also need to filter out the durations reported in the summary output - // + // We also need to filter out the summary statistics (anything with an 's' at the end) // Example line(s) to filter: - // - // duration min: 0.000729s - // duration max: 0.000729s - // duration avg: 0.000729s - settings.add_filter(r"duration (min|max|avg): \d+\.\d{6}s", "[SUMMARY_DURATION]"); + // | Get | duration | 5.000000s | 5.000000s | 5.000000s | | 1 | + settings.add_filter( + r"\| (Get|Put|Delete|List|Head)( +)\| duration \| .*? \| .*? \| .*? \| .*? \| (.*?) \|", + "| $1$2 | duration | ...NORMALIZED...| $3 |", + ); let _bound = settings.bind_to_scope(); @@ -434,8 +432,11 @@ LOCATION 's3://data/cars.csv'; -- Initial query should not show any profiling as the object store is not instrumented yet SELECT * from CARS LIMIT 1; -\object_store_profiling enabled --- Query again to see the profiling output +\object_store_profiling trace +-- Query again to see the full profiling output +SELECT * from CARS LIMIT 1; +\object_store_profiling summary +-- Query again to see the summarized profiling output SELECT * from CARS LIMIT 1; \object_store_profiling disabled -- Final query should not show any profiling as we disabled it again diff --git a/datafusion-cli/tests/snapshots/object_store_profiling.snap b/datafusion-cli/tests/snapshots/object_store_profiling.snap new file mode 100644 index 000000000000..029b07c324f5 --- /dev/null +++ b/datafusion-cli/tests/snapshots/object_store_profiling.snap @@ -0,0 +1,83 @@ +--- +source: datafusion-cli/tests/cli_integration.rs +info: + program: datafusion-cli + args: [] + env: + AWS_ACCESS_KEY_ID: TEST-DataFusionLogin + AWS_ALLOW_HTTP: "true" + AWS_ENDPOINT: "http://localhost:55057" + AWS_SECRET_ACCESS_KEY: TEST-DataFusionPassword + stdin: "\n CREATE EXTERNAL TABLE CARS\nSTORED AS CSV\nLOCATION 's3://data/cars.csv';\n\n-- Initial query should not show any profiling as the object store is not instrumented yet\nSELECT * from CARS LIMIT 1;\n\\object_store_profiling trace\n-- Query again to see the full profiling output\nSELECT * from CARS LIMIT 1;\n\\object_store_profiling summary\n-- Query again to see the summarized profiling output\nSELECT * from CARS LIMIT 1;\n\\object_store_profiling disabled\n-- Final query should not show any profiling as we disabled it again\nSELECT * from CARS LIMIT 1;\n" +snapshot_kind: text +--- +success: true +exit_code: 0 +----- stdout ----- +[CLI_VERSION] +0 row(s) fetched. +[ELAPSED] + ++-----+-------+---------------------+ +| car | speed | time | ++-----+-------+---------------------+ +| red | 20.0 | 1996-04-12T12:05:03 | ++-----+-------+---------------------+ +1 row(s) fetched. +[ELAPSED] + +ObjectStore Profile mode set to Trace ++-----+-------+---------------------+ +| car | speed | time | ++-----+-------+---------------------+ +| red | 20.0 | 1996-04-12T12:05:03 | ++-----+-------+---------------------+ +1 row(s) fetched. 
+[ELAPSED] + +Object Store Profiling +Instrumented Object Store: instrument_mode: Trace, inner: AmazonS3(data) + operation=Head duration=[DURATION] path=cars.csv + operation=Get duration=[DURATION] size=1006 path=cars.csv + +Summaries: ++-----------+----------+-----------+-----------+-----------+-----------+-------+ +| Operation | Metric | min | max | avg | sum | count | ++-----------+----------+-----------+-----------+-----------+-----------+-------+ +| Get | duration | ...NORMALIZED...| 1 | +| Get | size | 1006 B | 1006 B | 1006 B | 1006 B | 1 | +| Head | duration | ...NORMALIZED...| 1 | +| Head | size | | | | | 1 | ++-----------+----------+-----------+-----------+-----------+-----------+-------+ +ObjectStore Profile mode set to Summary ++-----+-------+---------------------+ +| car | speed | time | ++-----+-------+---------------------+ +| red | 20.0 | 1996-04-12T12:05:03 | ++-----+-------+---------------------+ +1 row(s) fetched. +[ELAPSED] + +Object Store Profiling +Instrumented Object Store: instrument_mode: Summary, inner: AmazonS3(data) +Summaries: ++-----------+----------+-----------+-----------+-----------+-----------+-------+ +| Operation | Metric | min | max | avg | sum | count | ++-----------+----------+-----------+-----------+-----------+-----------+-------+ +| Get | duration | ...NORMALIZED...| 1 | +| Get | size | 1006 B | 1006 B | 1006 B | 1006 B | 1 | +| Head | duration | ...NORMALIZED...| 1 | +| Head | size | | | | | 1 | ++-----------+----------+-----------+-----------+-----------+-----------+-------+ +ObjectStore Profile mode set to Disabled ++-----+-------+---------------------+ +| car | speed | time | ++-----+-------+---------------------+ +| red | 20.0 | 1996-04-12T12:05:03 | ++-----+-------+---------------------+ +1 row(s) fetched. +[ELAPSED] + +\q + +----- stderr ----- diff --git a/datafusion-cli/tests/snapshots/object_store_profiling@s3_url_fallback.snap b/datafusion-cli/tests/snapshots/object_store_profiling@s3_url_fallback.snap deleted file mode 100644 index 50c6cc8eab99..000000000000 --- a/datafusion-cli/tests/snapshots/object_store_profiling@s3_url_fallback.snap +++ /dev/null @@ -1,64 +0,0 @@ ---- -source: datafusion-cli/tests/cli_integration.rs -info: - program: datafusion-cli - args: [] - env: - AWS_ACCESS_KEY_ID: TEST-DataFusionLogin - AWS_ALLOW_HTTP: "true" - AWS_ENDPOINT: "http://localhost:55031" - AWS_SECRET_ACCESS_KEY: TEST-DataFusionPassword - stdin: "\n CREATE EXTERNAL TABLE CARS\nSTORED AS CSV\nLOCATION 's3://data/cars.csv';\n\n-- Initial query should not show any profiling as the object store is not instrumented yet\nSELECT * from CARS LIMIT 1;\n\\object_store_profiling enabled\n-- Query again to see the profiling output\nSELECT * from CARS LIMIT 1;\n\\object_store_profiling disabled\n-- Final query should not show any profiling as we disabled it again\nSELECT * from CARS LIMIT 1;\n" -snapshot_kind: text ---- -success: true -exit_code: 0 ------ stdout ----- -[CLI_VERSION] -0 row(s) fetched. -[ELAPSED] - -+-----+-------+---------------------+ -| car | speed | time | -+-----+-------+---------------------+ -| red | 20.0 | 1996-04-12T12:05:03 | -+-----+-------+---------------------+ -1 row(s) fetched. -[ELAPSED] - -ObjectStore Profile mode set to Enabled -+-----+-------+---------------------+ -| car | speed | time | -+-----+-------+---------------------+ -| red | 20.0 | 1996-04-12T12:05:03 | -+-----+-------+---------------------+ -1 row(s) fetched. 
-[ELAPSED] - -Object Store Profiling -Instrumented Object Store: instrument_mode: Enabled, inner: AmazonS3(data) - operation=Get duration=[DURATION] size=1006 path=cars.csv - -Summaries: -Get -count: 1 -[SUMMARY_DURATION] -[SUMMARY_DURATION] -[SUMMARY_DURATION] -size min: 1006 B -size max: 1006 B -size avg: 1006 B -size sum: 1006 B - -ObjectStore Profile mode set to Disabled -+-----+-------+---------------------+ -| car | speed | time | -+-----+-------+---------------------+ -| red | 20.0 | 1996-04-12T12:05:03 | -+-----+-------+---------------------+ -1 row(s) fetched. -[ELAPSED] - -\q - ------ stderr ----- diff --git a/datafusion-examples/Cargo.toml b/datafusion-examples/Cargo.toml index 68bb5376a1ac..bb0525e57753 100644 --- a/datafusion-examples/Cargo.toml +++ b/datafusion-examples/Cargo.toml @@ -81,7 +81,7 @@ serde_json = { workspace = true } tempfile = { workspace = true } test-utils = { path = "../test-utils" } tokio = { workspace = true, features = ["rt-multi-thread", "parking_lot"] } -tonic = "0.13.1" +tonic = "0.14" tracing = { version = "0.1" } tracing-subscriber = { version = "0.3" } url = { workspace = true } diff --git a/datafusion-examples/examples/custom_file_casts.rs b/datafusion-examples/examples/custom_file_casts.rs index 65ca09682064..4d97ecd91dc6 100644 --- a/datafusion-examples/examples/custom_file_casts.rs +++ b/datafusion-examples/examples/custom_file_casts.rs @@ -25,7 +25,7 @@ use datafusion::common::not_impl_err; use datafusion::common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion::common::{Result, ScalarValue}; use datafusion::datasource::listing::{ - ListingTable, ListingTableConfig, ListingTableUrl, + ListingTable, ListingTableConfig, ListingTableConfigExt, ListingTableUrl, }; use datafusion::execution::context::SessionContext; use datafusion::execution::object_store::ObjectStoreUrl; diff --git a/datafusion-examples/examples/flight/flight_client.rs b/datafusion-examples/examples/flight/flight_client.rs index e3237284b430..ff4b5903ad88 100644 --- a/datafusion-examples/examples/flight/flight_client.rs +++ b/datafusion-examples/examples/flight/flight_client.rs @@ -17,6 +17,7 @@ use std::collections::HashMap; use std::sync::Arc; +use tonic::transport::Endpoint; use datafusion::arrow::datatypes::Schema; @@ -34,7 +35,9 @@ async fn main() -> Result<(), Box> { let testdata = datafusion::test_util::parquet_test_data(); // Create Flight client - let mut client = FlightServiceClient::connect("http://localhost:50051").await?; + let endpoint = Endpoint::new("http://localhost:50051")?; + let channel = endpoint.connect().await?; + let mut client = FlightServiceClient::new(channel); // Call get_schema to get the schema of a Parquet file let request = tonic::Request::new(FlightDescriptor { diff --git a/datafusion-examples/examples/flight/flight_server.rs b/datafusion-examples/examples/flight/flight_server.rs index 58bfb7a341c1..22265e415fbd 100644 --- a/datafusion-examples/examples/flight/flight_server.rs +++ b/datafusion-examples/examples/flight/flight_server.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
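The Flight client change above now builds the transport channel explicitly before handing it to the generated client, which also leaves room for transport tuning. A minimal sketch under the assumption of tonic 0.14; the helper function and the timeout value are illustrative, not part of the example:

```rust
use std::time::Duration;

use arrow_flight::flight_service_client::FlightServiceClient;
use tonic::transport::{Channel, Endpoint};

async fn connect_client(
    url: &'static str,
) -> Result<FlightServiceClient<Channel>, Box<dyn std::error::Error>> {
    // Build and configure the channel first, then wrap it in the client.
    let channel = Endpoint::new(url)?
        .connect_timeout(Duration::from_secs(5)) // illustrative tuning knob
        .connect()
        .await?;
    Ok(FlightServiceClient::new(channel))
}
```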
-use arrow::ipc::writer::{DictionaryTracker, IpcDataGenerator}; +use arrow::ipc::writer::{CompressionContext, DictionaryTracker, IpcDataGenerator}; use std::sync::Arc; use arrow_flight::{PollInfo, SchemaAsIpc}; @@ -106,6 +106,7 @@ impl FlightService for FlightServiceImpl { // add an initial FlightData message that sends schema let options = arrow::ipc::writer::IpcWriteOptions::default(); + let mut compression_context = CompressionContext::default(); let schema_flight_data = SchemaAsIpc::new(&schema, &options); let mut flights = vec![FlightData::from(schema_flight_data)]; @@ -115,7 +116,7 @@ impl FlightService for FlightServiceImpl { for batch in &results { let (flight_dictionaries, flight_batch) = encoder - .encoded_batch(batch, &mut tracker, &options) + .encode(batch, &mut tracker, &options, &mut compression_context) .map_err(|e: ArrowError| Status::internal(e.to_string()))?; flights.extend(flight_dictionaries.into_iter().map(Into::into)); diff --git a/datafusion-examples/examples/json_shredding.rs b/datafusion-examples/examples/json_shredding.rs index c7d0146a001f..a2e83bc9510a 100644 --- a/datafusion-examples/examples/json_shredding.rs +++ b/datafusion-examples/examples/json_shredding.rs @@ -27,7 +27,7 @@ use datafusion::common::tree_node::{ }; use datafusion::common::{assert_contains, exec_datafusion_err, Result}; use datafusion::datasource::listing::{ - ListingTable, ListingTableConfig, ListingTableUrl, + ListingTable, ListingTableConfig, ListingTableConfigExt, ListingTableUrl, }; use datafusion::execution::context::SessionContext; use datafusion::execution::object_store::ObjectStoreUrl; diff --git a/datafusion-examples/examples/parquet_encrypted.rs b/datafusion-examples/examples/parquet_encrypted.rs index e9e239b7a1c3..690d9f2a5f14 100644 --- a/datafusion-examples/examples/parquet_encrypted.rs +++ b/datafusion-examples/examples/parquet_encrypted.rs @@ -16,12 +16,13 @@ // under the License. 
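For reference, the arrow 57 IPC encoding loop from the server change above, pulled out into a self-contained sketch (the free-standing helper and its signature are illustrative):

```rust
use arrow::error::ArrowError;
use arrow::ipc::writer::{
    CompressionContext, DictionaryTracker, IpcDataGenerator, IpcWriteOptions,
};
use arrow::record_batch::RecordBatch;
use arrow_flight::FlightData;

fn encode_batches(batches: &[RecordBatch]) -> Result<Vec<FlightData>, ArrowError> {
    let encoder = IpcDataGenerator::default();
    let options = IpcWriteOptions::default();
    let mut tracker = DictionaryTracker::new(false);
    // Compression state is now threaded through explicitly so it can be
    // reused across batches, instead of being hidden inside `encoded_batch`.
    let mut compression_context = CompressionContext::default();

    let mut flights = Vec::new();
    for batch in batches {
        let (dictionaries, encoded_batch) =
            encoder.encode(batch, &mut tracker, &options, &mut compression_context)?;
        flights.extend(dictionaries.into_iter().map(Into::into));
        flights.push(encoded_batch.into());
    }
    Ok(flights)
}
```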
use datafusion::common::DataFusionError; -use datafusion::config::TableParquetOptions; +use datafusion::config::{ConfigFileEncryptionProperties, TableParquetOptions}; use datafusion::dataframe::{DataFrame, DataFrameWriteOptions}; use datafusion::logical_expr::{col, lit}; use datafusion::parquet::encryption::decrypt::FileDecryptionProperties; use datafusion::parquet::encryption::encrypt::FileEncryptionProperties; use datafusion::prelude::{ParquetReadOptions, SessionContext}; +use std::sync::Arc; use tempfile::TempDir; #[tokio::main] @@ -55,7 +56,7 @@ async fn main() -> datafusion::common::Result<()> { // Write encrypted parquet let mut options = TableParquetOptions::default(); - options.crypto.file_encryption = Some((&encrypt).into()); + options.crypto.file_encryption = Some(ConfigFileEncryptionProperties::from(&encrypt)); parquet_df .write_parquet( tempfile_str.as_str(), @@ -100,7 +101,8 @@ async fn query_dataframe(df: &DataFrame) -> Result<(), DataFusionError> { // Setup encryption and decryption properties fn setup_encryption( parquet_df: &DataFrame, -) -> Result<(FileEncryptionProperties, FileDecryptionProperties), DataFusionError> { +) -> Result<(Arc, Arc), DataFusionError> +{ let schema = parquet_df.schema(); let footer_key = b"0123456789012345".to_vec(); // 128bit/16 let column_key = b"1234567890123450".to_vec(); // 128bit/16 diff --git a/datafusion-examples/examples/parquet_encrypted_with_kms.rs b/datafusion-examples/examples/parquet_encrypted_with_kms.rs index 19b0e8d0b199..45bfd183773a 100644 --- a/datafusion-examples/examples/parquet_encrypted_with_kms.rs +++ b/datafusion-examples/examples/parquet_encrypted_with_kms.rs @@ -226,7 +226,7 @@ impl EncryptionFactory for TestEncryptionFactory { options: &EncryptionFactoryOptions, schema: &SchemaRef, _file_path: &Path, - ) -> Result> { + ) -> Result>> { let config: EncryptionConfig = options.to_extension_options()?; // Generate a random encryption key for this file. @@ -268,7 +268,7 @@ impl EncryptionFactory for TestEncryptionFactory { &self, _options: &EncryptionFactoryOptions, _file_path: &Path, - ) -> Result> { + ) -> Result>> { let decryption_properties = FileDecryptionProperties::with_key_retriever(Arc::new(TestKeyRetriever {})) .build()?; diff --git a/datafusion-examples/examples/remote_catalog.rs b/datafusion-examples/examples/remote_catalog.rs index 70c0963545e0..74575554ec0a 100644 --- a/datafusion-examples/examples/remote_catalog.rs +++ b/datafusion-examples/examples/remote_catalog.rs @@ -75,8 +75,8 @@ async fn main() -> Result<()> { let state = ctx.state(); // First, parse the SQL (but don't plan it / resolve any table references) - let dialect = state.config().options().sql_parser.dialect.as_str(); - let statement = state.sql_to_statement(sql, dialect)?; + let dialect = state.config().options().sql_parser.dialect; + let statement = state.sql_to_statement(sql, &dialect)?; // Find all `TableReferences` in the parsed queries. 
These correspond to the // tables referred to by the query (in this case diff --git a/datafusion-testing b/datafusion-testing index 905df5f65cc9..eccb0e4a4263 160000 --- a/datafusion-testing +++ b/datafusion-testing @@ -1 +1 @@ -Subproject commit 905df5f65cc9d0851719c21f5a4dd5cd77621f19 +Subproject commit eccb0e4a426344ef3faf534cd60e02e9c3afd3ac diff --git a/datafusion/catalog-listing/Cargo.toml b/datafusion/catalog-listing/Cargo.toml index 69f952ae9840..4eaeed675a20 100644 --- a/datafusion/catalog-listing/Cargo.toml +++ b/datafusion/catalog-listing/Cargo.toml @@ -39,14 +39,17 @@ datafusion-datasource = { workspace = true } datafusion-execution = { workspace = true } datafusion-expr = { workspace = true } datafusion-physical-expr = { workspace = true } +datafusion-physical-expr-adapter = { workspace = true } datafusion-physical-expr-common = { workspace = true } datafusion-physical-plan = { workspace = true } futures = { workspace = true } +itertools = { workspace = true } log = { workspace = true } object_store = { workspace = true } tokio = { workspace = true } [dev-dependencies] +datafusion-datasource-parquet = { workspace = true } [lints] workspace = true @@ -54,3 +57,6 @@ workspace = true [lib] name = "datafusion_catalog_listing" path = "src/mod.rs" + +[package.metadata.cargo-machete] +ignored = ["datafusion-datasource-parquet"] diff --git a/datafusion/catalog-listing/src/config.rs b/datafusion/catalog-listing/src/config.rs new file mode 100644 index 000000000000..90f44de4fdbc --- /dev/null +++ b/datafusion/catalog-listing/src/config.rs @@ -0,0 +1,360 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::options::ListingOptions; +use arrow::datatypes::{DataType, Schema, SchemaRef}; +use datafusion_catalog::Session; +use datafusion_common::{config_err, internal_err}; +use datafusion_datasource::file_compression_type::FileCompressionType; +use datafusion_datasource::schema_adapter::SchemaAdapterFactory; +use datafusion_datasource::ListingTableUrl; +use datafusion_physical_expr_adapter::PhysicalExprAdapterFactory; +use std::str::FromStr; +use std::sync::Arc; + +/// Indicates the source of the schema for a [`crate::ListingTable`] +// PartialEq required for assert_eq! in tests +#[derive(Debug, Clone, Copy, PartialEq, Default)] +pub enum SchemaSource { + /// Schema is not yet set (initial state) + #[default] + Unset, + /// Schema was inferred from first table_path + Inferred, + /// Schema was specified explicitly via with_schema + Specified, +} + +/// Configuration for creating a [`crate::ListingTable`] +/// +/// # Schema Evolution Support +/// +/// This configuration supports schema evolution through the optional +/// [`SchemaAdapterFactory`]. 
You might want to override the default factory when you need: +/// +/// - **Type coercion requirements**: When you need custom logic for converting between +/// different Arrow data types (e.g., Int32 ↔ Int64, Utf8 ↔ LargeUtf8) +/// - **Column mapping**: You need to map columns with a legacy name to a new name +/// - **Custom handling of missing columns**: By default they are filled in with nulls, but you may e.g. want to fill them in with `0` or `""`. +/// +/// If not specified, a [`datafusion_datasource::schema_adapter::DefaultSchemaAdapterFactory`] +/// will be used, which handles basic schema compatibility cases. +/// +#[derive(Debug, Clone, Default)] +pub struct ListingTableConfig { + /// Paths on the `ObjectStore` for creating [`crate::ListingTable`]. + /// They should share the same schema and object store. + pub table_paths: Vec, + /// Optional `SchemaRef` for the to be created [`crate::ListingTable`]. + /// + /// See details on [`ListingTableConfig::with_schema`] + pub file_schema: Option, + /// Optional [`ListingOptions`] for the to be created [`crate::ListingTable`]. + /// + /// See details on [`ListingTableConfig::with_listing_options`] + pub options: Option, + /// Tracks the source of the schema information + pub(crate) schema_source: SchemaSource, + /// Optional [`SchemaAdapterFactory`] for creating schema adapters + pub(crate) schema_adapter_factory: Option>, + /// Optional [`PhysicalExprAdapterFactory`] for creating physical expression adapters + pub(crate) expr_adapter_factory: Option>, +} + +impl ListingTableConfig { + /// Creates new [`ListingTableConfig`] for reading the specified URL + pub fn new(table_path: ListingTableUrl) -> Self { + Self { + table_paths: vec![table_path], + ..Default::default() + } + } + + /// Creates new [`ListingTableConfig`] with multiple table paths. + /// + /// See `ListingTableConfigExt::infer_options` for details on what happens with multiple paths + pub fn new_with_multi_paths(table_paths: Vec) -> Self { + Self { + table_paths, + ..Default::default() + } + } + + /// Returns the source of the schema for this configuration + pub fn schema_source(&self) -> SchemaSource { + self.schema_source + } + /// Set the `schema` for the overall [`crate::ListingTable`] + /// + /// [`crate::ListingTable`] will automatically coerce, when possible, the schema + /// for individual files to match this schema. + /// + /// If a schema is not provided, it is inferred using + /// [`Self::infer_schema`]. + /// + /// If the schema is provided, it must contain only the fields in the file + /// without the table partitioning columns. 
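Unlike `new`, the multi-path constructor added above carries no inline doc example; a minimal hedged sketch (the paths are made up for illustration, and both prefixes must share a schema and an object store):

```rust
use datafusion_catalog_listing::ListingTableConfig;
use datafusion_datasource::ListingTableUrl;

let paths = vec![
    ListingTableUrl::parse("file:///data/events/2024/").unwrap(),
    ListingTableUrl::parse("file:///data/events/2025/").unwrap(),
];
let config = ListingTableConfig::new_with_multi_paths(paths);
```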
+ /// + /// # Example: Specifying Table Schema + /// ```rust + /// # use std::sync::Arc; + /// # use datafusion_catalog_listing::{ListingTableConfig, ListingOptions}; + /// # use datafusion_datasource::ListingTableUrl; + /// # use datafusion_datasource_parquet::file_format::ParquetFormat; + /// # use arrow::datatypes::{Schema, Field, DataType}; + /// # let table_paths = ListingTableUrl::parse("file:///path/to/data").unwrap(); + /// # let listing_options = ListingOptions::new(Arc::new(ParquetFormat::default())); + /// let schema = Arc::new(Schema::new(vec![ + /// Field::new("id", DataType::Int64, false), + /// Field::new("name", DataType::Utf8, true), + /// ])); + /// + /// let config = ListingTableConfig::new(table_paths) + /// .with_listing_options(listing_options) // Set options first + /// .with_schema(schema); // Then set schema + /// ``` + pub fn with_schema(self, schema: SchemaRef) -> Self { + // Note: We preserve existing options state, but downstream code may expect + // options to be set. Consider calling with_listing_options() or infer_options() + // before operations that require options to be present. + debug_assert!( + self.options.is_some() || cfg!(test), + "ListingTableConfig::with_schema called without options set. \ + Consider calling with_listing_options() or infer_options() first to avoid panics in downstream code." + ); + + Self { + file_schema: Some(schema), + schema_source: SchemaSource::Specified, + ..self + } + } + + /// Add `listing_options` to [`ListingTableConfig`] + /// + /// If not provided, format and other options are inferred via + /// `ListingTableConfigExt::infer_options`. + /// + /// # Example: Configuring Parquet Files with Custom Options + /// ```rust + /// # use std::sync::Arc; + /// # use datafusion_catalog_listing::{ListingTableConfig, ListingOptions}; + /// # use datafusion_datasource::ListingTableUrl; + /// # use datafusion_datasource_parquet::file_format::ParquetFormat; + /// # let table_paths = ListingTableUrl::parse("file:///path/to/data").unwrap(); + /// let options = ListingOptions::new(Arc::new(ParquetFormat::default())) + /// .with_file_extension(".parquet") + /// .with_collect_stat(true); + /// + /// let config = ListingTableConfig::new(table_paths) + /// .with_listing_options(options); // Configure file format and options + /// ``` + pub fn with_listing_options(self, listing_options: ListingOptions) -> Self { + // Note: This method properly sets options, but be aware that downstream + // methods like infer_schema() and try_new() require both schema and options + // to be set to function correctly. + debug_assert!( + !self.table_paths.is_empty() || cfg!(test), + "ListingTableConfig::with_listing_options called without table_paths set. \ + Consider calling new() or new_with_multi_paths() first to establish table paths." 
+ ); + + Self { + options: Some(listing_options), + ..self + } + } + + /// Returns a tuple of `(file_extension, optional compression_extension)` + /// + /// For example a path ending with blah.test.csv.gz returns `("csv", Some("gz"))` + /// For example a path ending with blah.test.csv returns `("csv", None)` + pub fn infer_file_extension_and_compression_type( + path: &str, + ) -> datafusion_common::Result<(String, Option)> { + let mut exts = path.rsplit('.'); + + let split = exts.next().unwrap_or(""); + + let file_compression_type = FileCompressionType::from_str(split) + .unwrap_or(FileCompressionType::UNCOMPRESSED); + + if file_compression_type.is_compressed() { + let split2 = exts.next().unwrap_or(""); + Ok((split2.to_string(), Some(split.to_string()))) + } else { + Ok((split.to_string(), None)) + } + } + + /// Infer the [`SchemaRef`] based on `table_path`s. + /// + /// This method infers the table schema using the first `table_path`. + /// See [`ListingOptions::infer_schema`] for more details + /// + /// # Errors + /// * if `self.options` is not set. See [`Self::with_listing_options`] + pub async fn infer_schema( + self, + state: &dyn Session, + ) -> datafusion_common::Result { + match self.options { + Some(options) => { + let ListingTableConfig { + table_paths, + file_schema, + options: _, + schema_source, + schema_adapter_factory, + expr_adapter_factory: physical_expr_adapter_factory, + } = self; + + let (schema, new_schema_source) = match file_schema { + Some(schema) => (schema, schema_source), // Keep existing source if schema exists + None => { + if let Some(url) = table_paths.first() { + ( + options.infer_schema(state, url).await?, + SchemaSource::Inferred, + ) + } else { + (Arc::new(Schema::empty()), SchemaSource::Inferred) + } + } + }; + + Ok(Self { + table_paths, + file_schema: Some(schema), + options: Some(options), + schema_source: new_schema_source, + schema_adapter_factory, + expr_adapter_factory: physical_expr_adapter_factory, + }) + } + None => internal_err!("No `ListingOptions` set for inferring schema"), + } + } + + /// Infer the partition columns from `table_paths`. + /// + /// # Errors + /// * if `self.options` is not set. See [`Self::with_listing_options`] + pub async fn infer_partitions_from_path( + self, + state: &dyn Session, + ) -> datafusion_common::Result { + match self.options { + Some(options) => { + let Some(url) = self.table_paths.first() else { + return config_err!("No table path found"); + }; + let partitions = options + .infer_partitions(state, url) + .await? + .into_iter() + .map(|col_name| { + ( + col_name, + DataType::Dictionary( + Box::new(DataType::UInt16), + Box::new(DataType::Utf8), + ), + ) + }) + .collect::>(); + let options = options.with_table_partition_cols(partitions); + Ok(Self { + table_paths: self.table_paths, + file_schema: self.file_schema, + options: Some(options), + schema_source: self.schema_source, + schema_adapter_factory: self.schema_adapter_factory, + expr_adapter_factory: self.expr_adapter_factory, + }) + } + None => config_err!("No `ListingOptions` set for inferring schema"), + } + } + + /// Set the [`SchemaAdapterFactory`] for the [`crate::ListingTable`] + /// + /// The schema adapter factory is used to create schema adapters that can + /// handle schema evolution and type conversions when reading files with + /// different schemas than the table schema. + /// + /// If not provided, a default schema adapter factory will be used. 
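The extension helper added earlier in this file is easiest to read through its return values; a quick hedged sketch restating the documented cases:

```rust
use datafusion_catalog_listing::ListingTableConfig;

let (ext, compression) =
    ListingTableConfig::infer_file_extension_and_compression_type("blah.test.csv.gz")
        .unwrap();
assert_eq!(ext, "csv");
assert_eq!(compression, Some("gz".to_string()));

let (ext, compression) =
    ListingTableConfig::infer_file_extension_and_compression_type("blah.test.csv")
        .unwrap();
assert_eq!(ext, "csv");
assert_eq!(compression, None);
```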
+ /// + /// # Example: Custom Schema Adapter for Type Coercion + /// ```rust + /// # use std::sync::Arc; + /// # use datafusion_catalog_listing::{ListingTableConfig, ListingOptions}; + /// # use datafusion_datasource::schema_adapter::{SchemaAdapterFactory, SchemaAdapter}; + /// # use datafusion_datasource::ListingTableUrl; + /// # use datafusion_datasource_parquet::file_format::ParquetFormat; + /// # use arrow::datatypes::{SchemaRef, Schema, Field, DataType}; + /// # + /// # #[derive(Debug)] + /// # struct MySchemaAdapterFactory; + /// # impl SchemaAdapterFactory for MySchemaAdapterFactory { + /// # fn create(&self, _projected_table_schema: SchemaRef, _file_schema: SchemaRef) -> Box { + /// # unimplemented!() + /// # } + /// # } + /// # let table_paths = ListingTableUrl::parse("file:///path/to/data").unwrap(); + /// # let listing_options = ListingOptions::new(Arc::new(ParquetFormat::default())); + /// # let table_schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int64, false)])); + /// let config = ListingTableConfig::new(table_paths) + /// .with_listing_options(listing_options) + /// .with_schema(table_schema) + /// .with_schema_adapter_factory(Arc::new(MySchemaAdapterFactory)); + /// ``` + pub fn with_schema_adapter_factory( + self, + schema_adapter_factory: Arc, + ) -> Self { + Self { + schema_adapter_factory: Some(schema_adapter_factory), + ..self + } + } + + /// Get the [`SchemaAdapterFactory`] for this configuration + pub fn schema_adapter_factory(&self) -> Option<&Arc> { + self.schema_adapter_factory.as_ref() + } + + /// Set the [`PhysicalExprAdapterFactory`] for the [`crate::ListingTable`] + /// + /// The expression adapter factory is used to create physical expression adapters that can + /// handle schema evolution and type conversions when evaluating expressions + /// with different schemas than the table schema. + /// + /// If not provided, a default physical expression adapter factory will be used unless a custom + /// `SchemaAdapterFactory` is set, in which case only the `SchemaAdapterFactory` will be used. + /// + /// See for details on this transition. 
+ pub fn with_expr_adapter_factory( + self, + expr_adapter_factory: Arc, + ) -> Self { + Self { + expr_adapter_factory: Some(expr_adapter_factory), + ..self + } + } +} diff --git a/datafusion/catalog-listing/src/helpers.rs b/datafusion/catalog-listing/src/helpers.rs index 00e9c71df348..82cc36867939 100644 --- a/datafusion/catalog-listing/src/helpers.rs +++ b/datafusion/catalog-listing/src/helpers.rs @@ -156,6 +156,7 @@ pub fn split_files( chunks } +#[derive(Debug)] pub struct Partition { /// The path to the partition, including the table prefix path: Path, @@ -245,7 +246,16 @@ async fn prune_partitions( partition_cols: &[(String, DataType)], ) -> Result> { if filters.is_empty() { - return Ok(partitions); + // prune partitions which don't contain the partition columns + return Ok(partitions + .into_iter() + .filter(|p| { + let cols = partition_cols.iter().map(|x| x.0.as_str()); + !parse_partitions_for_path(table_path, &p.path, cols) + .unwrap_or_default() + .is_empty() + }) + .collect()); } let mut builders: Vec<_> = (0..partition_cols.len()) @@ -432,6 +442,7 @@ pub async fn pruned_partition_list<'a>( } let partition_prefix = evaluate_partition_prefix(partition_cols, filters); + let partitions = list_partitions(store, table_path, partition_cols.len(), partition_prefix) .await?; @@ -502,12 +513,12 @@ where let subpath = table_path.strip_prefix(file_path)?; let mut part_values = vec![]; - for (part, pn) in subpath.zip(table_partition_cols) { + for (part, expected_partition) in subpath.zip(table_partition_cols) { match part.split_once('=') { - Some((name, val)) if name == pn => part_values.push(val), + Some((name, val)) if name == expected_partition => part_values.push(val), _ => { debug!( - "Ignoring file: file_path='{file_path}', table_path='{table_path}', part='{part}', partition_col='{pn}'", + "Ignoring file: file_path='{file_path}', table_path='{table_path}', part='{part}', partition_col='{expected_partition}'", ); return None; } @@ -594,6 +605,8 @@ mod tests { ("tablepath/mypartition=val1/notparquetfile", 100), ("tablepath/mypartition=val1/ignoresemptyfile.parquet", 0), ("tablepath/file.parquet", 100), + ("tablepath/notapartition/file.parquet", 100), + ("tablepath/notmypartition=val1/file.parquet", 100), ]); let filter = Expr::eq(col("mypartition"), lit("val1")); let pruned = pruned_partition_list( @@ -619,6 +632,8 @@ mod tests { ("tablepath/mypartition=val2/file.parquet", 100), ("tablepath/mypartition=val1/ignoresemptyfile.parquet", 0), ("tablepath/mypartition=val1/other=val3/file.parquet", 100), + ("tablepath/notapartition/file.parquet", 100), + ("tablepath/notmypartition=val1/file.parquet", 100), ]); let filter = Expr::eq(col("mypartition"), lit("val1")); let pruned = pruned_partition_list( diff --git a/datafusion/catalog-listing/src/mod.rs b/datafusion/catalog-listing/src/mod.rs index 1322577b207a..90d04b46b806 100644 --- a/datafusion/catalog-listing/src/mod.rs +++ b/datafusion/catalog-listing/src/mod.rs @@ -24,4 +24,11 @@ // https://github.com/apache/datafusion/issues/11143 #![cfg_attr(not(test), deny(clippy::clone_on_ref_ptr))] +mod config; pub mod helpers; +mod options; +mod table; + +pub use config::{ListingTableConfig, SchemaSource}; +pub use options::ListingOptions; +pub use table::ListingTable; diff --git a/datafusion/catalog-listing/src/options.rs b/datafusion/catalog-listing/src/options.rs new file mode 100644 index 000000000000..3cbf3573e951 --- /dev/null +++ b/datafusion/catalog-listing/src/options.rs @@ -0,0 +1,411 @@ +// Licensed to the Apache Software Foundation (ASF) 
under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow::datatypes::{DataType, SchemaRef}; +use datafusion_catalog::Session; +use datafusion_common::plan_err; +use datafusion_datasource::file_format::FileFormat; +use datafusion_datasource::ListingTableUrl; +use datafusion_execution::config::SessionConfig; +use datafusion_expr::SortExpr; +use futures::StreamExt; +use futures::{future, TryStreamExt}; +use itertools::Itertools; +use std::sync::Arc; + +/// Options for creating a [`crate::ListingTable`] +#[derive(Clone, Debug)] +pub struct ListingOptions { + /// A suffix on which files should be filtered (leave empty to + /// keep all files on the path) + pub file_extension: String, + /// The file format + pub format: Arc, + /// The expected partition column names in the folder structure. + /// See [Self::with_table_partition_cols] for details + pub table_partition_cols: Vec<(String, DataType)>, + /// Set true to try to guess statistics from the files. + /// This can add a lot of overhead as it will usually require files + /// to be opened and at least partially parsed. + pub collect_stat: bool, + /// Group files to avoid that the number of partitions exceeds + /// this limit + pub target_partitions: usize, + /// Optional pre-known sort order(s). Must be `SortExpr`s. + /// + /// DataFusion may take advantage of this ordering to omit sorts + /// or use more efficient algorithms. Currently sortedness must be + /// provided if it is known by some external mechanism, but may in + /// the future be automatically determined, for example using + /// parquet metadata. + /// + /// See + /// + /// NOTE: This attribute stores all equivalent orderings (the outer `Vec`) + /// where each ordering consists of an individual lexicographic + /// ordering (encapsulated by a `Vec`). If there aren't + /// multiple equivalent orderings, the outer `Vec` will have a + /// single element. + pub file_sort_order: Vec>, +} + +impl ListingOptions { + /// Creates an options instance with the given format + /// Default values: + /// - use default file extension filter + /// - no input partition to discover + /// - one target partition + /// - do not collect statistics + pub fn new(format: Arc) -> Self { + Self { + file_extension: format.get_ext(), + format, + table_partition_cols: vec![], + collect_stat: false, + target_partitions: 1, + file_sort_order: vec![], + } + } + + /// Set options from [`SessionConfig`] and returns self. + /// + /// Currently this sets `target_partitions` and `collect_stat` + /// but if more options are added in the future that need to be coordinated + /// they will be synchronized through this method. 
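A hedged sketch of the config-synchronizing builder described above; the configuration values are illustrative:

```rust
use std::sync::Arc;

use datafusion_catalog_listing::ListingOptions;
use datafusion_datasource_parquet::file_format::ParquetFormat;
use datafusion_execution::config::SessionConfig;

let config = SessionConfig::new()
    .with_target_partitions(8)
    .with_collect_statistics(true);

let listing_options = ListingOptions::new(Arc::new(ParquetFormat::default()))
    .with_session_config_options(&config);

assert_eq!(listing_options.target_partitions, 8);
assert!(listing_options.collect_stat);
```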
+ pub fn with_session_config_options(mut self, config: &SessionConfig) -> Self { + self = self.with_target_partitions(config.target_partitions()); + self = self.with_collect_stat(config.collect_statistics()); + self + } + + /// Set file extension on [`ListingOptions`] and returns self. + /// + /// # Example + /// ``` + /// # use std::sync::Arc; + /// # use datafusion_catalog_listing::ListingOptions; + /// # use datafusion_datasource_parquet::file_format::ParquetFormat; + /// + /// let listing_options = ListingOptions::new(Arc::new( + /// ParquetFormat::default() + /// )) + /// .with_file_extension(".parquet"); + /// + /// assert_eq!(listing_options.file_extension, ".parquet"); + /// ``` + pub fn with_file_extension(mut self, file_extension: impl Into) -> Self { + self.file_extension = file_extension.into(); + self + } + + /// Optionally set file extension on [`ListingOptions`] and returns self. + /// + /// If `file_extension` is `None`, the file extension will not be changed + /// + /// # Example + /// ``` + /// # use std::sync::Arc; + /// # use datafusion_catalog_listing::ListingOptions; + /// # use datafusion_datasource_parquet::file_format::ParquetFormat; + /// + /// let extension = Some(".parquet"); + /// let listing_options = ListingOptions::new(Arc::new( + /// ParquetFormat::default() + /// )) + /// .with_file_extension_opt(extension); + /// + /// assert_eq!(listing_options.file_extension, ".parquet"); + /// ``` + pub fn with_file_extension_opt(mut self, file_extension: Option) -> Self + where + S: Into, + { + if let Some(file_extension) = file_extension { + self.file_extension = file_extension.into(); + } + self + } + + /// Set `table partition columns` on [`ListingOptions`] and returns self. + /// + /// "partition columns," used to support [Hive Partitioning], are + /// columns added to the data that is read, based on the folder + /// structure where the data resides. + /// + /// For example, give the following files in your filesystem: + /// + /// ```text + /// /mnt/nyctaxi/year=2022/month=01/tripdata.parquet + /// /mnt/nyctaxi/year=2021/month=12/tripdata.parquet + /// /mnt/nyctaxi/year=2021/month=11/tripdata.parquet + /// ``` + /// + /// A [`crate::ListingTable`] created at `/mnt/nyctaxi/` with partition + /// columns "year" and "month" will include new `year` and `month` + /// columns while reading the files. The `year` column would have + /// value `2022` and the `month` column would have value `01` for + /// the rows read from + /// `/mnt/nyctaxi/year=2022/month=01/tripdata.parquet` + /// + ///# Notes + /// + /// - If only one level (e.g. `year` in the example above) is + /// specified, the other levels are ignored but the files are + /// still read. + /// + /// - Files that don't follow this partitioning scheme will be + /// ignored. + /// + /// - Since the columns have the same value for all rows read from + /// each individual file (such as dates), they are typically + /// dictionary encoded for efficiency. You may use + /// [`wrap_partition_type_in_dict`] to request a + /// dictionary-encoded type. + /// + /// - The partition columns are solely extracted from the file path. Especially they are NOT part of the parquet files itself. 
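As the notes above suggest, partition columns are usually declared with a dictionary type; a small sketch using the helper those notes reference (the `year`/`month` names are illustrative), with the result passed to `with_table_partition_cols` as in the example that follows:

```rust
use arrow::datatypes::DataType;
use datafusion_datasource::file_scan_config::wrap_partition_type_in_dict;

// Each file repeats the same partition value on every row, so request a
// dictionary-encoded column rather than plain Utf8.
let partition_cols = vec![
    ("year".to_string(), wrap_partition_type_in_dict(DataType::Utf8)),
    ("month".to_string(), wrap_partition_type_in_dict(DataType::Utf8)),
];
```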
+ /// + /// # Example + /// + /// ``` + /// # use std::sync::Arc; + /// # use arrow::datatypes::DataType; + /// # use datafusion_expr::col; + /// # use datafusion_catalog_listing::ListingOptions; + /// # use datafusion_datasource_parquet::file_format::ParquetFormat; + /// + /// // listing options for files with paths such as `/mnt/data/col_a=x/col_b=y/data.parquet` + /// // `col_a` and `col_b` will be included in the data read from those files + /// let listing_options = ListingOptions::new(Arc::new( + /// ParquetFormat::default() + /// )) + /// .with_table_partition_cols(vec![("col_a".to_string(), DataType::Utf8), + /// ("col_b".to_string(), DataType::Utf8)]); + /// + /// assert_eq!(listing_options.table_partition_cols, vec![("col_a".to_string(), DataType::Utf8), + /// ("col_b".to_string(), DataType::Utf8)]); + /// ``` + /// + /// [Hive Partitioning]: https://docs.cloudera.com/HDPDocuments/HDP2/HDP-2.1.3/bk_system-admin-guide/content/hive_partitioned_tables.html + /// [`wrap_partition_type_in_dict`]: datafusion_datasource::file_scan_config::wrap_partition_type_in_dict + pub fn with_table_partition_cols( + mut self, + table_partition_cols: Vec<(String, DataType)>, + ) -> Self { + self.table_partition_cols = table_partition_cols; + self + } + + /// Set stat collection on [`ListingOptions`] and returns self. + /// + /// ``` + /// # use std::sync::Arc; + /// # use datafusion_catalog_listing::ListingOptions; + /// # use datafusion_datasource_parquet::file_format::ParquetFormat; + /// + /// let listing_options = ListingOptions::new(Arc::new( + /// ParquetFormat::default() + /// )) + /// .with_collect_stat(true); + /// + /// assert_eq!(listing_options.collect_stat, true); + /// ``` + pub fn with_collect_stat(mut self, collect_stat: bool) -> Self { + self.collect_stat = collect_stat; + self + } + + /// Set number of target partitions on [`ListingOptions`] and returns self. + /// + /// ``` + /// # use std::sync::Arc; + /// # use datafusion_catalog_listing::ListingOptions; + /// # use datafusion_datasource_parquet::file_format::ParquetFormat; + /// + /// let listing_options = ListingOptions::new(Arc::new( + /// ParquetFormat::default() + /// )) + /// .with_target_partitions(8); + /// + /// assert_eq!(listing_options.target_partitions, 8); + /// ``` + pub fn with_target_partitions(mut self, target_partitions: usize) -> Self { + self.target_partitions = target_partitions; + self + } + + /// Set file sort order on [`ListingOptions`] and returns self. + /// + /// ``` + /// # use std::sync::Arc; + /// # use datafusion_expr::col; + /// # use datafusion_catalog_listing::ListingOptions; + /// # use datafusion_datasource_parquet::file_format::ParquetFormat; + /// + /// // Tell datafusion that the files are sorted by column "a" + /// let file_sort_order = vec![vec![ + /// col("a").sort(true, true) + /// ]]; + /// + /// let listing_options = ListingOptions::new(Arc::new( + /// ParquetFormat::default() + /// )) + /// .with_file_sort_order(file_sort_order.clone()); + /// + /// assert_eq!(listing_options.file_sort_order, file_sort_order); + /// ``` + pub fn with_file_sort_order(mut self, file_sort_order: Vec>) -> Self { + self.file_sort_order = file_sort_order; + self + } + + /// Infer the schema of the files at the given path on the provided object store. + /// + /// If the table_path contains one or more files (i.e. it is a directory / + /// prefix of files) their schema is merged by calling [`FileFormat::infer_schema`] + /// + /// Note: The inferred schema does not include any partitioning columns. 
+ /// + /// This method is called as part of creating a [`crate::ListingTable`]. + pub async fn infer_schema<'a>( + &'a self, + state: &dyn Session, + table_path: &'a ListingTableUrl, + ) -> datafusion_common::Result { + let store = state.runtime_env().object_store(table_path)?; + + let files: Vec<_> = table_path + .list_all_files(state, store.as_ref(), &self.file_extension) + .await? + // Empty files cannot affect schema but may throw when trying to read for it + .try_filter(|object_meta| future::ready(object_meta.size > 0)) + .try_collect() + .await?; + + let schema = self.format.infer_schema(state, &store, &files).await?; + + Ok(schema) + } + + /// Infers the partition columns stored in `LOCATION` and compares + /// them with the columns provided in `PARTITIONED BY` to help prevent + /// accidental corrupts of partitioned tables. + /// + /// Allows specifying partial partitions. + pub async fn validate_partitions( + &self, + state: &dyn Session, + table_path: &ListingTableUrl, + ) -> datafusion_common::Result<()> { + if self.table_partition_cols.is_empty() { + return Ok(()); + } + + if !table_path.is_collection() { + return plan_err!( + "Can't create a partitioned table backed by a single file, \ + perhaps the URL is missing a trailing slash?" + ); + } + + let inferred = self.infer_partitions(state, table_path).await?; + + // no partitioned files found on disk + if inferred.is_empty() { + return Ok(()); + } + + let table_partition_names = self + .table_partition_cols + .iter() + .map(|(col_name, _)| col_name.clone()) + .collect_vec(); + + if inferred.len() < table_partition_names.len() { + return plan_err!( + "Inferred partitions to be {:?}, but got {:?}", + inferred, + table_partition_names + ); + } + + // match prefix to allow creating tables with partial partitions + for (idx, col) in table_partition_names.iter().enumerate() { + if &inferred[idx] != col { + return plan_err!( + "Inferred partitions to be {:?}, but got {:?}", + inferred, + table_partition_names + ); + } + } + + Ok(()) + } + + /// Infer the partitioning at the given path on the provided object store. + /// For performance reasons, it doesn't read all the files on disk + /// and therefore may fail to detect invalid partitioning. + pub async fn infer_partitions( + &self, + state: &dyn Session, + table_path: &ListingTableUrl, + ) -> datafusion_common::Result> { + let store = state.runtime_env().object_store(table_path)?; + + // only use 10 files for inference + // This can fail to detect inconsistent partition keys + // A DFS traversal approach of the store can help here + let files: Vec<_> = table_path + .list_all_files(state, store.as_ref(), &self.file_extension) + .await? 
+ .take(10) + .try_collect() + .await?; + + let stripped_path_parts = files.iter().map(|file| { + table_path + .strip_prefix(&file.location) + .unwrap() + .collect_vec() + }); + + let partition_keys = stripped_path_parts + .map(|path_parts| { + path_parts + .into_iter() + .rev() + .skip(1) // get parents only; skip the file itself + .rev() + // Partitions are expected to follow the format "column_name=value", so we + // should ignore any path part that cannot be parsed into the expected format + .filter(|s| s.contains('=')) + .map(|s| s.split('=').take(1).collect()) + .collect_vec() + }) + .collect_vec(); + + match partition_keys.into_iter().all_equal_value() { + Ok(v) => Ok(v), + Err(None) => Ok(vec![]), + Err(Some(diff)) => { + let mut sorted_diff = [diff.0, diff.1]; + sorted_diff.sort(); + plan_err!("Found mixed partition values on disk {:?}", sorted_diff) + } + } + } +} diff --git a/datafusion/catalog-listing/src/table.rs b/datafusion/catalog-listing/src/table.rs new file mode 100644 index 000000000000..e9ac1bf097a2 --- /dev/null +++ b/datafusion/catalog-listing/src/table.rs @@ -0,0 +1,788 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use crate::config::SchemaSource; +use crate::helpers::{expr_applicable_for_cols, pruned_partition_list}; +use crate::{ListingOptions, ListingTableConfig}; +use arrow::datatypes::{Field, Schema, SchemaBuilder, SchemaRef}; +use async_trait::async_trait; +use datafusion_catalog::{ScanArgs, ScanResult, Session, TableProvider}; +use datafusion_common::stats::Precision; +use datafusion_common::{ + internal_datafusion_err, plan_err, project_schema, Constraints, DataFusionError, + SchemaExt, Statistics, +}; +use datafusion_datasource::file::FileSource; +use datafusion_datasource::file_groups::FileGroup; +use datafusion_datasource::file_scan_config::{FileScanConfig, FileScanConfigBuilder}; +use datafusion_datasource::file_sink_config::FileSinkConfig; +use datafusion_datasource::schema_adapter::{ + DefaultSchemaAdapterFactory, SchemaAdapter, SchemaAdapterFactory, +}; +use datafusion_datasource::{ + compute_all_files_statistics, ListingTableUrl, PartitionedFile, +}; +use datafusion_execution::cache::cache_manager::FileStatisticsCache; +use datafusion_execution::cache::cache_unit::DefaultFileStatisticsCache; +use datafusion_expr::dml::InsertOp; +use datafusion_expr::execution_props::ExecutionProps; +use datafusion_expr::{Expr, TableProviderFilterPushDown, TableType}; +use datafusion_physical_expr::create_lex_ordering; +use datafusion_physical_expr_adapter::PhysicalExprAdapterFactory; +use datafusion_physical_expr_common::sort_expr::LexOrdering; +use datafusion_physical_plan::empty::EmptyExec; +use datafusion_physical_plan::ExecutionPlan; +use futures::{future, stream, Stream, StreamExt, TryStreamExt}; +use object_store::ObjectStore; +use std::any::Any; +use std::collections::HashMap; +use std::sync::Arc; + +/// Built in [`TableProvider`] that reads data from one or more files as a single table. +/// +/// The files are read using an [`ObjectStore`] instance, for example from +/// local files or objects from AWS S3. +/// +/// # Features: +/// * Reading multiple files as a single table +/// * Hive style partitioning (e.g., directories named `date=2024-06-01`) +/// * Merges schemas from files with compatible but not identical schemas (see [`ListingTableConfig::file_schema`]) +/// * `limit`, `filter` and `projection` pushdown for formats that support it (e.g., +/// Parquet) +/// * Statistics collection and pruning based on file metadata +/// * Pre-existing sort order (see [`ListingOptions::file_sort_order`]) +/// * Metadata caching to speed up repeated queries (see [`FileMetadataCache`]) +/// * Statistics caching (see [`FileStatisticsCache`]) +/// +/// [`FileMetadataCache`]: datafusion_execution::cache::cache_manager::FileMetadataCache +/// +/// # Reading Directories and Hive Style Partitioning +/// +/// For example, given the `table1` directory (or object store prefix) +/// +/// ```text +/// table1 +/// ├── file1.parquet +/// └── file2.parquet +/// ``` +/// +/// A `ListingTable` would read the files `file1.parquet` and `file2.parquet` as +/// a single table, merging the schemas if the files have compatible but not +/// identical schemas. +/// +/// Given the `table2` directory (or object store prefix) +/// +/// ```text +/// table2 +/// ├── date=2024-06-01 +/// │ ├── file3.parquet +/// │ └── file4.parquet +/// └── date=2024-06-02 +/// └── file5.parquet +/// ``` +/// +/// A `ListingTable` would read the files `file3.parquet`, `file4.parquet`, and +/// `file5.parquet` as a single table, again merging schemas if necessary. +/// +/// Given the hive style partitioning structure (e.g,. 
directories named +/// `date=2024-06-01` and `date=2026-06-02`), `ListingTable` also adds a `date` +/// column when reading the table: +/// * The files in `table2/date=2024-06-01` will have the value `2024-06-01` +/// * The files in `table2/date=2024-06-02` will have the value `2024-06-02`. +/// +/// If the query has a predicate like `WHERE date = '2024-06-01'` +/// only the corresponding directory will be read. +/// +/// # See Also +/// +/// 1. [`ListingTableConfig`]: Configuration options +/// 1. [`DataSourceExec`]: `ExecutionPlan` used by `ListingTable` +/// +/// [`DataSourceExec`]: datafusion_datasource::source::DataSourceExec +/// +/// # Caching Metadata +/// +/// Some formats, such as Parquet, use the `FileMetadataCache` to cache file +/// metadata that is needed to execute but expensive to read, such as row +/// groups and statistics. The cache is scoped to the `SessionContext` and can +/// be configured via the [runtime config options]. +/// +/// [runtime config options]: https://datafusion.apache.org/user-guide/configs.html#runtime-configuration-settings +/// +/// # Example: Read a directory of parquet files using a [`ListingTable`] +/// +/// ```no_run +/// # use datafusion_common::Result; +/// # use std::sync::Arc; +/// # use datafusion_catalog::TableProvider; +/// # use datafusion_catalog_listing::{ListingOptions, ListingTable, ListingTableConfig}; +/// # use datafusion_datasource::ListingTableUrl; +/// # use datafusion_datasource_parquet::file_format::ParquetFormat;/// # +/// # use datafusion_catalog::Session; +/// async fn get_listing_table(session: &dyn Session) -> Result> { +/// let table_path = "/path/to/parquet"; +/// +/// // Parse the path +/// let table_path = ListingTableUrl::parse(table_path)?; +/// +/// // Create default parquet options +/// let file_format = ParquetFormat::new(); +/// let listing_options = ListingOptions::new(Arc::new(file_format)) +/// .with_file_extension(".parquet"); +/// +/// // Resolve the schema +/// let resolved_schema = listing_options +/// .infer_schema(session, &table_path) +/// .await?; +/// +/// let config = ListingTableConfig::new(table_path) +/// .with_listing_options(listing_options) +/// .with_schema(resolved_schema); +/// +/// // Create a new TableProvider +/// let provider = Arc::new(ListingTable::try_new(config)?); +/// +/// # Ok(provider) +/// # } +/// ``` +#[derive(Debug, Clone)] +pub struct ListingTable { + table_paths: Vec, + /// `file_schema` contains only the columns physically stored in the data files themselves. + /// - Represents the actual fields found in files like Parquet, CSV, etc. 
+ /// - Used when reading the raw data from files + file_schema: SchemaRef, + /// `table_schema` combines `file_schema` + partition columns + /// - Partition columns are derived from directory paths (not stored in files) + /// - These are columns like "year=2022/month=01" in paths like `/data/year=2022/month=01/file.parquet` + table_schema: SchemaRef, + /// Indicates how the schema was derived (inferred or explicitly specified) + schema_source: SchemaSource, + /// Options used to configure the listing table such as the file format + /// and partitioning information + options: ListingOptions, + /// The SQL definition for this table, if any + definition: Option, + /// Cache for collected file statistics + collected_statistics: FileStatisticsCache, + /// Constraints applied to this table + constraints: Constraints, + /// Column default expressions for columns that are not physically present in the data files + column_defaults: HashMap, + /// Optional [`SchemaAdapterFactory`] for creating schema adapters + schema_adapter_factory: Option>, + /// Optional [`PhysicalExprAdapterFactory`] for creating physical expression adapters + expr_adapter_factory: Option>, +} + +impl ListingTable { + /// Create new [`ListingTable`] + /// + /// See documentation and example on [`ListingTable`] and [`ListingTableConfig`] + pub fn try_new(config: ListingTableConfig) -> datafusion_common::Result { + // Extract schema_source before moving other parts of the config + let schema_source = config.schema_source(); + + let file_schema = config + .file_schema + .ok_or_else(|| internal_datafusion_err!("No schema provided."))?; + + let options = config + .options + .ok_or_else(|| internal_datafusion_err!("No ListingOptions provided"))?; + + // Add the partition columns to the file schema + let mut builder = SchemaBuilder::from(file_schema.as_ref().to_owned()); + for (part_col_name, part_col_type) in &options.table_partition_cols { + builder.push(Field::new(part_col_name, part_col_type.clone(), false)); + } + + let table_schema = Arc::new( + builder + .finish() + .with_metadata(file_schema.metadata().clone()), + ); + + let table = Self { + table_paths: config.table_paths, + file_schema, + table_schema, + schema_source, + options, + definition: None, + collected_statistics: Arc::new(DefaultFileStatisticsCache::default()), + constraints: Constraints::default(), + column_defaults: HashMap::new(), + schema_adapter_factory: config.schema_adapter_factory, + expr_adapter_factory: config.expr_adapter_factory, + }; + + Ok(table) + } + + /// Assign constraints + pub fn with_constraints(mut self, constraints: Constraints) -> Self { + self.constraints = constraints; + self + } + + /// Assign column defaults + pub fn with_column_defaults( + mut self, + column_defaults: HashMap, + ) -> Self { + self.column_defaults = column_defaults; + self + } + + /// Set the [`FileStatisticsCache`] used to cache parquet file statistics. + /// + /// Setting a statistics cache on the `SessionContext` can avoid refetching statistics + /// multiple times in the same session. + /// + /// If `None`, creates a new [`DefaultFileStatisticsCache`] scoped to this query. 
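One way to use the cache setter described above is to share a single statistics cache between tables so repeated scans of the same files do not re-collect statistics. A hedged sketch; `table_a` and `table_b` are assumed to be `ListingTable`s built as in the doc example earlier:

```rust
use std::sync::Arc;

use datafusion_execution::cache::cache_manager::FileStatisticsCache;
use datafusion_execution::cache::cache_unit::DefaultFileStatisticsCache;

// Coerce to the `FileStatisticsCache` alias (an `Arc`'d trait object) up front
// so the same cache instance can be handed to multiple tables.
let shared_cache: FileStatisticsCache =
    Arc::new(DefaultFileStatisticsCache::default());
let table_a = table_a.with_cache(Some(Arc::clone(&shared_cache)));
let table_b = table_b.with_cache(Some(shared_cache));
```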
+ pub fn with_cache(mut self, cache: Option) -> Self { + self.collected_statistics = + cache.unwrap_or_else(|| Arc::new(DefaultFileStatisticsCache::default())); + self + } + + /// Specify the SQL definition for this table, if any + pub fn with_definition(mut self, definition: Option) -> Self { + self.definition = definition; + self + } + + /// Get paths ref + pub fn table_paths(&self) -> &Vec { + &self.table_paths + } + + /// Get options ref + pub fn options(&self) -> &ListingOptions { + &self.options + } + + /// Get the schema source + pub fn schema_source(&self) -> SchemaSource { + self.schema_source + } + + /// Set the [`SchemaAdapterFactory`] for this [`ListingTable`] + /// + /// The schema adapter factory is used to create schema adapters that can + /// handle schema evolution and type conversions when reading files with + /// different schemas than the table schema. + /// + /// # Example: Adding Schema Evolution Support + /// ```rust + /// # use std::sync::Arc; + /// # use datafusion_catalog_listing::{ListingTable, ListingTableConfig, ListingOptions}; + /// # use datafusion_datasource::ListingTableUrl; + /// # use datafusion_datasource::schema_adapter::{DefaultSchemaAdapterFactory, SchemaAdapter}; + /// # use datafusion_datasource_parquet::file_format::ParquetFormat; + /// # use arrow::datatypes::{SchemaRef, Schema, Field, DataType}; + /// # let table_path = ListingTableUrl::parse("file:///path/to/data").unwrap(); + /// # let options = ListingOptions::new(Arc::new(ParquetFormat::default())); + /// # let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int64, false)])); + /// # let config = ListingTableConfig::new(table_path).with_listing_options(options).with_schema(schema); + /// # let table = ListingTable::try_new(config).unwrap(); + /// let table_with_evolution = table + /// .with_schema_adapter_factory(Arc::new(DefaultSchemaAdapterFactory)); + /// ``` + /// See [`ListingTableConfig::with_schema_adapter_factory`] for an example of custom SchemaAdapterFactory. + pub fn with_schema_adapter_factory( + self, + schema_adapter_factory: Arc, + ) -> Self { + Self { + schema_adapter_factory: Some(schema_adapter_factory), + ..self + } + } + + /// Get the [`SchemaAdapterFactory`] for this table + pub fn schema_adapter_factory(&self) -> Option<&Arc> { + self.schema_adapter_factory.as_ref() + } + + /// Creates a schema adapter for mapping between file and table schemas + /// + /// Uses the configured schema adapter factory if available, otherwise falls back + /// to the default implementation. + fn create_schema_adapter(&self) -> Box { + let table_schema = self.schema(); + match &self.schema_adapter_factory { + Some(factory) => { + factory.create_with_projected_schema(Arc::clone(&table_schema)) + } + None => DefaultSchemaAdapterFactory::from_schema(Arc::clone(&table_schema)), + } + } + + /// Creates a file source and applies schema adapter factory if available + fn create_file_source_with_schema_adapter( + &self, + ) -> datafusion_common::Result> { + let mut source = self.options.format.file_source(); + // Apply schema adapter to source if available + // + // The source will use this SchemaAdapter to adapt data batches as they flow up the plan. + // Note: ListingTable also creates a SchemaAdapter in `scan()` but that is only used to adapt collected statistics. 
+ if let Some(factory) = &self.schema_adapter_factory { + source = source.with_schema_adapter_factory(Arc::clone(factory))?; + } + Ok(source) + } + + /// If file_sort_order is specified, creates the appropriate physical expressions + pub fn try_create_output_ordering( + &self, + execution_props: &ExecutionProps, + ) -> datafusion_common::Result> { + create_lex_ordering( + &self.table_schema, + &self.options.file_sort_order, + execution_props, + ) + } +} + +// Expressions can be used for partition pruning if they can be evaluated using +// only the partition columns and there are partition columns. +fn can_be_evaluated_for_partition_pruning( + partition_column_names: &[&str], + expr: &Expr, +) -> bool { + !partition_column_names.is_empty() + && expr_applicable_for_cols(partition_column_names, expr) +} + +#[async_trait] +impl TableProvider for ListingTable { + fn as_any(&self) -> &dyn Any { + self + } + + fn schema(&self) -> SchemaRef { + Arc::clone(&self.table_schema) + } + + fn constraints(&self) -> Option<&Constraints> { + Some(&self.constraints) + } + + fn table_type(&self) -> TableType { + TableType::Base + } + + async fn scan( + &self, + state: &dyn Session, + projection: Option<&Vec>, + filters: &[Expr], + limit: Option, + ) -> datafusion_common::Result> { + let options = ScanArgs::default() + .with_projection(projection.map(|p| p.as_slice())) + .with_filters(Some(filters)) + .with_limit(limit); + Ok(self.scan_with_args(state, options).await?.into_inner()) + } + + async fn scan_with_args<'a>( + &self, + state: &dyn Session, + args: ScanArgs<'a>, + ) -> datafusion_common::Result { + let projection = args.projection().map(|p| p.to_vec()); + let filters = args.filters().map(|f| f.to_vec()).unwrap_or_default(); + let limit = args.limit(); + + // extract types of partition columns + let table_partition_cols = self + .options + .table_partition_cols + .iter() + .map(|col| Ok(self.table_schema.field_with_name(&col.0)?.clone())) + .collect::>>()?; + + let table_partition_col_names = table_partition_cols + .iter() + .map(|field| field.name().as_str()) + .collect::>(); + + // If the filters can be resolved using only partition cols, there is no need to + // pushdown it to TableScan, otherwise, `unhandled` pruning predicates will be generated + let (partition_filters, filters): (Vec<_>, Vec<_>) = + filters.iter().cloned().partition(|filter| { + can_be_evaluated_for_partition_pruning(&table_partition_col_names, filter) + }); + + // We should not limit the number of partitioned files to scan if there are filters and limit + // at the same time. This is because the limit should be applied after the filters are applied. 
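Editorial aside (not part of this diff): the filter split above keeps a predicate on the partition-pruning side only if it can be evaluated from partition columns alone. The helper below is a simplified stand-in for `can_be_evaluated_for_partition_pruning` / `expr_applicable_for_cols`, checking just the referenced column names:

```rust
use datafusion_expr::{col, lit, Expr};

/// Simplified check: a predicate can prune partitions only if every column it
/// references is a partition column (and there is at least one partition column).
fn only_uses_partition_cols(partition_cols: &[&str], expr: &Expr) -> bool {
    !partition_cols.is_empty()
        && expr
            .column_refs()
            .iter()
            .all(|c| partition_cols.contains(&c.name.as_str()))
}

fn main() {
    let partition_cols = ["year", "month"];
    let by_partition = col("year").eq(lit(2022)); // prunable from the path alone
    let by_data = col("price").gt(lit(100)); // needs the file contents
    let (partition_filters, remaining): (Vec<Expr>, Vec<Expr>) =
        vec![by_partition, by_data]
            .into_iter()
            .partition(|e| only_uses_partition_cols(&partition_cols, e));
    assert_eq!(partition_filters.len(), 1);
    assert_eq!(remaining.len(), 1);
}
```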
+ let statistic_file_limit = if filters.is_empty() { limit } else { None }; + + let (mut partitioned_file_lists, statistics) = self + .list_files_for_scan(state, &partition_filters, statistic_file_limit) + .await?; + + // if no files need to be read, return an `EmptyExec` + if partitioned_file_lists.is_empty() { + let projected_schema = project_schema(&self.schema(), projection.as_ref())?; + return Ok(ScanResult::new(Arc::new(EmptyExec::new(projected_schema)))); + } + + let output_ordering = self.try_create_output_ordering(state.execution_props())?; + match state + .config_options() + .execution + .split_file_groups_by_statistics + .then(|| { + output_ordering.first().map(|output_ordering| { + FileScanConfig::split_groups_by_statistics_with_target_partitions( + &self.table_schema, + &partitioned_file_lists, + output_ordering, + self.options.target_partitions, + ) + }) + }) + .flatten() + { + Some(Err(e)) => log::debug!("failed to split file groups by statistics: {e}"), + Some(Ok(new_groups)) => { + if new_groups.len() <= self.options.target_partitions { + partitioned_file_lists = new_groups; + } else { + log::debug!("attempted to split file groups by statistics, but there were more file groups than target_partitions; falling back to unordered") + } + } + None => {} // no ordering required + }; + + let Some(object_store_url) = + self.table_paths.first().map(ListingTableUrl::object_store) + else { + return Ok(ScanResult::new(Arc::new(EmptyExec::new(Arc::new( + Schema::empty(), + ))))); + }; + + let file_source = self.create_file_source_with_schema_adapter()?; + + // create the execution plan + let plan = self + .options + .format + .create_physical_plan( + state, + FileScanConfigBuilder::new( + object_store_url, + Arc::clone(&self.file_schema), + file_source, + ) + .with_file_groups(partitioned_file_lists) + .with_constraints(self.constraints.clone()) + .with_statistics(statistics) + .with_projection(projection) + .with_limit(limit) + .with_output_ordering(output_ordering) + .with_table_partition_cols(table_partition_cols) + .with_expr_adapter(self.expr_adapter_factory.clone()) + .build(), + ) + .await?; + + Ok(ScanResult::new(plan)) + } + + fn supports_filters_pushdown( + &self, + filters: &[&Expr], + ) -> datafusion_common::Result> { + let partition_column_names = self + .options + .table_partition_cols + .iter() + .map(|col| col.0.as_str()) + .collect::>(); + filters + .iter() + .map(|filter| { + if can_be_evaluated_for_partition_pruning(&partition_column_names, filter) + { + // if filter can be handled by partition pruning, it is exact + return Ok(TableProviderFilterPushDown::Exact); + } + + Ok(TableProviderFilterPushDown::Inexact) + }) + .collect() + } + + fn get_table_definition(&self) -> Option<&str> { + self.definition.as_deref() + } + + async fn insert_into( + &self, + state: &dyn Session, + input: Arc, + insert_op: InsertOp, + ) -> datafusion_common::Result> { + // Check that the schema of the plan matches the schema of this table. + self.schema() + .logically_equivalent_names_and_types(&input.schema())?; + + let table_path = &self.table_paths()[0]; + if !table_path.is_collection() { + return plan_err!( + "Inserting into a ListingTable backed by a single file is not supported, URL is possibly missing a trailing `/`. \ + To append to an existing file use StreamTable, e.g. by using CREATE UNBOUNDED EXTERNAL TABLE" + ); + } + + // Get the object store for the table path. 
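Editorial aside (not part of this diff): `insert_into` above requires the table URL to be a directory ("collection"), which is signalled by a trailing `/`. A small sketch using `ListingTableUrl` (the import path matches the doc example earlier in this diff); the paths are hypothetical:

```rust
use datafusion_datasource::ListingTableUrl;

fn main() -> datafusion_common::Result<()> {
    // A trailing `/` marks a directory ("collection") that can accept inserts
    let dir = ListingTableUrl::parse("file:///data/sales/")?;
    assert!(dir.is_collection());

    // A single file is not a collection, so `insert_into` rejects it
    let file = ListingTableUrl::parse("file:///data/sales/part-0.parquet")?;
    assert!(!file.is_collection());
    Ok(())
}
```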
+ let store = state.runtime_env().object_store(table_path)?; + + let file_list_stream = pruned_partition_list( + state, + store.as_ref(), + table_path, + &[], + &self.options.file_extension, + &self.options.table_partition_cols, + ) + .await?; + + let file_group = file_list_stream.try_collect::>().await?.into(); + let keep_partition_by_columns = + state.config_options().execution.keep_partition_by_columns; + + // Sink related option, apart from format + let config = FileSinkConfig { + original_url: String::default(), + object_store_url: self.table_paths()[0].object_store(), + table_paths: self.table_paths().clone(), + file_group, + output_schema: self.schema(), + table_partition_cols: self.options.table_partition_cols.clone(), + insert_op, + keep_partition_by_columns, + file_extension: self.options().format.get_ext(), + }; + + let orderings = self.try_create_output_ordering(state.execution_props())?; + // It is sufficient to pass only one of the equivalent orderings: + let order_requirements = orderings.into_iter().next().map(Into::into); + + self.options() + .format + .create_writer_physical_plan(input, state, config, order_requirements) + .await + } + + fn get_column_default(&self, column: &str) -> Option<&Expr> { + self.column_defaults.get(column) + } +} + +impl ListingTable { + /// Get the list of files for a scan as well as the file level statistics. + /// The list is grouped to let the execution plan know how the files should + /// be distributed to different threads / executors. + pub async fn list_files_for_scan<'a>( + &'a self, + ctx: &'a dyn Session, + filters: &'a [Expr], + limit: Option, + ) -> datafusion_common::Result<(Vec, Statistics)> { + let store = if let Some(url) = self.table_paths.first() { + ctx.runtime_env().object_store(url)? + } else { + return Ok((vec![], Statistics::new_unknown(&self.file_schema))); + }; + // list files (with partitions) + let file_list = future::try_join_all(self.table_paths.iter().map(|table_path| { + pruned_partition_list( + ctx, + store.as_ref(), + table_path, + filters, + &self.options.file_extension, + &self.options.table_partition_cols, + ) + })) + .await?; + let meta_fetch_concurrency = + ctx.config_options().execution.meta_fetch_concurrency; + let file_list = stream::iter(file_list).flatten_unordered(meta_fetch_concurrency); + // collect the statistics if required by the config + let files = file_list + .map(|part_file| async { + let part_file = part_file?; + let statistics = if self.options.collect_stat { + self.do_collect_statistics(ctx, &store, &part_file).await? 
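Editorial aside (not part of this diff): the `do_collect_statistics` call above is a get-or-compute lookup against the file statistics cache. A simplified sketch of that pattern; `StatsCache` and `Stats` are hypothetical stand-ins for `FileStatisticsCache` and `Statistics`, which in the real code are keyed by object path and validated against `ObjectMeta`:

```rust
use std::collections::HashMap;
use std::sync::Arc;

struct Stats {
    num_rows: usize,
}

struct StatsCache {
    inner: HashMap<String, Arc<Stats>>,
}

impl StatsCache {
    fn get_or_infer(&mut self, path: &str, infer: impl FnOnce() -> Stats) -> Arc<Stats> {
        if let Some(stats) = self.inner.get(path) {
            return Arc::clone(stats); // cache hit: skip re-reading file metadata
        }
        let stats = Arc::new(infer()); // cache miss: infer once and remember
        self.inner.insert(path.to_string(), Arc::clone(&stats));
        stats
    }
}

fn main() {
    let mut cache = StatsCache { inner: HashMap::new() };
    let s = cache.get_or_infer("file.parquet", || Stats { num_rows: 100 });
    assert_eq!(s.num_rows, 100);
    // Second lookup is served from the cache; the closure is never called
    let s2 = cache.get_or_infer("file.parquet", || unreachable!());
    assert_eq!(s2.num_rows, 100);
}
```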
+ } else { + Arc::new(Statistics::new_unknown(&self.file_schema)) + }; + Ok(part_file.with_statistics(statistics)) + }) + .boxed() + .buffer_unordered(ctx.config_options().execution.meta_fetch_concurrency); + + let (file_group, inexact_stats) = + get_files_with_limit(files, limit, self.options.collect_stat).await?; + + let file_groups = file_group.split_files(self.options.target_partitions); + let (mut file_groups, mut stats) = compute_all_files_statistics( + file_groups, + self.schema(), + self.options.collect_stat, + inexact_stats, + )?; + + let schema_adapter = self.create_schema_adapter(); + let (schema_mapper, _) = schema_adapter.map_schema(self.file_schema.as_ref())?; + + stats.column_statistics = + schema_mapper.map_column_statistics(&stats.column_statistics)?; + file_groups.iter_mut().try_for_each(|file_group| { + if let Some(stat) = file_group.statistics_mut() { + stat.column_statistics = + schema_mapper.map_column_statistics(&stat.column_statistics)?; + } + Ok::<_, DataFusionError>(()) + })?; + Ok((file_groups, stats)) + } + + /// Collects statistics for a given partitioned file. + /// + /// This method first checks if the statistics for the given file are already cached. + /// If they are, it returns the cached statistics. + /// If they are not, it infers the statistics from the file and stores them in the cache. + async fn do_collect_statistics( + &self, + ctx: &dyn Session, + store: &Arc, + part_file: &PartitionedFile, + ) -> datafusion_common::Result> { + match self + .collected_statistics + .get_with_extra(&part_file.object_meta.location, &part_file.object_meta) + { + Some(statistics) => Ok(statistics), + None => { + let statistics = self + .options + .format + .infer_stats( + ctx, + store, + Arc::clone(&self.file_schema), + &part_file.object_meta, + ) + .await?; + let statistics = Arc::new(statistics); + self.collected_statistics.put_with_extra( + &part_file.object_meta.location, + Arc::clone(&statistics), + &part_file.object_meta, + ); + Ok(statistics) + } + } + } +} + +/// Processes a stream of partitioned files and returns a `FileGroup` containing the files. +/// +/// This function collects files from the provided stream until either: +/// 1. The stream is exhausted +/// 2. The accumulated number of rows exceeds the provided `limit` (if specified) +/// +/// # Arguments +/// * `files` - A stream of `Result` items to process +/// * `limit` - An optional row count limit. If provided, the function will stop collecting files +/// once the accumulated number of rows exceeds this limit +/// * `collect_stats` - Whether to collect and accumulate statistics from the files +/// +/// # Returns +/// A `Result` containing a `FileGroup` with the collected files +/// and a boolean indicating whether the statistics are inexact. +/// +/// # Note +/// The function will continue processing files if statistics are not available or if the +/// limit is not provided. If `collect_stats` is false, statistics won't be accumulated +/// but files will still be collected. +async fn get_files_with_limit( + files: impl Stream>, + limit: Option, + collect_stats: bool, +) -> datafusion_common::Result<(FileGroup, bool)> { + let mut file_group = FileGroup::default(); + // Fusing the stream allows us to call next safely even once it is finished. 
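Editorial aside (not part of this diff): `get_files_with_limit` accumulates per-file row counts with `Precision`. A small sketch of that arithmetic (seeding from `Precision::Exact(0)` is a simplification; the real code takes the first file's count directly):

```rust
use datafusion_common::stats::Precision;

fn main() {
    // Accumulate row counts across files, as get_files_with_limit does
    let mut num_rows: Precision<usize> = Precision::Exact(0);
    for file_rows in [Precision::Exact(40_usize), Precision::Exact(70)] {
        num_rows = num_rows.add(&file_rows);
    }
    // 110 rows collected so far; a LIMIT 100 could stop listing further files here
    assert_eq!(num_rows, Precision::Exact(110));

    // If any file lacks statistics, the total degrades and the limit
    // short-circuit cannot apply
    assert_eq!(num_rows.add(&Precision::Absent), Precision::Absent);
}
```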
+ let mut all_files = Box::pin(files.fuse()); + enum ProcessingState { + ReadingFiles, + ReachedLimit, + } + + let mut state = ProcessingState::ReadingFiles; + let mut num_rows = Precision::Absent; + + while let Some(file_result) = all_files.next().await { + // Early exit if we've already reached our limit + if matches!(state, ProcessingState::ReachedLimit) { + break; + } + + let file = file_result?; + + // Update file statistics regardless of state + if collect_stats { + if let Some(file_stats) = &file.statistics { + num_rows = if file_group.is_empty() { + // For the first file, just take its row count + file_stats.num_rows + } else { + // For subsequent files, accumulate the counts + num_rows.add(&file_stats.num_rows) + }; + } + } + + // Always add the file to our group + file_group.push(file); + + // Check if we've hit the limit (if one was specified) + if let Some(limit) = limit { + if let Precision::Exact(row_count) = num_rows { + if row_count > limit { + state = ProcessingState::ReachedLimit; + } + } + } + } + // If we still have files in the stream, it means that the limit kicked + // in, and the statistic could have been different had we processed the + // files in a different order. + let inexact_stats = all_files.next().await.is_some(); + Ok((file_group, inexact_stats)) +} diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml index f5e51cb236d4..abeb4e66a269 100644 --- a/datafusion/common/Cargo.toml +++ b/datafusion/common/Cargo.toml @@ -71,7 +71,7 @@ log = { workspace = true } object_store = { workspace = true, optional = true } parquet = { workspace = true, optional = true, default-features = true } paste = "1.0.15" -pyo3 = { version = "0.25", optional = true } +pyo3 = { version = "0.26", optional = true } recursive = { workspace = true, optional = true } sqlparser = { workspace = true, optional = true } tokio = { workspace = true } diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 39d730eaafb4..1713377f8d4d 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -22,18 +22,19 @@ use arrow_ipc::CompressionType; #[cfg(feature = "parquet_encryption")] use crate::encryption::{FileDecryptionProperties, FileEncryptionProperties}; use crate::error::_config_err; -use crate::format::ExplainFormat; +use crate::format::{ExplainAnalyzeLevel, ExplainFormat}; use crate::parsers::CompressionTypeVariant; use crate::utils::get_available_parallelism; use crate::{DataFusionError, Result}; +#[cfg(feature = "parquet_encryption")] +use hex; use std::any::Any; use std::collections::{BTreeMap, HashMap}; use std::error::Error; use std::fmt::{self, Display}; use std::str::FromStr; - #[cfg(feature = "parquet_encryption")] -use hex; +use std::sync::Arc; /// A macro that wraps a configuration struct and automatically derives /// [`Default`] and [`ConfigField`] for it, allowing it to be used @@ -258,7 +259,7 @@ config_namespace! { /// Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, /// MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, Ansi, DuckDB and Databricks. - pub dialect: String, default = "generic".to_string() + pub dialect: Dialect, default = Dialect::Generic // no need to lowercase because `sqlparser::dialect_from_str`] is case-insensitive /// If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but @@ -292,6 +293,94 @@ config_namespace! { } } +/// This is the SQL dialect used by DataFusion's parser. 
+/// This mirrors [sqlparser::dialect::Dialect](https://docs.rs/sqlparser/latest/sqlparser/dialect/trait.Dialect.html) +/// trait in order to offer an easier API and avoid adding the `sqlparser` dependency +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)] +pub enum Dialect { + #[default] + Generic, + MySQL, + PostgreSQL, + Hive, + SQLite, + Snowflake, + Redshift, + MsSQL, + ClickHouse, + BigQuery, + Ansi, + DuckDB, + Databricks, +} + +impl AsRef for Dialect { + fn as_ref(&self) -> &str { + match self { + Self::Generic => "generic", + Self::MySQL => "mysql", + Self::PostgreSQL => "postgresql", + Self::Hive => "hive", + Self::SQLite => "sqlite", + Self::Snowflake => "snowflake", + Self::Redshift => "redshift", + Self::MsSQL => "mssql", + Self::ClickHouse => "clickhouse", + Self::BigQuery => "bigquery", + Self::Ansi => "ansi", + Self::DuckDB => "duckdb", + Self::Databricks => "databricks", + } + } +} + +impl FromStr for Dialect { + type Err = DataFusionError; + + fn from_str(s: &str) -> Result { + let value = match s.to_ascii_lowercase().as_str() { + "generic" => Self::Generic, + "mysql" => Self::MySQL, + "postgresql" | "postgres" => Self::PostgreSQL, + "hive" => Self::Hive, + "sqlite" => Self::SQLite, + "snowflake" => Self::Snowflake, + "redshift" => Self::Redshift, + "mssql" => Self::MsSQL, + "clickhouse" => Self::ClickHouse, + "bigquery" => Self::BigQuery, + "ansi" => Self::Ansi, + "duckdb" => Self::DuckDB, + "databricks" => Self::Databricks, + other => { + let error_message = format!( + "Invalid Dialect: {other}. Expected one of: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, Ansi, DuckDB, Databricks" + ); + return Err(DataFusionError::Configuration(error_message)); + } + }; + Ok(value) + } +} + +impl ConfigField for Dialect { + fn visit(&self, v: &mut V, key: &str, description: &'static str) { + v.some(key, self, description) + } + + fn set(&mut self, _: &str, value: &str) -> Result<()> { + *self = Self::from_str(value)?; + Ok(()) + } +} + +impl Display for Dialect { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let str = self.as_ref(); + write!(f, "{str}") + } +} + #[derive(Debug, Default, Clone, Copy, PartialEq, Eq)] pub enum SpillCompression { Zstd, @@ -745,11 +834,21 @@ config_namespace! { /// past window functions, if possible pub enable_window_limits: bool, default = true - /// When set to true attempts to push down dynamic filters generated by operators into the file scan phase. + /// When set to true, the optimizer will attempt to push down TopK dynamic filters + /// into the file scan phase. + pub enable_topk_dynamic_filter_pushdown: bool, default = true + + /// When set to true, the optimizer will attempt to push down Join dynamic filters + /// into the file scan phase. + pub enable_join_dynamic_filter_pushdown: bool, default = true + + /// When set to true attempts to push down dynamic filters generated by operators (topk & join) into the file scan phase. /// For example, for a query such as `SELECT * FROM t ORDER BY timestamp DESC LIMIT 10`, the optimizer /// will attempt to push down the current top 10 timestamps that the TopK operator references into the file scans. /// This means that if we already have 10 timestamps in the year 2025 /// any files that only have timestamps in the year 2024 can be skipped / pruned at various stages in the scan. 
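Editorial aside (not part of this diff): a round-trip sketch for the new `Dialect` enum, assuming it is exported from `datafusion_common::config` as defined above:

```rust
use std::str::FromStr;
use datafusion_common::config::Dialect;

fn main() -> datafusion_common::Result<()> {
    // Parsing is case-insensitive and accepts the "postgres" alias
    let dialect = Dialect::from_str("Postgres")?;
    assert_eq!(dialect, Dialect::PostgreSQL);

    // Display / AsRef render the canonical lowercase name
    assert_eq!(dialect.to_string(), "postgresql");

    // Unknown names produce a configuration error listing the valid values
    assert!(Dialect::from_str("oracle").is_err());
    Ok(())
}
```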
+ /// The config will suppress `enable_join_dynamic_filter_pushdown` & `enable_topk_dynamic_filter_pushdown` + /// So if you disable `enable_topk_dynamic_filter_pushdown`, then enable `enable_dynamic_filter_pushdown`, the `enable_topk_dynamic_filter_pushdown` will be overridden. pub enable_dynamic_filter_pushdown: bool, default = true /// When set to true, the optimizer will insert filters before a join between @@ -840,6 +939,11 @@ config_namespace! { /// HashJoin can work more efficiently than SortMergeJoin but consumes more memory pub prefer_hash_join: bool, default = true + /// When set to true, piecewise merge join is enabled. PiecewiseMergeJoin is currently + /// experimental. Physical planner will opt for PiecewiseMergeJoin when there is only + /// one range filter. + pub enable_piecewise_merge_join: bool, default = false + /// The maximum estimated size in bytes for one input side of a HashJoin /// will be collected into a single partition pub hash_join_single_partition_threshold: usize, default = 1024 * 1024 @@ -893,6 +997,11 @@ config_namespace! { /// (format=tree only) Maximum total width of the rendered tree. /// When set to 0, the tree will have no width limit. pub tree_maximum_render_width: usize, default = 240 + + /// Verbosity level for "EXPLAIN ANALYZE". Default is "dev" + /// "summary" shows common metrics for high-level insights. + /// "dev" provides deep operator-level introspection for developers. + pub analyze_level: ExplainAnalyzeLevel, default = ExplainAnalyzeLevel::Dev } } @@ -1039,6 +1148,20 @@ impl ConfigOptions { }; if prefix == "datafusion" { + if key == "optimizer.enable_dynamic_filter_pushdown" { + let bool_value = value.parse::().map_err(|e| { + DataFusionError::Configuration(format!( + "Failed to parse '{value}' as bool: {e}", + )) + })?; + + { + self.optimizer.enable_dynamic_filter_pushdown = bool_value; + self.optimizer.enable_topk_dynamic_filter_pushdown = bool_value; + self.optimizer.enable_join_dynamic_filter_pushdown = bool_value; + } + return Ok(()); + } return ConfigField::set(self, key, value); } @@ -2287,13 +2410,13 @@ impl From for FileEncryptionProperties { hex::decode(&val.aad_prefix_as_hex).expect("Invalid AAD prefix"); fep = fep.with_aad_prefix(aad_prefix); } - fep.build().unwrap() + Arc::unwrap_or_clone(fep.build().unwrap()) } } #[cfg(feature = "parquet_encryption")] -impl From<&FileEncryptionProperties> for ConfigFileEncryptionProperties { - fn from(f: &FileEncryptionProperties) -> Self { +impl From<&Arc> for ConfigFileEncryptionProperties { + fn from(f: &Arc) -> Self { let (column_names_vec, column_keys_vec, column_metas_vec) = f.column_keys(); let mut column_encryption_properties: HashMap< @@ -2435,13 +2558,13 @@ impl From for FileDecryptionProperties { fep = fep.with_aad_prefix(aad_prefix); } - fep.build().unwrap() + Arc::unwrap_or_clone(fep.build().unwrap()) } } #[cfg(feature = "parquet_encryption")] -impl From<&FileDecryptionProperties> for ConfigFileDecryptionProperties { - fn from(f: &FileDecryptionProperties) -> Self { +impl From<&Arc> for ConfigFileDecryptionProperties { + fn from(f: &Arc) -> Self { let (column_names_vec, column_keys_vec) = f.column_keys(); let mut column_decryption_properties: HashMap< String, @@ -2712,6 +2835,7 @@ mod tests { }; use std::any::Any; use std::collections::HashMap; + use std::sync::Arc; #[derive(Default, Debug, Clone)] pub struct TestExtensionConfig { @@ -2868,16 +2992,15 @@ mod tests { .unwrap(); // Test round-trip - let config_encrypt: ConfigFileEncryptionProperties = - 
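Editorial aside (not part of this diff): the override semantics described above, made concrete with `ConfigOptions::set`. Writing the umbrella key rewrites both operator-specific flags, while each specific key can still be toggled afterwards:

```rust
use datafusion_common::config::ConfigOptions;

fn main() -> datafusion_common::Result<()> {
    let mut options = ConfigOptions::default();

    // The umbrella flag overrides both operator-specific flags...
    options.set("datafusion.optimizer.enable_dynamic_filter_pushdown", "false")?;
    assert!(!options.optimizer.enable_topk_dynamic_filter_pushdown);
    assert!(!options.optimizer.enable_join_dynamic_filter_pushdown);

    // ...while the operator-specific flags remain individually adjustable
    options.set("datafusion.optimizer.enable_topk_dynamic_filter_pushdown", "true")?;
    assert!(options.optimizer.enable_topk_dynamic_filter_pushdown);
    assert!(!options.optimizer.enable_join_dynamic_filter_pushdown);
    Ok(())
}
```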
(&file_encryption_properties).into(); - let encryption_properties_built: FileEncryptionProperties = - config_encrypt.clone().into(); + let config_encrypt = + ConfigFileEncryptionProperties::from(&file_encryption_properties); + let encryption_properties_built = + Arc::new(FileEncryptionProperties::from(config_encrypt.clone())); assert_eq!(file_encryption_properties, encryption_properties_built); - let config_decrypt: ConfigFileDecryptionProperties = - (&decryption_properties).into(); - let decryption_properties_built: FileDecryptionProperties = - config_decrypt.clone().into(); + let config_decrypt = ConfigFileDecryptionProperties::from(&decryption_properties); + let decryption_properties_built = + Arc::new(FileDecryptionProperties::from(config_decrypt.clone())); assert_eq!(decryption_properties, decryption_properties_built); /////////////////////////////////////////////////////////////////////////////////// diff --git a/datafusion/common/src/datatype.rs b/datafusion/common/src/datatype.rs new file mode 100644 index 000000000000..544ec0c2468c --- /dev/null +++ b/datafusion/common/src/datatype.rs @@ -0,0 +1,175 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! [`DataTypeExt`] and [`FieldExt`] extension trait for working with DataTypes to Fields + +use crate::arrow::datatypes::{DataType, Field, FieldRef}; +use std::sync::Arc; + +/// DataFusion extension methods for Arrow [`DataType`] +pub trait DataTypeExt { + /// Convert the type to field with nullable type and "" name + /// + /// This is used to track the places where we convert a [`DataType`] + /// into a nameless field to interact with an API that is + /// capable of representing an extension type and/or nullability. + /// + /// For example, it will convert a `DataType::Int32` into + /// `Field::new("", DataType::Int32, true)`. + /// + /// ``` + /// # use datafusion_common::datatype::DataTypeExt; + /// # use arrow::datatypes::DataType; + /// let dt = DataType::Utf8; + /// let field = dt.into_nullable_field(); + /// // result is a nullable Utf8 field with "" name + /// assert_eq!(field.name(), ""); + /// assert_eq!(field.data_type(), &DataType::Utf8); + /// assert!(field.is_nullable()); + /// ``` + fn into_nullable_field(self) -> Field; + + /// Convert the type to [`FieldRef`] with nullable type and "" name + /// + /// Concise wrapper around [`DataTypeExt::into_nullable_field`] that + /// constructs a [`FieldRef`]. 
+ fn into_nullable_field_ref(self) -> FieldRef; +} + +impl DataTypeExt for DataType { + fn into_nullable_field(self) -> Field { + Field::new("", self, true) + } + + fn into_nullable_field_ref(self) -> FieldRef { + Arc::new(Field::new("", self, true)) + } +} + +/// DataFusion extension methods for Arrow [`Field`] and [`FieldRef`] +pub trait FieldExt { + /// Returns a new Field representing a List of this Field's DataType. + /// + /// For example if input represents an `Int32`, the return value will + /// represent a `List`. + /// + /// Example: + /// ``` + /// # use std::sync::Arc; + /// # use arrow::datatypes::{DataType, Field}; + /// # use datafusion_common::datatype::FieldExt; + /// // Int32 field + /// let int_field = Field::new("my_int", DataType::Int32, true); + /// // convert to a List field + /// let list_field = int_field.into_list(); + /// // List + /// // Note that the item field name has been renamed to "item" + /// assert_eq!(list_field.data_type(), &DataType::List(Arc::new( + /// Field::new("item", DataType::Int32, true) + /// ))); + /// + fn into_list(self) -> Self; + + /// Return a new Field representing this Field as the item type of a + /// [`DataType::FixedSizeList`] + /// + /// For example if input represents an `Int32`, the return value will + /// represent a `FixedSizeList`. + /// + /// Example: + /// ``` + /// # use std::sync::Arc; + /// # use arrow::datatypes::{DataType, Field}; + /// # use datafusion_common::datatype::FieldExt; + /// // Int32 field + /// let int_field = Field::new("my_int", DataType::Int32, true); + /// // convert to a FixedSizeList field of size 3 + /// let fixed_size_list_field = int_field.into_fixed_size_list(3); + /// // FixedSizeList + /// // Note that the item field name has been renamed to "item" + /// assert_eq!( + /// fixed_size_list_field.data_type(), + /// &DataType::FixedSizeList(Arc::new( + /// Field::new("item", DataType::Int32, true)), + /// 3 + /// )); + /// + fn into_fixed_size_list(self, list_size: i32) -> Self; + + /// Update the field to have the default list field name ("item") + /// + /// Lists are allowed to have an arbitrarily named field; however, a name + /// other than 'item' will cause it to fail an == check against a more + /// idiomatically created list in arrow-rs which causes issues. + /// + /// For example, if input represents an `Int32` field named "my_int", + /// the return value will represent an `Int32` field named "item". 
+ /// + /// Example: + /// ``` + /// # use arrow::datatypes::Field; + /// # use datafusion_common::datatype::FieldExt; + /// let my_field = Field::new("my_int", arrow::datatypes::DataType::Int32, true); + /// let item_field = my_field.into_list_item(); + /// assert_eq!(item_field.name(), Field::LIST_FIELD_DEFAULT_NAME); + /// assert_eq!(item_field.name(), "item"); + /// ``` + fn into_list_item(self) -> Self; +} + +impl FieldExt for Field { + fn into_list(self) -> Self { + DataType::List(Arc::new(self.into_list_item())).into_nullable_field() + } + + fn into_fixed_size_list(self, list_size: i32) -> Self { + DataType::FixedSizeList(self.into_list_item().into(), list_size) + .into_nullable_field() + } + + fn into_list_item(self) -> Self { + if self.name() != Field::LIST_FIELD_DEFAULT_NAME { + self.with_name(Field::LIST_FIELD_DEFAULT_NAME) + } else { + self + } + } +} + +impl FieldExt for Arc { + fn into_list(self) -> Self { + DataType::List(self.into_list_item()) + .into_nullable_field() + .into() + } + + fn into_fixed_size_list(self, list_size: i32) -> Self { + DataType::FixedSizeList(self.into_list_item(), list_size) + .into_nullable_field() + .into() + } + + fn into_list_item(self) -> Self { + if self.name() != Field::LIST_FIELD_DEFAULT_NAME { + Arc::unwrap_or_clone(self) + .with_name(Field::LIST_FIELD_DEFAULT_NAME) + .into() + } else { + self + } + } +} diff --git a/datafusion/common/src/dfschema.rs b/datafusion/common/src/dfschema.rs index 6866b4011f9e..34a36f543657 100644 --- a/datafusion/common/src/dfschema.rs +++ b/datafusion/common/src/dfschema.rs @@ -1417,7 +1417,7 @@ mod tests { fn from_qualified_schema_into_arrow_schema() -> Result<()> { let schema = DFSchema::try_from_qualified_schema("t1", &test_schema_1())?; let arrow_schema = schema.as_arrow(); - insta::assert_snapshot!(arrow_schema, @r#"Field { name: "c0", data_type: Boolean, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "c1", data_type: Boolean, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }"#); + insta::assert_snapshot!(arrow_schema.to_string(), @r#"Field { "c0": nullable Boolean }, Field { "c1": nullable Boolean }"#); Ok(()) } diff --git a/datafusion/common/src/encryption.rs b/datafusion/common/src/encryption.rs index b764ad77cff1..2a8cfdbc8996 100644 --- a/datafusion/common/src/encryption.rs +++ b/datafusion/common/src/encryption.rs @@ -24,38 +24,10 @@ pub use parquet::encryption::decrypt::FileDecryptionProperties; pub use parquet::encryption::encrypt::FileEncryptionProperties; #[cfg(not(feature = "parquet_encryption"))] -#[derive(Default, Debug)] +#[derive(Default, Clone, Debug)] pub struct FileDecryptionProperties; #[cfg(not(feature = "parquet_encryption"))] -#[derive(Default, Debug)] +#[derive(Default, Clone, Debug)] pub struct FileEncryptionProperties; pub use crate::config::{ConfigFileDecryptionProperties, ConfigFileEncryptionProperties}; - -#[cfg(feature = "parquet_encryption")] -pub fn map_encryption_to_config_encryption( - encryption: Option<&FileEncryptionProperties>, -) -> Option { - encryption.map(|fe| fe.into()) -} - -#[cfg(not(feature = "parquet_encryption"))] -pub fn map_encryption_to_config_encryption( - _encryption: Option<&FileEncryptionProperties>, -) -> Option { - None -} - -#[cfg(feature = "parquet_encryption")] -pub fn map_config_decryption_to_decryption( - decryption: &ConfigFileDecryptionProperties, -) -> FileDecryptionProperties { - decryption.clone().into() -} - -#[cfg(not(feature = "parquet_encryption"))] -pub fn 
map_config_decryption_to_decryption( - _decryption: &ConfigFileDecryptionProperties, -) -> FileDecryptionProperties { - FileDecryptionProperties {} -} diff --git a/datafusion/common/src/file_options/parquet_writer.rs b/datafusion/common/src/file_options/parquet_writer.rs index 3977f2b489e1..564929c61bab 100644 --- a/datafusion/common/src/file_options/parquet_writer.rs +++ b/datafusion/common/src/file_options/parquet_writer.rs @@ -402,15 +402,14 @@ pub(crate) fn parse_statistics_string(str_setting: &str) -> Result Result { + match level.to_lowercase().as_str() { + "summary" => Ok(ExplainAnalyzeLevel::Summary), + "dev" => Ok(ExplainAnalyzeLevel::Dev), + other => Err(DataFusionError::Configuration(format!( + "Invalid explain analyze level. Expected 'summary' or 'dev'. Got '{other}'" + ))), + } + } +} + +impl Display for ExplainAnalyzeLevel { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let s = match self { + ExplainAnalyzeLevel::Summary => "summary", + ExplainAnalyzeLevel::Dev => "dev", + }; + write!(f, "{s}") + } +} + +impl ConfigField for ExplainAnalyzeLevel { + fn visit(&self, v: &mut V, key: &str, description: &'static str) { + v.some(key, self, description) + } + + fn set(&mut self, _: &str, value: &str) -> Result<()> { + *self = ExplainAnalyzeLevel::from_str(value)?; + Ok(()) + } +} diff --git a/datafusion/common/src/lib.rs b/datafusion/common/src/lib.rs index 24ec9b7be323..76c7b46e3273 100644 --- a/datafusion/common/src/lib.rs +++ b/datafusion/common/src/lib.rs @@ -39,6 +39,7 @@ pub mod alias; pub mod cast; pub mod config; pub mod cse; +pub mod datatype; pub mod diagnostic; pub mod display; pub mod encryption; @@ -47,6 +48,7 @@ pub mod file_options; pub mod format; pub mod hash_utils; pub mod instant; +pub mod metadata; pub mod nested_struct; mod null_equality; pub mod parsers; @@ -108,6 +110,12 @@ pub use error::{ // The HashMap and HashSet implementations that should be used as the uniform defaults pub type HashMap = hashbrown::HashMap; pub type HashSet = hashbrown::HashSet; +pub mod hash_map { + pub use hashbrown::hash_map::Entry; +} +pub mod hash_set { + pub use hashbrown::hash_set::Entry; +} /// Downcast an Arrow Array to a concrete type, return an `DataFusionError::Internal` if the cast is /// not possible. In normal usage of DataFusion the downcast should always succeed. diff --git a/datafusion/common/src/metadata.rs b/datafusion/common/src/metadata.rs new file mode 100644 index 000000000000..39065808efb9 --- /dev/null +++ b/datafusion/common/src/metadata.rs @@ -0,0 +1,371 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
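Editorial aside (not part of this diff): selecting the new `EXPLAIN ANALYZE` verbosity defined earlier in this change, assuming the option is registered in the `explain` namespace as `datafusion.explain.analyze_level`:

```rust
use datafusion_common::config::ConfigOptions;

fn main() -> datafusion_common::Result<()> {
    let mut options = ConfigOptions::default();

    // Default is "dev"; switch EXPLAIN ANALYZE to the high-level summary output
    options.set("datafusion.explain.analyze_level", "summary")?;

    // Values other than "summary" or "dev" are rejected
    assert!(options.set("datafusion.explain.analyze_level", "verbose").is_err());
    Ok(())
}
```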
+ +use std::{collections::BTreeMap, sync::Arc}; + +use arrow::datatypes::{DataType, Field}; +use hashbrown::HashMap; + +use crate::{error::_plan_err, DataFusionError, ScalarValue}; + +/// A [`ScalarValue`] with optional [`FieldMetadata`] +#[derive(Debug, Clone)] +pub struct ScalarAndMetadata { + pub value: ScalarValue, + pub metadata: Option, +} + +impl ScalarAndMetadata { + /// Create a new Literal from a scalar value with optional [`FieldMetadata`] + pub fn new(value: ScalarValue, metadata: Option) -> Self { + Self { value, metadata } + } + + /// Access the underlying [ScalarValue] storage + pub fn value(&self) -> &ScalarValue { + &self.value + } + + /// Access the [FieldMetadata] attached to this value, if any + pub fn metadata(&self) -> Option<&FieldMetadata> { + self.metadata.as_ref() + } + + /// Consume self and return components + pub fn into_inner(self) -> (ScalarValue, Option) { + (self.value, self.metadata) + } + + /// Cast this values's storage type + /// + /// This operation assumes that if the underlying [ScalarValue] can be casted + /// to a given type that any extension type represented by the metadata is also + /// valid. + pub fn cast_storage_to( + &self, + target_type: &DataType, + ) -> Result { + let new_value = self.value().cast_to(target_type)?; + Ok(Self::new(new_value, self.metadata.clone())) + } +} + +/// create a new ScalarAndMetadata from a ScalarValue without +/// any metadata +impl From for ScalarAndMetadata { + fn from(value: ScalarValue) -> Self { + Self::new(value, None) + } +} + +/// Assert equality of data types where one or both sides may have field metadata +/// +/// This currently compares absent metadata (e.g., one side was a DataType) and +/// empty metadata (e.g., one side was a field where the field had no metadata) +/// as equal and uses byte-for-byte comparison for the keys and values of the +/// fields, even though this is potentially too strict for some cases (e.g., +/// extension types where extension metadata is represented by JSON, or cases +/// where field metadata is orthogonal to the interpretation of the data type). +/// +/// Returns a planning error with suitably formatted type representations if +/// actual and expected do not compare to equal. +pub fn check_metadata_with_storage_equal( + actual: ( + &DataType, + Option<&std::collections::HashMap>, + ), + expected: ( + &DataType, + Option<&std::collections::HashMap>, + ), + what: &str, + context: &str, +) -> Result<(), DataFusionError> { + if actual.0 != expected.0 { + return _plan_err!( + "Expected {what} of type {}, got {}{context}", + format_type_and_metadata(expected.0, expected.1), + format_type_and_metadata(actual.0, actual.1) + ); + } + + let metadata_equal = match (actual.1, expected.1) { + (None, None) => true, + (None, Some(expected_metadata)) => expected_metadata.is_empty(), + (Some(actual_metadata), None) => actual_metadata.is_empty(), + (Some(actual_metadata), Some(expected_metadata)) => { + actual_metadata == expected_metadata + } + }; + + if !metadata_equal { + return _plan_err!( + "Expected {what} of type {}, got {}{context}", + format_type_and_metadata(expected.0, expected.1), + format_type_and_metadata(actual.0, actual.1) + ); + } + + Ok(()) +} + +/// Given a data type represented by storage and optional metadata, generate +/// a user-facing string +/// +/// This function exists to reduce the number of Field debug strings that are +/// used to communicate type information in error messages and plan explain +/// renderings. 
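Editorial aside (not part of this diff): a small usage sketch for `ScalarAndMetadata`; the metadata key is purely illustrative:

```rust
use std::collections::BTreeMap;
use arrow::datatypes::DataType;
use datafusion_common::metadata::{FieldMetadata, ScalarAndMetadata};
use datafusion_common::ScalarValue;

fn main() -> datafusion_common::Result<()> {
    // Attach extension-type style metadata to a literal value
    let metadata = FieldMetadata::from(BTreeMap::from([(
        "ARROW:extension:name".to_string(),
        "my.ext".to_string(),
    )]));
    let literal = ScalarAndMetadata::new(ScalarValue::Int32(Some(5)), Some(metadata));

    // Casting changes only the storage type; the metadata is carried along
    let casted = literal.cast_storage_to(&DataType::Int64)?;
    assert_eq!(casted.value(), &ScalarValue::Int64(Some(5)));
    assert!(casted.metadata().is_some());
    Ok(())
}
```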
+pub fn format_type_and_metadata( + data_type: &DataType, + metadata: Option<&std::collections::HashMap>, +) -> String { + match metadata { + Some(metadata) if !metadata.is_empty() => { + format!("{data_type}<{metadata:?}>") + } + _ => data_type.to_string(), + } +} + +/// Literal metadata +/// +/// Stores metadata associated with a literal expressions +/// and is designed to be fast to `clone`. +/// +/// This structure is used to store metadata associated with a literal expression, and it +/// corresponds to the `metadata` field on [`Field`]. +/// +/// # Example: Create [`FieldMetadata`] from a [`Field`] +/// ``` +/// # use std::collections::HashMap; +/// # use datafusion_common::metadata::FieldMetadata; +/// # use arrow::datatypes::{Field, DataType}; +/// # let field = Field::new("c1", DataType::Int32, true) +/// # .with_metadata(HashMap::from([("foo".to_string(), "bar".to_string())])); +/// // Create a new `FieldMetadata` instance from a `Field` +/// let metadata = FieldMetadata::new_from_field(&field); +/// // There is also a `From` impl: +/// let metadata = FieldMetadata::from(&field); +/// ``` +/// +/// # Example: Update a [`Field`] with [`FieldMetadata`] +/// ``` +/// # use datafusion_common::metadata::FieldMetadata; +/// # use arrow::datatypes::{Field, DataType}; +/// # let field = Field::new("c1", DataType::Int32, true); +/// # let metadata = FieldMetadata::new_from_field(&field); +/// // Add any metadata from `FieldMetadata` to `Field` +/// let updated_field = metadata.add_to_field(field); +/// ``` +/// +#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] +pub struct FieldMetadata { + /// The inner metadata of a literal expression, which is a map of string + /// keys to string values. + /// + /// Note this is not a `HashMap` because `HashMap` does not provide + /// implementations for traits like `Debug` and `Hash`. + inner: Arc>, +} + +impl Default for FieldMetadata { + fn default() -> Self { + Self::new_empty() + } +} + +impl FieldMetadata { + /// Create a new empty metadata instance. + pub fn new_empty() -> Self { + Self { + inner: Arc::new(BTreeMap::new()), + } + } + + /// Merges two optional `FieldMetadata` instances, overwriting any existing + /// keys in `m` with keys from `n` if present. + /// + /// This function is commonly used in alias operations, particularly for literals + /// with metadata. When creating an alias expression, the metadata from the original + /// expression (such as a literal) is combined with any metadata specified on the alias. 
+ /// + /// # Arguments + /// + /// * `m` - The first metadata (typically from the original expression like a literal) + /// * `n` - The second metadata (typically from the alias definition) + /// + /// # Merge Strategy + /// + /// - If both metadata instances exist, they are merged with `n` taking precedence + /// - Keys from `n` will overwrite keys from `m` if they have the same name + /// - If only one metadata instance exists, it is returned unchanged + /// - If neither exists, `None` is returned + /// + /// # Example usage + /// ```rust + /// use datafusion_common::metadata::FieldMetadata; + /// use std::collections::BTreeMap; + /// + /// // Create metadata for a literal expression + /// let literal_metadata = Some(FieldMetadata::from(BTreeMap::from([ + /// ("source".to_string(), "constant".to_string()), + /// ("type".to_string(), "int".to_string()), + /// ]))); + /// + /// // Create metadata for an alias + /// let alias_metadata = Some(FieldMetadata::from(BTreeMap::from([ + /// ("description".to_string(), "answer".to_string()), + /// ("source".to_string(), "user".to_string()), // This will override literal's "source" + /// ]))); + /// + /// // Merge the metadata + /// let merged = FieldMetadata::merge_options( + /// literal_metadata.as_ref(), + /// alias_metadata.as_ref(), + /// ); + /// + /// // Result contains: {"source": "user", "type": "int", "description": "answer"} + /// assert!(merged.is_some()); + /// ``` + pub fn merge_options( + m: Option<&FieldMetadata>, + n: Option<&FieldMetadata>, + ) -> Option { + match (m, n) { + (Some(m), Some(n)) => { + let mut merged = m.clone(); + merged.extend(n.clone()); + Some(merged) + } + (Some(m), None) => Some(m.clone()), + (None, Some(n)) => Some(n.clone()), + (None, None) => None, + } + } + + /// Create a new metadata instance from a `Field`'s metadata. + pub fn new_from_field(field: &Field) -> Self { + let inner = field + .metadata() + .iter() + .map(|(k, v)| (k.to_string(), v.to_string())) + .collect(); + Self { + inner: Arc::new(inner), + } + } + + /// Create a new metadata instance from a map of string keys to string values. + pub fn new(inner: BTreeMap) -> Self { + Self { + inner: Arc::new(inner), + } + } + + /// Get the inner metadata as a reference to a `BTreeMap`. + pub fn inner(&self) -> &BTreeMap { + &self.inner + } + + /// Return the inner metadata + pub fn into_inner(self) -> Arc> { + self.inner + } + + /// Adds metadata from `other` into `self`, overwriting any existing keys. + pub fn extend(&mut self, other: Self) { + if other.is_empty() { + return; + } + let other = Arc::unwrap_or_clone(other.into_inner()); + Arc::make_mut(&mut self.inner).extend(other); + } + + /// Returns true if the metadata is empty. + pub fn is_empty(&self) -> bool { + self.inner.is_empty() + } + + /// Returns the number of key-value pairs in the metadata. + pub fn len(&self) -> usize { + self.inner.len() + } + + /// Convert this `FieldMetadata` into a `HashMap` + pub fn to_hashmap(&self) -> std::collections::HashMap { + self.inner + .iter() + .map(|(k, v)| (k.to_string(), v.to_string())) + .collect() + } + + /// Updates the metadata on the Field with this metadata, if it is not empty. 
+ pub fn add_to_field(&self, field: Field) -> Field { + if self.inner.is_empty() { + return field; + } + + field.with_metadata(self.to_hashmap()) + } +} + +impl From<&Field> for FieldMetadata { + fn from(field: &Field) -> Self { + Self::new_from_field(field) + } +} + +impl From> for FieldMetadata { + fn from(inner: BTreeMap) -> Self { + Self::new(inner) + } +} + +impl From> for FieldMetadata { + fn from(map: std::collections::HashMap) -> Self { + Self::new(map.into_iter().collect()) + } +} + +/// From reference +impl From<&std::collections::HashMap> for FieldMetadata { + fn from(map: &std::collections::HashMap) -> Self { + let inner = map + .iter() + .map(|(k, v)| (k.to_string(), v.to_string())) + .collect(); + Self::new(inner) + } +} + +/// From hashbrown map +impl From> for FieldMetadata { + fn from(map: HashMap) -> Self { + let inner = map.into_iter().collect(); + Self::new(inner) + } +} + +impl From<&HashMap> for FieldMetadata { + fn from(map: &HashMap) -> Self { + let inner = map + .into_iter() + .map(|(k, v)| (k.to_string(), v.to_string())) + .collect(); + Self::new(inner) + } +} diff --git a/datafusion/common/src/param_value.rs b/datafusion/common/src/param_value.rs index 7582cff56f87..ebf68e4dd210 100644 --- a/datafusion/common/src/param_value.rs +++ b/datafusion/common/src/param_value.rs @@ -16,22 +16,37 @@ // under the License. use crate::error::{_plan_datafusion_err, _plan_err}; +use crate::metadata::{check_metadata_with_storage_equal, ScalarAndMetadata}; use crate::{Result, ScalarValue}; -use arrow::datatypes::DataType; +use arrow::datatypes::{DataType, Field, FieldRef}; use std::collections::HashMap; /// The parameter value corresponding to the placeholder #[derive(Debug, Clone)] pub enum ParamValues { /// For positional query parameters, like `SELECT * FROM test WHERE a > $1 AND b = $2` - List(Vec), + List(Vec), /// For named query parameters, like `SELECT * FROM test WHERE a > $foo AND b = $goo` - Map(HashMap), + Map(HashMap), } impl ParamValues { - /// Verify parameter list length and type + /// Verify parameter list length and DataType + /// + /// Use [`ParamValues::verify_fields`] to ensure field metadata is considered when + /// computing type equality. 
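Editorial aside (not part of this diff): a sketch of the new `ParamValues::verify_fields` API; the empty field names mirror the dummy fields built by the deprecated `verify`:

```rust
use std::sync::Arc;
use arrow::datatypes::{DataType, Field, FieldRef};
use datafusion_common::{ParamValues, ScalarValue};

fn main() -> datafusion_common::Result<()> {
    // Positional parameters for `... WHERE a > $1 AND b = $2`
    let params = ParamValues::from(vec![
        ScalarValue::Int32(Some(10)),
        ScalarValue::Utf8(Some("x".to_string())),
    ]);

    // Expected parameter types are expressed as Fields, so metadata /
    // extension types can participate in the comparison
    let expected: Vec<FieldRef> = vec![
        Arc::new(Field::new("", DataType::Int32, true)),
        Arc::new(Field::new("", DataType::Utf8, true)),
    ];
    params.verify_fields(&expected)?;
    Ok(())
}
```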
+ #[deprecated(since = "51.0.0", note = "Use verify_fields instead")] pub fn verify(&self, expect: &[DataType]) -> Result<()> { + // make dummy Fields + let expect = expect + .iter() + .map(|dt| Field::new("", dt.clone(), true).into()) + .collect::>(); + self.verify_fields(&expect) + } + + /// Verify parameter list length and type + pub fn verify_fields(&self, expect: &[FieldRef]) -> Result<()> { match self { ParamValues::List(list) => { // Verify if the number of params matches the number of values @@ -45,15 +60,16 @@ impl ParamValues { // Verify if the types of the params matches the types of the values let iter = expect.iter().zip(list.iter()); - for (i, (param_type, value)) in iter.enumerate() { - if *param_type != value.data_type() { - return _plan_err!( - "Expected parameter of type {}, got {:?} at index {}", - param_type, - value.data_type(), - i - ); - } + for (i, (param_type, lit)) in iter.enumerate() { + check_metadata_with_storage_equal( + ( + &lit.value.data_type(), + lit.metadata.as_ref().map(|m| m.to_hashmap()).as_ref(), + ), + (param_type.data_type(), Some(param_type.metadata())), + "parameter", + &format!(" at index {i}"), + )?; } Ok(()) } @@ -65,7 +81,7 @@ impl ParamValues { } } - pub fn get_placeholders_with_values(&self, id: &str) -> Result { + pub fn get_placeholders_with_values(&self, id: &str) -> Result { match self { ParamValues::List(list) => { if id.is_empty() { @@ -99,7 +115,7 @@ impl ParamValues { impl From> for ParamValues { fn from(value: Vec) -> Self { - Self::List(value) + Self::List(value.into_iter().map(ScalarAndMetadata::from).collect()) } } @@ -108,8 +124,10 @@ where K: Into, { fn from(value: Vec<(K, ScalarValue)>) -> Self { - let value: HashMap = - value.into_iter().map(|(k, v)| (k.into(), v)).collect(); + let value: HashMap = value + .into_iter() + .map(|(k, v)| (k.into(), ScalarAndMetadata::from(v))) + .collect(); Self::Map(value) } } @@ -119,8 +137,10 @@ where K: Into, { fn from(value: HashMap) -> Self { - let value: HashMap = - value.into_iter().map(|(k, v)| (k.into(), v)).collect(); + let value: HashMap = value + .into_iter() + .map(|(k, v)| (k.into(), ScalarAndMetadata::from(v))) + .collect(); Self::Map(value) } } diff --git a/datafusion/common/src/pyarrow.rs b/datafusion/common/src/pyarrow.rs index ff413e08ab07..3b7d80b3da78 100644 --- a/datafusion/common/src/pyarrow.rs +++ b/datafusion/common/src/pyarrow.rs @@ -22,7 +22,7 @@ use arrow::pyarrow::{FromPyArrow, ToPyArrow}; use pyo3::exceptions::PyException; use pyo3::prelude::PyErr; use pyo3::types::{PyAnyMethods, PyList}; -use pyo3::{Bound, FromPyObject, IntoPyObject, PyAny, PyObject, PyResult, Python}; +use pyo3::{Bound, FromPyObject, IntoPyObject, PyAny, PyResult, Python}; use crate::{DataFusionError, ScalarValue}; @@ -52,11 +52,11 @@ impl FromPyArrow for ScalarValue { } impl ToPyArrow for ScalarValue { - fn to_pyarrow(&self, py: Python) -> PyResult { + fn to_pyarrow<'py>(&self, py: Python<'py>) -> PyResult> { let array = self.to_array()?; // convert to pyarrow array using C data interface let pyarray = array.to_data().to_pyarrow(py)?; - let pyscalar = pyarray.call_method1(py, "__getitem__", (0,))?; + let pyscalar = pyarray.call_method1("__getitem__", (0,))?; Ok(pyscalar) } @@ -79,23 +79,22 @@ impl<'source> IntoPyObject<'source> for ScalarValue { let array = self.to_array()?; // convert to pyarrow array using C data interface let pyarray = array.to_data().to_pyarrow(py)?; - let pyarray_bound = pyarray.bind(py); - pyarray_bound.call_method1("__getitem__", (0,)) + 
pyarray.call_method1("__getitem__", (0,)) } } #[cfg(test)] mod tests { use pyo3::ffi::c_str; - use pyo3::prepare_freethreaded_python; use pyo3::py_run; use pyo3::types::PyDict; + use pyo3::Python; use super::*; fn init_python() { - prepare_freethreaded_python(); - Python::with_gil(|py| { + Python::initialize(); + Python::attach(|py| { if py.run(c_str!("import pyarrow"), None, None).is_err() { let locals = PyDict::new(py); py.run( @@ -135,12 +134,11 @@ mod tests { ScalarValue::Date32(Some(1234)), ]; - Python::with_gil(|py| { + Python::attach(|py| { for scalar in example_scalars.iter() { - let result = ScalarValue::from_pyarrow_bound( - scalar.to_pyarrow(py).unwrap().bind(py), - ) - .unwrap(); + let result = + ScalarValue::from_pyarrow_bound(&scalar.to_pyarrow(py).unwrap()) + .unwrap(); assert_eq!(scalar, &result); } }); @@ -150,7 +148,7 @@ mod tests { fn test_py_scalar() -> PyResult<()> { init_python(); - Python::with_gil(|py| -> PyResult<()> { + Python::attach(|py| -> PyResult<()> { let scalar_float = ScalarValue::Float64(Some(12.34)); let py_float = scalar_float .into_pyobject(py)? diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index 60ff1f4b2ed4..a70a027a8fac 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -70,7 +70,7 @@ use arrow::array::{ TimestampNanosecondArray, TimestampSecondArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array, UnionArray, }; -use arrow::buffer::ScalarBuffer; +use arrow::buffer::{BooleanBuffer, ScalarBuffer}; use arrow::compute::kernels::cast::{cast_with_options, CastOptions}; use arrow::compute::kernels::numeric::{ add, add_wrapping, div, mul, mul_wrapping, rem, sub, sub_wrapping, @@ -2888,9 +2888,17 @@ impl ScalarValue { ScalarValue::Decimal256(e, precision, scale) => Arc::new( ScalarValue::build_decimal256_array(*e, *precision, *scale, size)?, ), - ScalarValue::Boolean(e) => { - Arc::new(BooleanArray::from(vec![*e; size])) as ArrayRef - } + ScalarValue::Boolean(e) => match e { + None => new_null_array(&DataType::Boolean, size), + Some(true) => { + Arc::new(BooleanArray::new(BooleanBuffer::new_set(size), None)) + as ArrayRef + } + Some(false) => { + Arc::new(BooleanArray::new(BooleanBuffer::new_unset(size), None)) + as ArrayRef + } + }, ScalarValue::Float64(e) => { build_array_from_option!(Float64, Float64Array, e, size) } @@ -2973,15 +2981,13 @@ impl ScalarValue { Some(value) => Arc::new( repeat_n(Some(value.as_slice()), size).collect::(), ), - None => Arc::new(repeat_n(None::<&str>, size).collect::()), + None => new_null_array(&DataType::Binary, size), }, ScalarValue::BinaryView(e) => match e { Some(value) => Arc::new( repeat_n(Some(value.as_slice()), size).collect::(), ), - None => { - Arc::new(repeat_n(None::<&str>, size).collect::()) - } + None => new_null_array(&DataType::BinaryView, size), }, ScalarValue::FixedSizeBinary(s, e) => match e { Some(value) => Arc::new( @@ -2991,21 +2997,13 @@ impl ScalarValue { ) .unwrap(), ), - None => Arc::new( - FixedSizeBinaryArray::try_from_sparse_iter_with_size( - repeat_n(None::<&[u8]>, size), - *s, - ) - .unwrap(), - ), + None => Arc::new(FixedSizeBinaryArray::new_null(*s, size)), }, ScalarValue::LargeBinary(e) => match e { Some(value) => Arc::new( repeat_n(Some(value.as_slice()), size).collect::(), ), - None => { - Arc::new(repeat_n(None::<&str>, size).collect::()) - } + None => new_null_array(&DataType::LargeBinary, size), }, ScalarValue::List(arr) => { if size == 1 { diff --git a/datafusion/common/src/table_reference.rs 
b/datafusion/common/src/table_reference.rs index 7cf8e7af1a79..574465856760 100644 --- a/datafusion/common/src/table_reference.rs +++ b/datafusion/common/src/table_reference.rs @@ -269,24 +269,41 @@ impl TableReference { } /// Forms a [`TableReference`] by parsing `s` as a multipart SQL - /// identifier. See docs on [`TableReference`] for more details. + /// identifier, normalizing `s` to lowercase. + /// See docs on [`TableReference`] for more details. pub fn parse_str(s: &str) -> Self { - let mut parts = parse_identifiers_normalized(s, false); + Self::parse_str_normalized(s, false) + } + + /// Forms a [`TableReference`] by parsing `s` as a multipart SQL + /// identifier, normalizing `s` to lowercase if `ignore_case` is `false`. + /// See docs on [`TableReference`] for more details. + pub fn parse_str_normalized(s: &str, ignore_case: bool) -> Self { + let table_parts = parse_identifiers_normalized(s, ignore_case); + Self::from_vec(table_parts).unwrap_or_else(|| Self::Bare { table: s.into() }) + } + + /// Consume a vector of identifier parts to compose a [`TableReference`]. The input vector + /// should contain 1 <= N <= 3 elements in the following sequence: + /// ```no_rust + /// [, , table] + /// ``` + fn from_vec(mut parts: Vec) -> Option { match parts.len() { - 1 => Self::Bare { - table: parts.remove(0).into(), - }, - 2 => Self::Partial { - schema: parts.remove(0).into(), - table: parts.remove(0).into(), - }, - 3 => Self::Full { - catalog: parts.remove(0).into(), - schema: parts.remove(0).into(), - table: parts.remove(0).into(), - }, - _ => Self::Bare { table: s.into() }, + 1 => Some(Self::Bare { + table: parts.pop()?.into(), + }), + 2 => Some(Self::Partial { + table: parts.pop()?.into(), + schema: parts.pop()?.into(), + }), + 3 => Some(Self::Full { + table: parts.pop()?.into(), + schema: parts.pop()?.into(), + catalog: parts.pop()?.into(), + }), + _ => None, } } diff --git a/datafusion/common/src/utils/mod.rs b/datafusion/common/src/utils/mod.rs index c72e3b3a8df7..045c02a5a2aa 100644 --- a/datafusion/common/src/utils/mod.rs +++ b/datafusion/common/src/utils/mod.rs @@ -285,6 +285,9 @@ pub(crate) fn parse_identifiers(s: &str) -> Result> { Ok(idents) } +/// Parse a string into a vector of identifiers. +/// +/// Note: If ignore_case is false, the string will be normalized to lowercase. 
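Editorial aside (not part of this diff): a usage sketch for `TableReference::parse_str`, assuming datafusion-common is built with the `sql` feature that provides identifier parsing:

```rust
use datafusion_common::TableReference;

fn main() {
    // Multipart identifiers are normalized to lowercase by default
    let full = TableReference::parse_str("MyCatalog.MySchema.MyTable");
    assert_eq!(
        full,
        TableReference::full("mycatalog", "myschema", "mytable")
    );

    // More than three parts cannot be interpreted, so the whole string
    // becomes a bare table name
    let bare = TableReference::parse_str("a.b.c.d");
    assert_eq!(bare, TableReference::bare("a.b.c.d"));
}
```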
#[cfg(feature = "sql")] pub(crate) fn parse_identifiers_normalized(s: &str, ignore_case: bool) -> Vec { parse_identifiers(s) diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index d3bc4546588d..22c9f43a902e 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -47,7 +47,7 @@ compression = [ "bzip2", "flate2", "zstd", - "arrow-ipc/zstd", + "datafusion-datasource-arrow/compression", "datafusion-datasource/compression", ] crypto_expressions = ["datafusion-functions/crypto_expressions"] @@ -109,17 +109,17 @@ extended_tests = [] [dependencies] arrow = { workspace = true } -arrow-ipc = { workspace = true } arrow-schema = { workspace = true, features = ["canonical_extension_types"] } async-trait = { workspace = true } bytes = { workspace = true } -bzip2 = { version = "0.6.0", optional = true } +bzip2 = { version = "0.6.1", optional = true } chrono = { workspace = true } datafusion-catalog = { workspace = true } datafusion-catalog-listing = { workspace = true } datafusion-common = { workspace = true, features = ["object_store"] } datafusion-common-runtime = { workspace = true } datafusion-datasource = { workspace = true } +datafusion-datasource-arrow = { workspace = true } datafusion-datasource-avro = { workspace = true, optional = true } datafusion-datasource-csv = { workspace = true } datafusion-datasource-json = { workspace = true } diff --git a/datafusion/core/benches/parquet_query_sql.rs b/datafusion/core/benches/parquet_query_sql.rs index 14dcdf15f173..e2b381048013 100644 --- a/datafusion/core/benches/parquet_query_sql.rs +++ b/datafusion/core/benches/parquet_query_sql.rs @@ -166,11 +166,12 @@ fn generate_file() -> NamedTempFile { } let metadata = writer.close().unwrap(); + let file_metadata = metadata.file_metadata(); assert_eq!( - metadata.num_rows as usize, + file_metadata.num_rows() as usize, WRITE_RECORD_BATCH_SIZE * NUM_BATCHES ); - assert_eq!(metadata.row_groups.len(), EXPECTED_ROW_GROUPS); + assert_eq!(metadata.row_groups().len(), EXPECTED_ROW_GROUPS); println!( "Generated parquet file in {} seconds", diff --git a/datafusion/core/benches/sql_planner.rs b/datafusion/core/benches/sql_planner.rs index 3be8668b2b8c..83563099cad6 100644 --- a/datafusion/core/benches/sql_planner.rs +++ b/datafusion/core/benches/sql_planner.rs @@ -30,7 +30,7 @@ use criterion::Bencher; use datafusion::datasource::MemTable; use datafusion::execution::context::SessionContext; use datafusion::prelude::DataFrame; -use datafusion_common::ScalarValue; +use datafusion_common::{config::Dialect, ScalarValue}; use datafusion_expr::Expr::Literal; use datafusion_expr::{cast, col, lit, not, try_cast, when}; use datafusion_functions::expr_fn::{ @@ -288,7 +288,10 @@ fn benchmark_with_param_values_many_columns( } // SELECT max(attr0), ..., max(attrN) FROM t1. 
let query = format!("SELECT {aggregates} FROM t1"); - let statement = ctx.state().sql_to_statement(&query, "Generic").unwrap(); + let statement = ctx + .state() + .sql_to_statement(&query, &Dialect::Generic) + .unwrap(); let plan = rt.block_on(async { ctx.state().statement_to_plan(statement).await.unwrap() }); b.iter(|| { diff --git a/datafusion/core/src/dataframe/parquet.rs b/datafusion/core/src/dataframe/parquet.rs index d46a902ca513..930b4fad1d9b 100644 --- a/datafusion/core/src/dataframe/parquet.rs +++ b/datafusion/core/src/dataframe/parquet.rs @@ -116,6 +116,8 @@ mod tests { use datafusion_execution::config::SessionConfig; use datafusion_expr::{col, lit}; + #[cfg(feature = "parquet_encryption")] + use datafusion_common::config::ConfigFileEncryptionProperties; use object_store::local::LocalFileSystem; use parquet::file::reader::FileReader; use tempfile::TempDir; @@ -280,7 +282,8 @@ mod tests { // Write encrypted parquet using write_parquet let mut options = TableParquetOptions::default(); - options.crypto.file_encryption = Some((&encrypt).into()); + options.crypto.file_encryption = + Some(ConfigFileEncryptionProperties::from(&encrypt)); options.global.allow_single_file_parallelism = allow_single_file_parallelism; df.write_parquet( diff --git a/datafusion/core/src/datasource/dynamic_file.rs b/datafusion/core/src/datasource/dynamic_file.rs index b30d53e58691..256a11ba693b 100644 --- a/datafusion/core/src/datasource/dynamic_file.rs +++ b/datafusion/core/src/datasource/dynamic_file.rs @@ -20,6 +20,7 @@ use std::sync::Arc; +use crate::datasource::listing::ListingTableConfigExt; use crate::datasource::listing::{ListingTable, ListingTableConfig, ListingTableUrl}; use crate::datasource::TableProvider; use crate::error::Result; diff --git a/datafusion/core/src/datasource/file_format/arrow.rs b/datafusion/core/src/datasource/file_format/arrow.rs index 25bc166d657a..8701f96eb3b8 100644 --- a/datafusion/core/src/datasource/file_format/arrow.rs +++ b/datafusion/core/src/datasource/file_format/arrow.rs @@ -15,510 +15,5 @@ // specific language governing permissions and limitations // under the License. -//! [`ArrowFormat`]: Apache Arrow [`FileFormat`] abstractions -//! -//! 
Works with files following the [Arrow IPC format](https://arrow.apache.org/docs/format/Columnar.html#ipc-file-format) - -use std::any::Any; -use std::borrow::Cow; -use std::collections::HashMap; -use std::fmt::{self, Debug}; -use std::sync::Arc; - -use super::file_compression_type::FileCompressionType; -use super::write::demux::DemuxedStreamReceiver; -use super::write::SharedBuffer; -use super::FileFormatFactory; -use crate::datasource::file_format::write::get_writer_schema; -use crate::datasource::file_format::FileFormat; -use crate::datasource::physical_plan::{ArrowSource, FileSink, FileSinkConfig}; -use crate::error::Result; -use crate::physical_plan::{DisplayAs, DisplayFormatType, ExecutionPlan}; - -use arrow::datatypes::{Schema, SchemaRef}; -use arrow::error::ArrowError; -use arrow::ipc::convert::fb_to_schema; -use arrow::ipc::reader::FileReader; -use arrow::ipc::writer::IpcWriteOptions; -use arrow::ipc::{root_as_message, CompressionType}; -use datafusion_catalog::Session; -use datafusion_common::parsers::CompressionTypeVariant; -use datafusion_common::{ - internal_datafusion_err, not_impl_err, DataFusionError, GetExt, Statistics, - DEFAULT_ARROW_EXTENSION, -}; -use datafusion_common_runtime::{JoinSet, SpawnedTask}; -use datafusion_datasource::display::FileGroupDisplay; -use datafusion_datasource::file::FileSource; -use datafusion_datasource::file_scan_config::{FileScanConfig, FileScanConfigBuilder}; -use datafusion_datasource::sink::{DataSink, DataSinkExec}; -use datafusion_datasource::write::ObjectWriterBuilder; -use datafusion_execution::{SendableRecordBatchStream, TaskContext}; -use datafusion_expr::dml::InsertOp; -use datafusion_physical_expr_common::sort_expr::LexRequirement; - -use async_trait::async_trait; -use bytes::Bytes; -use datafusion_datasource::source::DataSourceExec; -use futures::stream::BoxStream; -use futures::StreamExt; -use object_store::{GetResultPayload, ObjectMeta, ObjectStore}; -use tokio::io::AsyncWriteExt; - -/// Initial writing buffer size. Note this is just a size hint for efficiency. It -/// will grow beyond the set value if needed. -const INITIAL_BUFFER_BYTES: usize = 1048576; - -/// If the buffered Arrow data exceeds this size, it is flushed to object store -const BUFFER_FLUSH_BYTES: usize = 1024000; - -#[derive(Default, Debug)] -/// Factory struct used to create [ArrowFormat] -pub struct ArrowFormatFactory; - -impl ArrowFormatFactory { - /// Creates an instance of [ArrowFormatFactory] - pub fn new() -> Self { - Self {} - } -} - -impl FileFormatFactory for ArrowFormatFactory { - fn create( - &self, - _state: &dyn Session, - _format_options: &HashMap, - ) -> Result> { - Ok(Arc::new(ArrowFormat)) - } - - fn default(&self) -> Arc { - Arc::new(ArrowFormat) - } - - fn as_any(&self) -> &dyn Any { - self - } -} - -impl GetExt for ArrowFormatFactory { - fn get_ext(&self) -> String { - // Removes the dot, i.e. ".parquet" -> "parquet" - DEFAULT_ARROW_EXTENSION[1..].to_string() - } -} - -/// Arrow `FileFormat` implementation. -#[derive(Default, Debug)] -pub struct ArrowFormat; - -#[async_trait] -impl FileFormat for ArrowFormat { - fn as_any(&self) -> &dyn Any { - self - } - - fn get_ext(&self) -> String { - ArrowFormatFactory::new().get_ext() - } - - fn get_ext_with_compression( - &self, - file_compression_type: &FileCompressionType, - ) -> Result { - let ext = self.get_ext(); - match file_compression_type.get_variant() { - CompressionTypeVariant::UNCOMPRESSED => Ok(ext), - _ => Err(internal_datafusion_err!( - "Arrow FileFormat does not support compression." 
- )), - } - } - - fn compression_type(&self) -> Option { - None - } - - async fn infer_schema( - &self, - _state: &dyn Session, - store: &Arc, - objects: &[ObjectMeta], - ) -> Result { - let mut schemas = vec![]; - for object in objects { - let r = store.as_ref().get(&object.location).await?; - let schema = match r.payload { - #[cfg(not(target_arch = "wasm32"))] - GetResultPayload::File(mut file, _) => { - let reader = FileReader::try_new(&mut file, None)?; - reader.schema() - } - GetResultPayload::Stream(stream) => { - infer_schema_from_file_stream(stream).await? - } - }; - schemas.push(schema.as_ref().clone()); - } - let merged_schema = Schema::try_merge(schemas)?; - Ok(Arc::new(merged_schema)) - } - - async fn infer_stats( - &self, - _state: &dyn Session, - _store: &Arc, - table_schema: SchemaRef, - _object: &ObjectMeta, - ) -> Result { - Ok(Statistics::new_unknown(&table_schema)) - } - - async fn create_physical_plan( - &self, - _state: &dyn Session, - conf: FileScanConfig, - ) -> Result> { - let source = Arc::new(ArrowSource::default()); - let config = FileScanConfigBuilder::from(conf) - .with_source(source) - .build(); - - Ok(DataSourceExec::from_data_source(config)) - } - - async fn create_writer_physical_plan( - &self, - input: Arc, - _state: &dyn Session, - conf: FileSinkConfig, - order_requirements: Option, - ) -> Result> { - if conf.insert_op != InsertOp::Append { - return not_impl_err!("Overwrites are not implemented yet for Arrow format"); - } - - let sink = Arc::new(ArrowFileSink::new(conf)); - - Ok(Arc::new(DataSinkExec::new(input, sink, order_requirements)) as _) - } - - fn file_source(&self) -> Arc { - Arc::new(ArrowSource::default()) - } -} - -/// Implements [`FileSink`] for writing to arrow_ipc files -struct ArrowFileSink { - config: FileSinkConfig, -} - -impl ArrowFileSink { - fn new(config: FileSinkConfig) -> Self { - Self { config } - } -} - -#[async_trait] -impl FileSink for ArrowFileSink { - fn config(&self) -> &FileSinkConfig { - &self.config - } - - async fn spawn_writer_tasks_and_join( - &self, - context: &Arc, - demux_task: SpawnedTask>, - mut file_stream_rx: DemuxedStreamReceiver, - object_store: Arc, - ) -> Result { - let mut file_write_tasks: JoinSet> = - JoinSet::new(); - - let ipc_options = - IpcWriteOptions::try_new(64, false, arrow_ipc::MetadataVersion::V5)? 
- .try_with_compression(Some(CompressionType::LZ4_FRAME))?; - while let Some((path, mut rx)) = file_stream_rx.recv().await { - let shared_buffer = SharedBuffer::new(INITIAL_BUFFER_BYTES); - let mut arrow_writer = arrow_ipc::writer::FileWriter::try_new_with_options( - shared_buffer.clone(), - &get_writer_schema(&self.config), - ipc_options.clone(), - )?; - let mut object_store_writer = ObjectWriterBuilder::new( - FileCompressionType::UNCOMPRESSED, - &path, - Arc::clone(&object_store), - ) - .with_buffer_size(Some( - context - .session_config() - .options() - .execution - .objectstore_writer_buffer_size, - )) - .build()?; - file_write_tasks.spawn(async move { - let mut row_count = 0; - while let Some(batch) = rx.recv().await { - row_count += batch.num_rows(); - arrow_writer.write(&batch)?; - let mut buff_to_flush = shared_buffer.buffer.try_lock().unwrap(); - if buff_to_flush.len() > BUFFER_FLUSH_BYTES { - object_store_writer - .write_all(buff_to_flush.as_slice()) - .await?; - buff_to_flush.clear(); - } - } - arrow_writer.finish()?; - let final_buff = shared_buffer.buffer.try_lock().unwrap(); - - object_store_writer.write_all(final_buff.as_slice()).await?; - object_store_writer.shutdown().await?; - Ok(row_count) - }); - } - - let mut row_count = 0; - while let Some(result) = file_write_tasks.join_next().await { - match result { - Ok(r) => { - row_count += r?; - } - Err(e) => { - if e.is_panic() { - std::panic::resume_unwind(e.into_panic()); - } else { - unreachable!(); - } - } - } - } - - demux_task - .join_unwind() - .await - .map_err(|e| DataFusionError::ExecutionJoin(Box::new(e)))??; - Ok(row_count as u64) - } -} - -impl Debug for ArrowFileSink { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("ArrowFileSink").finish() - } -} - -impl DisplayAs for ArrowFileSink { - fn fmt_as(&self, t: DisplayFormatType, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match t { - DisplayFormatType::Default | DisplayFormatType::Verbose => { - write!(f, "ArrowFileSink(file_groups=",)?; - FileGroupDisplay(&self.config.file_group).fmt_as(t, f)?; - write!(f, ")") - } - DisplayFormatType::TreeRender => { - writeln!(f, "format: arrow")?; - write!(f, "file={}", &self.config.original_url) - } - } - } -} - -#[async_trait] -impl DataSink for ArrowFileSink { - fn as_any(&self) -> &dyn Any { - self - } - - fn schema(&self) -> &SchemaRef { - self.config.output_schema() - } - - async fn write_all( - &self, - data: SendableRecordBatchStream, - context: &Arc, - ) -> Result { - FileSink::write_all(self, data, context).await - } -} - -const ARROW_MAGIC: [u8; 6] = [b'A', b'R', b'R', b'O', b'W', b'1']; -const CONTINUATION_MARKER: [u8; 4] = [0xff; 4]; - -/// Custom implementation of inferring schema. Should eventually be moved upstream to arrow-rs. -/// See -async fn infer_schema_from_file_stream( - mut stream: BoxStream<'static, object_store::Result>, -) -> Result { - // Expected format: - // - 6 bytes - // - 2 bytes - // - 4 bytes, not present below v0.15.0 - // - 4 bytes - // - // - - // So in first read we need at least all known sized sections, - // which is 6 + 2 + 4 + 4 = 16 bytes. 
- let bytes = collect_at_least_n_bytes(&mut stream, 16, None).await?; - - // Files should start with these magic bytes - if bytes[0..6] != ARROW_MAGIC { - return Err(ArrowError::ParseError( - "Arrow file does not contain correct header".to_string(), - ))?; - } - - // Since continuation marker bytes added in later versions - let (meta_len, rest_of_bytes_start_index) = if bytes[8..12] == CONTINUATION_MARKER { - (&bytes[12..16], 16) - } else { - (&bytes[8..12], 12) - }; - - let meta_len = [meta_len[0], meta_len[1], meta_len[2], meta_len[3]]; - let meta_len = i32::from_le_bytes(meta_len); - - // Read bytes for Schema message - let block_data = if bytes[rest_of_bytes_start_index..].len() < meta_len as usize { - // Need to read more bytes to decode Message - let mut block_data = Vec::with_capacity(meta_len as usize); - // In case we had some spare bytes in our initial read chunk - block_data.extend_from_slice(&bytes[rest_of_bytes_start_index..]); - let size_to_read = meta_len as usize - block_data.len(); - let block_data = - collect_at_least_n_bytes(&mut stream, size_to_read, Some(block_data)).await?; - Cow::Owned(block_data) - } else { - // Already have the bytes we need - let end_index = meta_len as usize + rest_of_bytes_start_index; - let block_data = &bytes[rest_of_bytes_start_index..end_index]; - Cow::Borrowed(block_data) - }; - - // Decode Schema message - let message = root_as_message(&block_data).map_err(|err| { - ArrowError::ParseError(format!("Unable to read IPC message as metadata: {err:?}")) - })?; - let ipc_schema = message.header_as_schema().ok_or_else(|| { - ArrowError::IpcError("Unable to read IPC message as schema".to_string()) - })?; - let schema = fb_to_schema(ipc_schema); - - Ok(Arc::new(schema)) -} - -async fn collect_at_least_n_bytes( - stream: &mut BoxStream<'static, object_store::Result>, - n: usize, - extend_from: Option>, -) -> Result> { - let mut buf = extend_from.unwrap_or_else(|| Vec::with_capacity(n)); - // If extending existing buffer then ensure we read n additional bytes - let n = n + buf.len(); - while let Some(bytes) = stream.next().await.transpose()? 
{ - buf.extend_from_slice(&bytes); - if buf.len() >= n { - break; - } - } - if buf.len() < n { - return Err(ArrowError::ParseError( - "Unexpected end of byte stream for Arrow IPC file".to_string(), - ))?; - } - Ok(buf) -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::execution::context::SessionContext; - - use chrono::DateTime; - use object_store::{chunked::ChunkedStore, memory::InMemory, path::Path}; - - #[tokio::test] - async fn test_infer_schema_stream() -> Result<()> { - let mut bytes = std::fs::read("tests/data/example.arrow")?; - bytes.truncate(bytes.len() - 20); // mangle end to show we don't need to read whole file - let location = Path::parse("example.arrow")?; - let in_memory_store: Arc = Arc::new(InMemory::new()); - in_memory_store.put(&location, bytes.into()).await?; - - let session_ctx = SessionContext::new(); - let state = session_ctx.state(); - let object_meta = ObjectMeta { - location, - last_modified: DateTime::default(), - size: u64::MAX, - e_tag: None, - version: None, - }; - - let arrow_format = ArrowFormat {}; - let expected = vec!["f0: Int64", "f1: Utf8", "f2: Boolean"]; - - // Test chunk sizes where too small so we keep having to read more bytes - // And when large enough that first read contains all we need - for chunk_size in [7, 3000] { - let store = Arc::new(ChunkedStore::new(in_memory_store.clone(), chunk_size)); - let inferred_schema = arrow_format - .infer_schema( - &state, - &(store.clone() as Arc), - std::slice::from_ref(&object_meta), - ) - .await?; - let actual_fields = inferred_schema - .fields() - .iter() - .map(|f| format!("{}: {:?}", f.name(), f.data_type())) - .collect::>(); - assert_eq!(expected, actual_fields); - } - - Ok(()) - } - - #[tokio::test] - async fn test_infer_schema_short_stream() -> Result<()> { - let mut bytes = std::fs::read("tests/data/example.arrow")?; - bytes.truncate(20); // should cause error that file shorter than expected - let location = Path::parse("example.arrow")?; - let in_memory_store: Arc = Arc::new(InMemory::new()); - in_memory_store.put(&location, bytes.into()).await?; - - let session_ctx = SessionContext::new(); - let state = session_ctx.state(); - let object_meta = ObjectMeta { - location, - last_modified: DateTime::default(), - size: u64::MAX, - e_tag: None, - version: None, - }; - - let arrow_format = ArrowFormat {}; - - let store = Arc::new(ChunkedStore::new(in_memory_store.clone(), 7)); - let err = arrow_format - .infer_schema( - &state, - &(store.clone() as Arc), - std::slice::from_ref(&object_meta), - ) - .await; - - assert!(err.is_err()); - assert_eq!( - "Arrow error: Parser error: Unexpected end of byte stream for Arrow IPC file", - err.unwrap_err().to_string().lines().next().unwrap() - ); - - Ok(()) - } -} +//! Re-exports the [`datafusion_datasource_arrow::file_format`] module, and contains tests for it. 
+pub use datafusion_datasource_arrow::file_format::*; diff --git a/datafusion/core/src/datasource/file_format/parquet.rs b/datafusion/core/src/datasource/file_format/parquet.rs index 088c4408fff5..1781ea569d90 100644 --- a/datafusion/core/src/datasource/file_format/parquet.rs +++ b/datafusion/core/src/datasource/file_format/parquet.rs @@ -154,7 +154,6 @@ mod tests { use futures::stream::BoxStream; use futures::StreamExt; use insta::assert_snapshot; - use log::error; use object_store::local::LocalFileSystem; use object_store::ObjectMeta; use object_store::{ @@ -163,9 +162,10 @@ mod tests { }; use parquet::arrow::arrow_reader::ArrowReaderOptions; use parquet::arrow::ParquetRecordBatchStreamBuilder; - use parquet::file::metadata::{KeyValue, ParquetColumnIndex, ParquetOffsetIndex}; - use parquet::file::page_index::index::Index; - use parquet::format::FileMetaData; + use parquet::file::metadata::{ + KeyValue, ParquetColumnIndex, ParquetMetaData, ParquetOffsetIndex, + }; + use parquet::file::page_index::column_index::ColumnIndexMetaData; use tokio::fs::File; enum ForceViews { @@ -1144,18 +1144,14 @@ mod tests { // 325 pages in int_col assert_eq!(int_col_offset.len(), 325); - match int_col_index { - Index::INT32(index) => { - assert_eq!(index.indexes.len(), 325); - for min_max in index.clone().indexes { - assert!(min_max.min.is_some()); - assert!(min_max.max.is_some()); - assert!(min_max.null_count.is_some()); - } - } - _ => { - error!("fail to read page index.") - } + let ColumnIndexMetaData::INT32(index) = int_col_index else { + panic!("fail to read page index.") + }; + assert_eq!(index.min_values().len(), 325); + assert_eq!(index.max_values().len(), 325); + // all values are non null + for idx in 0..325 { + assert_eq!(index.null_count(idx), Some(0)); } } @@ -1556,7 +1552,7 @@ mod tests { Ok(parquet_sink) } - fn get_written(parquet_sink: Arc) -> Result<(Path, FileMetaData)> { + fn get_written(parquet_sink: Arc) -> Result<(Path, ParquetMetaData)> { let mut written = parquet_sink.written(); let written = written.drain(); assert_eq!( @@ -1566,28 +1562,33 @@ mod tests { written.len() ); - let (path, file_metadata) = written.take(1).next().unwrap(); - Ok((path, file_metadata)) + let (path, parquet_meta_data) = written.take(1).next().unwrap(); + Ok((path, parquet_meta_data)) } - fn assert_file_metadata(file_metadata: FileMetaData, expected_kv: &Vec) { - let FileMetaData { - num_rows, - schema, - key_value_metadata, - .. 
- } = file_metadata; - assert_eq!(num_rows, 2, "file metadata to have 2 rows"); + fn assert_file_metadata( + parquet_meta_data: ParquetMetaData, + expected_kv: &Vec, + ) { + let file_metadata = parquet_meta_data.file_metadata(); + let schema_descr = file_metadata.schema_descr(); + assert_eq!(file_metadata.num_rows(), 2, "file metadata to have 2 rows"); assert!( - schema.iter().any(|col_schema| col_schema.name == "a"), + schema_descr + .columns() + .iter() + .any(|col_schema| col_schema.name() == "a"), "output file metadata should contain col a" ); assert!( - schema.iter().any(|col_schema| col_schema.name == "b"), + schema_descr + .columns() + .iter() + .any(|col_schema| col_schema.name() == "b"), "output file metadata should contain col b" ); - let mut key_value_metadata = key_value_metadata.unwrap(); + let mut key_value_metadata = file_metadata.key_value_metadata().unwrap().clone(); key_value_metadata.sort_by(|a, b| a.key.cmp(&b.key)); assert_eq!(&key_value_metadata, expected_kv); } @@ -1644,13 +1645,11 @@ mod tests { // check the file metadata includes partitions let mut expected_partitions = std::collections::HashSet::from(["a=foo", "a=bar"]); - for ( - path, - FileMetaData { - num_rows, schema, .. - }, - ) in written.take(2) - { + for (path, parquet_metadata) in written.take(2) { + let file_metadata = parquet_metadata.file_metadata(); + let schema = file_metadata.schema_descr(); + let num_rows = file_metadata.num_rows(); + let path_parts = path.parts().collect::>(); assert_eq!(path_parts.len(), 2, "should have path prefix"); @@ -1663,11 +1662,17 @@ mod tests { assert_eq!(num_rows, 1, "file metadata to have 1 row"); assert!( - !schema.iter().any(|col_schema| col_schema.name == "a"), + !schema + .columns() + .iter() + .any(|col_schema| col_schema.name() == "a"), "output file metadata will not contain partitioned col a" ); assert!( - schema.iter().any(|col_schema| col_schema.name == "b"), + schema + .columns() + .iter() + .any(|col_schema| col_schema.name() == "b"), "output file metadata should contain col b" ); } diff --git a/datafusion/core/src/datasource/listing/mod.rs b/datafusion/core/src/datasource/listing/mod.rs index a58db55bccb6..c206566a6594 100644 --- a/datafusion/core/src/datasource/listing/mod.rs +++ b/datafusion/core/src/datasource/listing/mod.rs @@ -20,7 +20,8 @@ mod table; pub use datafusion_catalog_listing::helpers; +pub use datafusion_catalog_listing::{ListingOptions, ListingTable, ListingTableConfig}; pub use datafusion_datasource::{ FileRange, ListingTableUrl, PartitionedFile, PartitionedFileStream, }; -pub use table::{ListingOptions, ListingTable, ListingTableConfig}; +pub use table::ListingTableConfigExt; diff --git a/datafusion/core/src/datasource/listing/table.rs b/datafusion/core/src/datasource/listing/table.rs index 3ce58938d77e..3333b7067620 100644 --- a/datafusion/core/src/datasource/listing/table.rs +++ b/datafusion/core/src/datasource/listing/table.rs @@ -15,226 +15,42 @@ // specific language governing permissions and limitations // under the License. -//! The table implementation. 
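The parquet test rewrites above move from the thrift `FileMetaData`/`Index` structs to the accessor-based `ParquetMetaData` API. A hedged sketch built only from the calls visible in those hunks:

```rust
use parquet::file::metadata::ParquetMetaData;
use parquet::file::page_index::column_index::ColumnIndexMetaData;

// Column names now come from the schema descriptor on `file_metadata()`.
fn has_column(metadata: &ParquetMetaData, name: &str) -> bool {
    metadata
        .file_metadata()
        .schema_descr()
        .columns()
        .iter()
        .any(|col| col.name() == name)
}

// The typed column index exposes per-page statistics; other physical types
// are elided in this sketch.
fn int32_pages_all_non_null(index: &ColumnIndexMetaData) -> bool {
    let ColumnIndexMetaData::INT32(index) = index else {
        return false;
    };
    (0..index.min_values().len()).all(|page| index.null_count(page) == Some(0))
}
```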
- -use super::{ - helpers::{expr_applicable_for_cols, pruned_partition_list}, - ListingTableUrl, PartitionedFile, -}; -use crate::{ - datasource::file_format::{file_compression_type::FileCompressionType, FileFormat}, - datasource::physical_plan::FileSinkConfig, - execution::context::SessionState, -}; -use arrow::datatypes::{DataType, Field, SchemaBuilder, SchemaRef}; -use arrow_schema::Schema; +use crate::execution::SessionState; use async_trait::async_trait; -use datafusion_catalog::{ScanArgs, ScanResult, Session, TableProvider}; -use datafusion_common::{ - config_datafusion_err, config_err, internal_datafusion_err, internal_err, plan_err, - project_schema, stats::Precision, Constraints, DataFusionError, Result, SchemaExt, -}; -use datafusion_datasource::{ - compute_all_files_statistics, - file::FileSource, - file_groups::FileGroup, - file_scan_config::{FileScanConfig, FileScanConfigBuilder}, - schema_adapter::{DefaultSchemaAdapterFactory, SchemaAdapter, SchemaAdapterFactory}, -}; -use datafusion_execution::{ - cache::{cache_manager::FileStatisticsCache, cache_unit::DefaultFileStatisticsCache}, - config::SessionConfig, -}; -use datafusion_expr::execution_props::ExecutionProps; -use datafusion_expr::{ - dml::InsertOp, Expr, SortExpr, TableProviderFilterPushDown, TableType, -}; -use datafusion_physical_expr::create_lex_ordering; -use datafusion_physical_expr_adapter::PhysicalExprAdapterFactory; -use datafusion_physical_expr_common::sort_expr::LexOrdering; -use datafusion_physical_plan::{empty::EmptyExec, ExecutionPlan, Statistics}; -use futures::{future, stream, Stream, StreamExt, TryStreamExt}; -use itertools::Itertools; -use object_store::ObjectStore; -use std::{any::Any, collections::HashMap, str::FromStr, sync::Arc}; - -/// Indicates the source of the schema for a [`ListingTable`] -// PartialEq required for assert_eq! in tests -#[derive(Debug, Clone, Copy, PartialEq, Default)] -pub enum SchemaSource { - /// Schema is not yet set (initial state) - #[default] - Unset, - /// Schema was inferred from first table_path - Inferred, - /// Schema was specified explicitly via with_schema - Specified, -} +use datafusion_catalog_listing::{ListingOptions, ListingTableConfig}; +use datafusion_common::{config_datafusion_err, internal_datafusion_err}; +use datafusion_session::Session; +use futures::StreamExt; +use std::collections::HashMap; -/// Configuration for creating a [`ListingTable`] -/// -/// # Schema Evolution Support -/// -/// This configuration supports schema evolution through the optional -/// [`SchemaAdapterFactory`]. You might want to override the default factory when you need: -/// -/// - **Type coercion requirements**: When you need custom logic for converting between -/// different Arrow data types (e.g., Int32 ↔ Int64, Utf8 ↔ LargeUtf8) -/// - **Column mapping**: You need to map columns with a legacy name to a new name -/// - **Custom handling of missing columns**: By default they are filled in with nulls, but you may e.g. want to fill them in with `0` or `""`. +/// Extension trait for [`ListingTableConfig`] that supports inferring schemas /// -/// If not specified, a [`DefaultSchemaAdapterFactory`] will be used, which handles -/// basic schema compatibility cases. -/// -#[derive(Debug, Clone, Default)] -pub struct ListingTableConfig { - /// Paths on the `ObjectStore` for creating `ListingTable`. - /// They should share the same schema and object store. - pub table_paths: Vec, - /// Optional `SchemaRef` for the to be created `ListingTable`. 
- /// - /// See details on [`ListingTableConfig::with_schema`] - pub file_schema: Option, - /// Optional [`ListingOptions`] for the to be created [`ListingTable`]. - /// - /// See details on [`ListingTableConfig::with_listing_options`] - pub options: Option, - /// Tracks the source of the schema information - schema_source: SchemaSource, - /// Optional [`SchemaAdapterFactory`] for creating schema adapters - schema_adapter_factory: Option>, - /// Optional [`PhysicalExprAdapterFactory`] for creating physical expression adapters - expr_adapter_factory: Option>, -} - -impl ListingTableConfig { - /// Creates new [`ListingTableConfig`] for reading the specified URL - pub fn new(table_path: ListingTableUrl) -> Self { - Self { - table_paths: vec![table_path], - ..Default::default() - } - } - - /// Creates new [`ListingTableConfig`] with multiple table paths. - /// - /// See [`Self::infer_options`] for details on what happens with multiple paths - pub fn new_with_multi_paths(table_paths: Vec) -> Self { - Self { - table_paths, - ..Default::default() - } - } - - /// Returns the source of the schema for this configuration - pub fn schema_source(&self) -> SchemaSource { - self.schema_source - } - /// Set the `schema` for the overall [`ListingTable`] - /// - /// [`ListingTable`] will automatically coerce, when possible, the schema - /// for individual files to match this schema. - /// - /// If a schema is not provided, it is inferred using - /// [`Self::infer_schema`]. - /// - /// If the schema is provided, it must contain only the fields in the file - /// without the table partitioning columns. - /// - /// # Example: Specifying Table Schema - /// ```rust - /// # use std::sync::Arc; - /// # use datafusion::datasource::listing::{ListingTableConfig, ListingOptions, ListingTableUrl}; - /// # use datafusion::datasource::file_format::parquet::ParquetFormat; - /// # use arrow::datatypes::{Schema, Field, DataType}; - /// # let table_paths = ListingTableUrl::parse("file:///path/to/data").unwrap(); - /// # let listing_options = ListingOptions::new(Arc::new(ParquetFormat::default())); - /// let schema = Arc::new(Schema::new(vec![ - /// Field::new("id", DataType::Int64, false), - /// Field::new("name", DataType::Utf8, true), - /// ])); - /// - /// let config = ListingTableConfig::new(table_paths) - /// .with_listing_options(listing_options) // Set options first - /// .with_schema(schema); // Then set schema - /// ``` - pub fn with_schema(self, schema: SchemaRef) -> Self { - // Note: We preserve existing options state, but downstream code may expect - // options to be set. Consider calling with_listing_options() or infer_options() - // before operations that require options to be present. - debug_assert!( - self.options.is_some() || cfg!(test), - "ListingTableConfig::with_schema called without options set. \ - Consider calling with_listing_options() or infer_options() first to avoid panics in downstream code." - ); - - Self { - file_schema: Some(schema), - schema_source: SchemaSource::Specified, - ..self - } - } - - /// Add `listing_options` to [`ListingTableConfig`] - /// - /// If not provided, format and other options are inferred via - /// [`Self::infer_options`]. 
- /// - /// # Example: Configuring Parquet Files with Custom Options - /// ```rust - /// # use std::sync::Arc; - /// # use datafusion::datasource::listing::{ListingTableConfig, ListingOptions, ListingTableUrl}; - /// # use datafusion::datasource::file_format::parquet::ParquetFormat; - /// # let table_paths = ListingTableUrl::parse("file:///path/to/data").unwrap(); - /// let options = ListingOptions::new(Arc::new(ParquetFormat::default())) - /// .with_file_extension(".parquet") - /// .with_collect_stat(true); - /// - /// let config = ListingTableConfig::new(table_paths) - /// .with_listing_options(options); // Configure file format and options - /// ``` - pub fn with_listing_options(self, listing_options: ListingOptions) -> Self { - // Note: This method properly sets options, but be aware that downstream - // methods like infer_schema() and try_new() require both schema and options - // to be set to function correctly. - debug_assert!( - !self.table_paths.is_empty() || cfg!(test), - "ListingTableConfig::with_listing_options called without table_paths set. \ - Consider calling new() or new_with_multi_paths() first to establish table paths." - ); - - Self { - options: Some(listing_options), - ..self - } - } - - /// Returns a tuple of `(file_extension, optional compression_extension)` - /// - /// For example a path ending with blah.test.csv.gz returns `("csv", Some("gz"))` - /// For example a path ending with blah.test.csv returns `("csv", None)` - fn infer_file_extension_and_compression_type( - path: &str, - ) -> Result<(String, Option)> { - let mut exts = path.rsplit('.'); - - let split = exts.next().unwrap_or(""); - - let file_compression_type = FileCompressionType::from_str(split) - .unwrap_or(FileCompressionType::UNCOMPRESSED); - - if file_compression_type.is_compressed() { - let split2 = exts.next().unwrap_or(""); - Ok((split2.to_string(), Some(split.to_string()))) - } else { - Ok((split.to_string(), None)) - } - } - +/// This trait exists because the following inference methods only +/// work for [`SessionState`] implementations of [`Session`]. +/// See [`ListingTableConfig`] for the remaining inference methods. +#[async_trait] +pub trait ListingTableConfigExt { /// Infer `ListingOptions` based on `table_path` and file suffix. /// /// The format is inferred based on the first `table_path`. - pub async fn infer_options(self, state: &dyn Session) -> Result { + async fn infer_options( + self, + state: &dyn Session, + ) -> datafusion_common::Result; + + /// Convenience method to call both [`Self::infer_options`] and [`ListingTableConfig::infer_schema`] + async fn infer( + self, + state: &dyn Session, + ) -> datafusion_common::Result; +} + +#[async_trait] +impl ListingTableConfigExt for ListingTableConfig { + async fn infer_options( + self, + state: &dyn Session, + ) -> datafusion_common::Result { let store = if let Some(url) = self.table_paths.first() { state.runtime_env().object_store(url)? } else { @@ -281,1299 +97,19 @@ impl ListingTableConfig { .with_target_partitions(state.config().target_partitions()) .with_collect_stat(state.config().collect_statistics()); - Ok(Self { - table_paths: self.table_paths, - file_schema: self.file_schema, - options: Some(listing_options), - schema_source: self.schema_source, - schema_adapter_factory: self.schema_adapter_factory, - expr_adapter_factory: self.expr_adapter_factory, - }) + Ok(self.with_listing_options(listing_options)) } - /// Infer the [`SchemaRef`] based on `table_path`s. 
- /// - /// This method infers the table schema using the first `table_path`. - /// See [`ListingOptions::infer_schema`] for more details - /// - /// # Errors - /// * if `self.options` is not set. See [`Self::with_listing_options`] - pub async fn infer_schema(self, state: &dyn Session) -> Result { - match self.options { - Some(options) => { - let ListingTableConfig { - table_paths, - file_schema, - options: _, - schema_source, - schema_adapter_factory, - expr_adapter_factory: physical_expr_adapter_factory, - } = self; - - let (schema, new_schema_source) = match file_schema { - Some(schema) => (schema, schema_source), // Keep existing source if schema exists - None => { - if let Some(url) = table_paths.first() { - ( - options.infer_schema(state, url).await?, - SchemaSource::Inferred, - ) - } else { - (Arc::new(Schema::empty()), SchemaSource::Inferred) - } - } - }; - - Ok(Self { - table_paths, - file_schema: Some(schema), - options: Some(options), - schema_source: new_schema_source, - schema_adapter_factory, - expr_adapter_factory: physical_expr_adapter_factory, - }) - } - None => internal_err!("No `ListingOptions` set for inferring schema"), - } - } - - /// Convenience method to call both [`Self::infer_options`] and [`Self::infer_schema`] - pub async fn infer(self, state: &dyn Session) -> Result { + async fn infer(self, state: &dyn Session) -> datafusion_common::Result { self.infer_options(state).await?.infer_schema(state).await } - - /// Infer the partition columns from `table_paths`. - /// - /// # Errors - /// * if `self.options` is not set. See [`Self::with_listing_options`] - pub async fn infer_partitions_from_path(self, state: &dyn Session) -> Result { - match self.options { - Some(options) => { - let Some(url) = self.table_paths.first() else { - return config_err!("No table path found"); - }; - let partitions = options - .infer_partitions(state, url) - .await? - .into_iter() - .map(|col_name| { - ( - col_name, - DataType::Dictionary( - Box::new(DataType::UInt16), - Box::new(DataType::Utf8), - ), - ) - }) - .collect::>(); - let options = options.with_table_partition_cols(partitions); - Ok(Self { - table_paths: self.table_paths, - file_schema: self.file_schema, - options: Some(options), - schema_source: self.schema_source, - schema_adapter_factory: self.schema_adapter_factory, - expr_adapter_factory: self.expr_adapter_factory, - }) - } - None => config_err!("No `ListingOptions` set for inferring schema"), - } - } - - /// Set the [`SchemaAdapterFactory`] for the [`ListingTable`] - /// - /// The schema adapter factory is used to create schema adapters that can - /// handle schema evolution and type conversions when reading files with - /// different schemas than the table schema. - /// - /// If not provided, a default schema adapter factory will be used. 
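For callers, the extension trait keeps the familiar inference flow; only the import changes. A hedged sketch, assuming the re-exports shown in `listing/mod.rs` above and that `infer` still yields a fully inferred `ListingTableConfig`:

```rust
use std::sync::Arc;

use datafusion::datasource::listing::{
    ListingTable, ListingTableConfig, ListingTableConfigExt, ListingTableUrl,
};
use datafusion::error::Result;
use datafusion::prelude::SessionContext;

// Infer format, options, and schema for a path, then build a ListingTable.
async fn listing_table_for(ctx: &SessionContext, path: &str) -> Result<Arc<ListingTable>> {
    let url = ListingTableUrl::parse(path)?;
    let state = ctx.state();
    // `infer` is now provided by ListingTableConfigExt for SessionState-backed sessions
    let config = ListingTableConfig::new(url).infer(&state).await?;
    Ok(Arc::new(ListingTable::try_new(config)?))
}
```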
- /// - /// # Example: Custom Schema Adapter for Type Coercion - /// ```rust - /// # use std::sync::Arc; - /// # use datafusion::datasource::listing::{ListingTableConfig, ListingOptions, ListingTableUrl}; - /// # use datafusion::datasource::schema_adapter::{SchemaAdapterFactory, SchemaAdapter}; - /// # use datafusion::datasource::file_format::parquet::ParquetFormat; - /// # use arrow::datatypes::{SchemaRef, Schema, Field, DataType}; - /// # - /// # #[derive(Debug)] - /// # struct MySchemaAdapterFactory; - /// # impl SchemaAdapterFactory for MySchemaAdapterFactory { - /// # fn create(&self, _projected_table_schema: SchemaRef, _file_schema: SchemaRef) -> Box { - /// # unimplemented!() - /// # } - /// # } - /// # let table_paths = ListingTableUrl::parse("file:///path/to/data").unwrap(); - /// # let listing_options = ListingOptions::new(Arc::new(ParquetFormat::default())); - /// # let table_schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int64, false)])); - /// let config = ListingTableConfig::new(table_paths) - /// .with_listing_options(listing_options) - /// .with_schema(table_schema) - /// .with_schema_adapter_factory(Arc::new(MySchemaAdapterFactory)); - /// ``` - pub fn with_schema_adapter_factory( - self, - schema_adapter_factory: Arc, - ) -> Self { - Self { - schema_adapter_factory: Some(schema_adapter_factory), - ..self - } - } - - /// Get the [`SchemaAdapterFactory`] for this configuration - pub fn schema_adapter_factory(&self) -> Option<&Arc> { - self.schema_adapter_factory.as_ref() - } - - /// Set the [`PhysicalExprAdapterFactory`] for the [`ListingTable`] - /// - /// The expression adapter factory is used to create physical expression adapters that can - /// handle schema evolution and type conversions when evaluating expressions - /// with different schemas than the table schema. - /// - /// If not provided, a default physical expression adapter factory will be used unless a custom - /// `SchemaAdapterFactory` is set, in which case only the `SchemaAdapterFactory` will be used. - /// - /// See for details on this transition. - pub fn with_expr_adapter_factory( - self, - expr_adapter_factory: Arc, - ) -> Self { - Self { - expr_adapter_factory: Some(expr_adapter_factory), - ..self - } - } -} - -/// Options for creating a [`ListingTable`] -#[derive(Clone, Debug)] -pub struct ListingOptions { - /// A suffix on which files should be filtered (leave empty to - /// keep all files on the path) - pub file_extension: String, - /// The file format - pub format: Arc, - /// The expected partition column names in the folder structure. - /// See [Self::with_table_partition_cols] for details - pub table_partition_cols: Vec<(String, DataType)>, - /// Set true to try to guess statistics from the files. - /// This can add a lot of overhead as it will usually require files - /// to be opened and at least partially parsed. - pub collect_stat: bool, - /// Group files to avoid that the number of partitions exceeds - /// this limit - pub target_partitions: usize, - /// Optional pre-known sort order(s). Must be `SortExpr`s. - /// - /// DataFusion may take advantage of this ordering to omit sorts - /// or use more efficient algorithms. Currently sortedness must be - /// provided if it is known by some external mechanism, but may in - /// the future be automatically determined, for example using - /// parquet metadata. 
- /// - /// See - /// - /// NOTE: This attribute stores all equivalent orderings (the outer `Vec`) - /// where each ordering consists of an individual lexicographic - /// ordering (encapsulated by a `Vec`). If there aren't - /// multiple equivalent orderings, the outer `Vec` will have a - /// single element. - pub file_sort_order: Vec>, -} - -impl ListingOptions { - /// Creates an options instance with the given format - /// Default values: - /// - use default file extension filter - /// - no input partition to discover - /// - one target partition - /// - do not collect statistics - pub fn new(format: Arc) -> Self { - Self { - file_extension: format.get_ext(), - format, - table_partition_cols: vec![], - collect_stat: false, - target_partitions: 1, - file_sort_order: vec![], - } - } - - /// Set options from [`SessionConfig`] and returns self. - /// - /// Currently this sets `target_partitions` and `collect_stat` - /// but if more options are added in the future that need to be coordinated - /// they will be synchronized through this method. - pub fn with_session_config_options(mut self, config: &SessionConfig) -> Self { - self = self.with_target_partitions(config.target_partitions()); - self = self.with_collect_stat(config.collect_statistics()); - self - } - - /// Set file extension on [`ListingOptions`] and returns self. - /// - /// # Example - /// ``` - /// # use std::sync::Arc; - /// # use datafusion::prelude::SessionContext; - /// # use datafusion::datasource::{listing::ListingOptions, file_format::parquet::ParquetFormat}; - /// - /// let listing_options = ListingOptions::new(Arc::new( - /// ParquetFormat::default() - /// )) - /// .with_file_extension(".parquet"); - /// - /// assert_eq!(listing_options.file_extension, ".parquet"); - /// ``` - pub fn with_file_extension(mut self, file_extension: impl Into) -> Self { - self.file_extension = file_extension.into(); - self - } - - /// Optionally set file extension on [`ListingOptions`] and returns self. - /// - /// If `file_extension` is `None`, the file extension will not be changed - /// - /// # Example - /// ``` - /// # use std::sync::Arc; - /// # use datafusion::prelude::SessionContext; - /// # use datafusion::datasource::{listing::ListingOptions, file_format::parquet::ParquetFormat}; - /// let extension = Some(".parquet"); - /// let listing_options = ListingOptions::new(Arc::new( - /// ParquetFormat::default() - /// )) - /// .with_file_extension_opt(extension); - /// - /// assert_eq!(listing_options.file_extension, ".parquet"); - /// ``` - pub fn with_file_extension_opt(mut self, file_extension: Option) -> Self - where - S: Into, - { - if let Some(file_extension) = file_extension { - self.file_extension = file_extension.into(); - } - self - } - - /// Set `table partition columns` on [`ListingOptions`] and returns self. - /// - /// "partition columns," used to support [Hive Partitioning], are - /// columns added to the data that is read, based on the folder - /// structure where the data resides. - /// - /// For example, give the following files in your filesystem: - /// - /// ```text - /// /mnt/nyctaxi/year=2022/month=01/tripdata.parquet - /// /mnt/nyctaxi/year=2021/month=12/tripdata.parquet - /// /mnt/nyctaxi/year=2021/month=11/tripdata.parquet - /// ``` - /// - /// A [`ListingTable`] created at `/mnt/nyctaxi/` with partition - /// columns "year" and "month" will include new `year` and `month` - /// columns while reading the files. 
The `year` column would have - /// value `2022` and the `month` column would have value `01` for - /// the rows read from - /// `/mnt/nyctaxi/year=2022/month=01/tripdata.parquet` - /// - ///# Notes - /// - /// - If only one level (e.g. `year` in the example above) is - /// specified, the other levels are ignored but the files are - /// still read. - /// - /// - Files that don't follow this partitioning scheme will be - /// ignored. - /// - /// - Since the columns have the same value for all rows read from - /// each individual file (such as dates), they are typically - /// dictionary encoded for efficiency. You may use - /// [`wrap_partition_type_in_dict`] to request a - /// dictionary-encoded type. - /// - /// - The partition columns are solely extracted from the file path. Especially they are NOT part of the parquet files itself. - /// - /// # Example - /// - /// ``` - /// # use std::sync::Arc; - /// # use arrow::datatypes::DataType; - /// # use datafusion::prelude::col; - /// # use datafusion::datasource::{listing::ListingOptions, file_format::parquet::ParquetFormat}; - /// - /// // listing options for files with paths such as `/mnt/data/col_a=x/col_b=y/data.parquet` - /// // `col_a` and `col_b` will be included in the data read from those files - /// let listing_options = ListingOptions::new(Arc::new( - /// ParquetFormat::default() - /// )) - /// .with_table_partition_cols(vec![("col_a".to_string(), DataType::Utf8), - /// ("col_b".to_string(), DataType::Utf8)]); - /// - /// assert_eq!(listing_options.table_partition_cols, vec![("col_a".to_string(), DataType::Utf8), - /// ("col_b".to_string(), DataType::Utf8)]); - /// ``` - /// - /// [Hive Partitioning]: https://docs.cloudera.com/HDPDocuments/HDP2/HDP-2.1.3/bk_system-admin-guide/content/hive_partitioned_tables.html - /// [`wrap_partition_type_in_dict`]: crate::datasource::physical_plan::wrap_partition_type_in_dict - pub fn with_table_partition_cols( - mut self, - table_partition_cols: Vec<(String, DataType)>, - ) -> Self { - self.table_partition_cols = table_partition_cols; - self - } - - /// Set stat collection on [`ListingOptions`] and returns self. - /// - /// ``` - /// # use std::sync::Arc; - /// # use datafusion::datasource::{listing::ListingOptions, file_format::parquet::ParquetFormat}; - /// - /// let listing_options = ListingOptions::new(Arc::new( - /// ParquetFormat::default() - /// )) - /// .with_collect_stat(true); - /// - /// assert_eq!(listing_options.collect_stat, true); - /// ``` - pub fn with_collect_stat(mut self, collect_stat: bool) -> Self { - self.collect_stat = collect_stat; - self - } - - /// Set number of target partitions on [`ListingOptions`] and returns self. - /// - /// ``` - /// # use std::sync::Arc; - /// # use datafusion::datasource::{listing::ListingOptions, file_format::parquet::ParquetFormat}; - /// - /// let listing_options = ListingOptions::new(Arc::new( - /// ParquetFormat::default() - /// )) - /// .with_target_partitions(8); - /// - /// assert_eq!(listing_options.target_partitions, 8); - /// ``` - pub fn with_target_partitions(mut self, target_partitions: usize) -> Self { - self.target_partitions = target_partitions; - self - } - - /// Set file sort order on [`ListingOptions`] and returns self. 
- /// - /// ``` - /// # use std::sync::Arc; - /// # use datafusion::prelude::col; - /// # use datafusion::datasource::{listing::ListingOptions, file_format::parquet::ParquetFormat}; - /// - /// // Tell datafusion that the files are sorted by column "a" - /// let file_sort_order = vec![vec![ - /// col("a").sort(true, true) - /// ]]; - /// - /// let listing_options = ListingOptions::new(Arc::new( - /// ParquetFormat::default() - /// )) - /// .with_file_sort_order(file_sort_order.clone()); - /// - /// assert_eq!(listing_options.file_sort_order, file_sort_order); - /// ``` - pub fn with_file_sort_order(mut self, file_sort_order: Vec>) -> Self { - self.file_sort_order = file_sort_order; - self - } - - /// Infer the schema of the files at the given path on the provided object store. - /// - /// If the table_path contains one or more files (i.e. it is a directory / - /// prefix of files) their schema is merged by calling [`FileFormat::infer_schema`] - /// - /// Note: The inferred schema does not include any partitioning columns. - /// - /// This method is called as part of creating a [`ListingTable`]. - pub async fn infer_schema<'a>( - &'a self, - state: &dyn Session, - table_path: &'a ListingTableUrl, - ) -> Result { - let store = state.runtime_env().object_store(table_path)?; - - let files: Vec<_> = table_path - .list_all_files(state, store.as_ref(), &self.file_extension) - .await? - // Empty files cannot affect schema but may throw when trying to read for it - .try_filter(|object_meta| future::ready(object_meta.size > 0)) - .try_collect() - .await?; - - let schema = self.format.infer_schema(state, &store, &files).await?; - - Ok(schema) - } - - /// Infers the partition columns stored in `LOCATION` and compares - /// them with the columns provided in `PARTITIONED BY` to help prevent - /// accidental corrupts of partitioned tables. - /// - /// Allows specifying partial partitions. - pub async fn validate_partitions( - &self, - state: &dyn Session, - table_path: &ListingTableUrl, - ) -> Result<()> { - if self.table_partition_cols.is_empty() { - return Ok(()); - } - - if !table_path.is_collection() { - return plan_err!( - "Can't create a partitioned table backed by a single file, \ - perhaps the URL is missing a trailing slash?" - ); - } - - let inferred = self.infer_partitions(state, table_path).await?; - - // no partitioned files found on disk - if inferred.is_empty() { - return Ok(()); - } - - let table_partition_names = self - .table_partition_cols - .iter() - .map(|(col_name, _)| col_name.clone()) - .collect_vec(); - - if inferred.len() < table_partition_names.len() { - return plan_err!( - "Inferred partitions to be {:?}, but got {:?}", - inferred, - table_partition_names - ); - } - - // match prefix to allow creating tables with partial partitions - for (idx, col) in table_partition_names.iter().enumerate() { - if &inferred[idx] != col { - return plan_err!( - "Inferred partitions to be {:?}, but got {:?}", - inferred, - table_partition_names - ); - } - } - - Ok(()) - } - - /// Infer the partitioning at the given path on the provided object store. - /// For performance reasons, it doesn't read all the files on disk - /// and therefore may fail to detect invalid partitioning. 
- pub(crate) async fn infer_partitions( - &self, - state: &dyn Session, - table_path: &ListingTableUrl, - ) -> Result> { - let store = state.runtime_env().object_store(table_path)?; - - // only use 10 files for inference - // This can fail to detect inconsistent partition keys - // A DFS traversal approach of the store can help here - let files: Vec<_> = table_path - .list_all_files(state, store.as_ref(), &self.file_extension) - .await? - .take(10) - .try_collect() - .await?; - - let stripped_path_parts = files.iter().map(|file| { - table_path - .strip_prefix(&file.location) - .unwrap() - .collect_vec() - }); - - let partition_keys = stripped_path_parts - .map(|path_parts| { - path_parts - .into_iter() - .rev() - .skip(1) // get parents only; skip the file itself - .rev() - // Partitions are expected to follow the format "column_name=value", so we - // should ignore any path part that cannot be parsed into the expected format - .filter(|s| s.contains('=')) - .map(|s| s.split('=').take(1).collect()) - .collect_vec() - }) - .collect_vec(); - - match partition_keys.into_iter().all_equal_value() { - Ok(v) => Ok(v), - Err(None) => Ok(vec![]), - Err(Some(diff)) => { - let mut sorted_diff = [diff.0, diff.1]; - sorted_diff.sort(); - plan_err!("Found mixed partition values on disk {:?}", sorted_diff) - } - } - } -} - -/// Built in [`TableProvider`] that reads data from one or more files as a single table. -/// -/// The files are read using an [`ObjectStore`] instance, for example from -/// local files or objects from AWS S3. -/// -/// # Features: -/// * Reading multiple files as a single table -/// * Hive style partitioning (e.g., directories named `date=2024-06-01`) -/// * Merges schemas from files with compatible but not identical schemas (see [`ListingTableConfig::file_schema`]) -/// * `limit`, `filter` and `projection` pushdown for formats that support it (e.g., -/// Parquet) -/// * Statistics collection and pruning based on file metadata -/// * Pre-existing sort order (see [`ListingOptions::file_sort_order`]) -/// * Metadata caching to speed up repeated queries (see [`FileMetadataCache`]) -/// * Statistics caching (see [`FileStatisticsCache`]) -/// -/// [`FileMetadataCache`]: datafusion_execution::cache::cache_manager::FileMetadataCache -/// -/// # Reading Directories and Hive Style Partitioning -/// -/// For example, given the `table1` directory (or object store prefix) -/// -/// ```text -/// table1 -/// ├── file1.parquet -/// └── file2.parquet -/// ``` -/// -/// A `ListingTable` would read the files `file1.parquet` and `file2.parquet` as -/// a single table, merging the schemas if the files have compatible but not -/// identical schemas. -/// -/// Given the `table2` directory (or object store prefix) -/// -/// ```text -/// table2 -/// ├── date=2024-06-01 -/// │ ├── file3.parquet -/// │ └── file4.parquet -/// └── date=2024-06-02 -/// └── file5.parquet -/// ``` -/// -/// A `ListingTable` would read the files `file3.parquet`, `file4.parquet`, and -/// `file5.parquet` as a single table, again merging schemas if necessary. -/// -/// Given the hive style partitioning structure (e.g,. directories named -/// `date=2024-06-01` and `date=2026-06-02`), `ListingTable` also adds a `date` -/// column when reading the table: -/// * The files in `table2/date=2024-06-01` will have the value `2024-06-01` -/// * The files in `table2/date=2024-06-02` will have the value `2024-06-02`. -/// -/// If the query has a predicate like `WHERE date = '2024-06-01'` -/// only the corresponding directory will be read. 
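The partition inference being moved above keys off hive-style `name=value` path segments only. A simplified, hypothetical helper (not the relocated code) illustrating that convention:

```rust
// Keep only directory segments of the form `name=value` and return the names;
// the file name itself and non-conforming segments are ignored.
fn partition_cols_from_path(relative_path: &str) -> Vec<String> {
    let segments: Vec<&str> = relative_path.split('/').collect();
    let dirs = &segments[..segments.len().saturating_sub(1)]; // drop the file name
    dirs.iter()
        .filter(|seg| seg.contains('='))
        .map(|seg| seg.split('=').next().unwrap_or("").to_string())
        .collect()
}

#[test]
fn hive_style_segments() {
    assert_eq!(
        partition_cols_from_path("year=2022/month=01/data.parquet"),
        vec!["year".to_string(), "month".to_string()]
    );
}
```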
-/// -/// # See Also -/// -/// 1. [`ListingTableConfig`]: Configuration options -/// 1. [`DataSourceExec`]: `ExecutionPlan` used by `ListingTable` -/// -/// [`DataSourceExec`]: crate::datasource::source::DataSourceExec -/// -/// # Caching Metadata -/// -/// Some formats, such as Parquet, use the `FileMetadataCache` to cache file -/// metadata that is needed to execute but expensive to read, such as row -/// groups and statistics. The cache is scoped to the [`SessionContext`] and can -/// be configured via the [runtime config options]. -/// -/// [`SessionContext`]: crate::prelude::SessionContext -/// [runtime config options]: https://datafusion.apache.org/user-guide/configs.html#runtime-configuration-settings -/// -/// # Example: Read a directory of parquet files using a [`ListingTable`] -/// -/// ```no_run -/// # use datafusion::prelude::SessionContext; -/// # use datafusion::error::Result; -/// # use std::sync::Arc; -/// # use datafusion::datasource::{ -/// # listing::{ -/// # ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl, -/// # }, -/// # file_format::parquet::ParquetFormat, -/// # }; -/// # #[tokio::main] -/// # async fn main() -> Result<()> { -/// let ctx = SessionContext::new(); -/// let session_state = ctx.state(); -/// let table_path = "/path/to/parquet"; -/// -/// // Parse the path -/// let table_path = ListingTableUrl::parse(table_path)?; -/// -/// // Create default parquet options -/// let file_format = ParquetFormat::new(); -/// let listing_options = ListingOptions::new(Arc::new(file_format)) -/// .with_file_extension(".parquet"); -/// -/// // Resolve the schema -/// let resolved_schema = listing_options -/// .infer_schema(&session_state, &table_path) -/// .await?; -/// -/// let config = ListingTableConfig::new(table_path) -/// .with_listing_options(listing_options) -/// .with_schema(resolved_schema); -/// -/// // Create a new TableProvider -/// let provider = Arc::new(ListingTable::try_new(config)?); -/// -/// // This provider can now be read as a dataframe: -/// let df = ctx.read_table(provider.clone()); -/// -/// // or registered as a named table: -/// ctx.register_table("my_table", provider); -/// -/// # Ok(()) -/// # } -/// ``` -#[derive(Debug, Clone)] -pub struct ListingTable { - table_paths: Vec, - /// `file_schema` contains only the columns physically stored in the data files themselves. - /// - Represents the actual fields found in files like Parquet, CSV, etc. 
- /// - Used when reading the raw data from files - file_schema: SchemaRef, - /// `table_schema` combines `file_schema` + partition columns - /// - Partition columns are derived from directory paths (not stored in files) - /// - These are columns like "year=2022/month=01" in paths like `/data/year=2022/month=01/file.parquet` - table_schema: SchemaRef, - /// Indicates how the schema was derived (inferred or explicitly specified) - schema_source: SchemaSource, - /// Options used to configure the listing table such as the file format - /// and partitioning information - options: ListingOptions, - /// The SQL definition for this table, if any - definition: Option, - /// Cache for collected file statistics - collected_statistics: FileStatisticsCache, - /// Constraints applied to this table - constraints: Constraints, - /// Column default expressions for columns that are not physically present in the data files - column_defaults: HashMap, - /// Optional [`SchemaAdapterFactory`] for creating schema adapters - schema_adapter_factory: Option>, - /// Optional [`PhysicalExprAdapterFactory`] for creating physical expression adapters - expr_adapter_factory: Option>, -} - -impl ListingTable { - /// Create new [`ListingTable`] - /// - /// See documentation and example on [`ListingTable`] and [`ListingTableConfig`] - pub fn try_new(config: ListingTableConfig) -> Result { - // Extract schema_source before moving other parts of the config - let schema_source = config.schema_source(); - - let file_schema = config - .file_schema - .ok_or_else(|| internal_datafusion_err!("No schema provided."))?; - - let options = config - .options - .ok_or_else(|| internal_datafusion_err!("No ListingOptions provided"))?; - - // Add the partition columns to the file schema - let mut builder = SchemaBuilder::from(file_schema.as_ref().to_owned()); - for (part_col_name, part_col_type) in &options.table_partition_cols { - builder.push(Field::new(part_col_name, part_col_type.clone(), false)); - } - - let table_schema = Arc::new( - builder - .finish() - .with_metadata(file_schema.metadata().clone()), - ); - - let table = Self { - table_paths: config.table_paths, - file_schema, - table_schema, - schema_source, - options, - definition: None, - collected_statistics: Arc::new(DefaultFileStatisticsCache::default()), - constraints: Constraints::default(), - column_defaults: HashMap::new(), - schema_adapter_factory: config.schema_adapter_factory, - expr_adapter_factory: config.expr_adapter_factory, - }; - - Ok(table) - } - - /// Assign constraints - pub fn with_constraints(mut self, constraints: Constraints) -> Self { - self.constraints = constraints; - self - } - - /// Assign column defaults - pub fn with_column_defaults( - mut self, - column_defaults: HashMap, - ) -> Self { - self.column_defaults = column_defaults; - self - } - - /// Set the [`FileStatisticsCache`] used to cache parquet file statistics. - /// - /// Setting a statistics cache on the `SessionContext` can avoid refetching statistics - /// multiple times in the same session. - /// - /// If `None`, creates a new [`DefaultFileStatisticsCache`] scoped to this query. 
- pub fn with_cache(mut self, cache: Option) -> Self { - self.collected_statistics = - cache.unwrap_or_else(|| Arc::new(DefaultFileStatisticsCache::default())); - self - } - - /// Specify the SQL definition for this table, if any - pub fn with_definition(mut self, definition: Option) -> Self { - self.definition = definition; - self - } - - /// Get paths ref - pub fn table_paths(&self) -> &Vec { - &self.table_paths - } - - /// Get options ref - pub fn options(&self) -> &ListingOptions { - &self.options - } - - /// Get the schema source - pub fn schema_source(&self) -> SchemaSource { - self.schema_source - } - - /// Set the [`SchemaAdapterFactory`] for this [`ListingTable`] - /// - /// The schema adapter factory is used to create schema adapters that can - /// handle schema evolution and type conversions when reading files with - /// different schemas than the table schema. - /// - /// # Example: Adding Schema Evolution Support - /// ```rust - /// # use std::sync::Arc; - /// # use datafusion::datasource::listing::{ListingTable, ListingTableConfig, ListingOptions, ListingTableUrl}; - /// # use datafusion::datasource::schema_adapter::{DefaultSchemaAdapterFactory, SchemaAdapter}; - /// # use datafusion::datasource::file_format::parquet::ParquetFormat; - /// # use arrow::datatypes::{SchemaRef, Schema, Field, DataType}; - /// # let table_path = ListingTableUrl::parse("file:///path/to/data").unwrap(); - /// # let options = ListingOptions::new(Arc::new(ParquetFormat::default())); - /// # let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int64, false)])); - /// # let config = ListingTableConfig::new(table_path).with_listing_options(options).with_schema(schema); - /// # let table = ListingTable::try_new(config).unwrap(); - /// let table_with_evolution = table - /// .with_schema_adapter_factory(Arc::new(DefaultSchemaAdapterFactory)); - /// ``` - /// See [`ListingTableConfig::with_schema_adapter_factory`] for an example of custom SchemaAdapterFactory. - pub fn with_schema_adapter_factory( - self, - schema_adapter_factory: Arc, - ) -> Self { - Self { - schema_adapter_factory: Some(schema_adapter_factory), - ..self - } - } - - /// Get the [`SchemaAdapterFactory`] for this table - pub fn schema_adapter_factory(&self) -> Option<&Arc> { - self.schema_adapter_factory.as_ref() - } - - /// Creates a schema adapter for mapping between file and table schemas - /// - /// Uses the configured schema adapter factory if available, otherwise falls back - /// to the default implementation. - fn create_schema_adapter(&self) -> Box { - let table_schema = self.schema(); - match &self.schema_adapter_factory { - Some(factory) => { - factory.create_with_projected_schema(Arc::clone(&table_schema)) - } - None => DefaultSchemaAdapterFactory::from_schema(Arc::clone(&table_schema)), - } - } - - /// Creates a file source and applies schema adapter factory if available - fn create_file_source_with_schema_adapter(&self) -> Result> { - let mut source = self.options.format.file_source(); - // Apply schema adapter to source if available - // - // The source will use this SchemaAdapter to adapt data batches as they flow up the plan. - // Note: ListingTable also creates a SchemaAdapter in `scan()` but that is only used to adapt collected statistics. 
- if let Some(factory) = &self.schema_adapter_factory { - source = source.with_schema_adapter_factory(Arc::clone(factory))?; - } - Ok(source) - } - - /// If file_sort_order is specified, creates the appropriate physical expressions - fn try_create_output_ordering( - &self, - execution_props: &ExecutionProps, - ) -> Result> { - create_lex_ordering( - &self.table_schema, - &self.options.file_sort_order, - execution_props, - ) - } -} - -// Expressions can be used for partition pruning if they can be evaluated using -// only the partition columns and there are partition columns. -fn can_be_evaluated_for_partition_pruning( - partition_column_names: &[&str], - expr: &Expr, -) -> bool { - !partition_column_names.is_empty() - && expr_applicable_for_cols(partition_column_names, expr) -} - -#[async_trait] -impl TableProvider for ListingTable { - fn as_any(&self) -> &dyn Any { - self - } - - fn schema(&self) -> SchemaRef { - Arc::clone(&self.table_schema) - } - - fn constraints(&self) -> Option<&Constraints> { - Some(&self.constraints) - } - - fn table_type(&self) -> TableType { - TableType::Base - } - - async fn scan( - &self, - state: &dyn Session, - projection: Option<&Vec>, - filters: &[Expr], - limit: Option, - ) -> Result> { - let options = ScanArgs::default() - .with_projection(projection.map(|p| p.as_slice())) - .with_filters(Some(filters)) - .with_limit(limit); - Ok(self.scan_with_args(state, options).await?.into_inner()) - } - - async fn scan_with_args<'a>( - &self, - state: &dyn Session, - args: ScanArgs<'a>, - ) -> Result { - let projection = args.projection().map(|p| p.to_vec()); - let filters = args.filters().map(|f| f.to_vec()).unwrap_or_default(); - let limit = args.limit(); - - // extract types of partition columns - let table_partition_cols = self - .options - .table_partition_cols - .iter() - .map(|col| Ok(self.table_schema.field_with_name(&col.0)?.clone())) - .collect::>>()?; - - let table_partition_col_names = table_partition_cols - .iter() - .map(|field| field.name().as_str()) - .collect::>(); - - // If the filters can be resolved using only partition cols, there is no need to - // pushdown it to TableScan, otherwise, `unhandled` pruning predicates will be generated - let (partition_filters, filters): (Vec<_>, Vec<_>) = - filters.iter().cloned().partition(|filter| { - can_be_evaluated_for_partition_pruning(&table_partition_col_names, filter) - }); - - // We should not limit the number of partitioned files to scan if there are filters and limit - // at the same time. This is because the limit should be applied after the filters are applied. 
- let statistic_file_limit = if filters.is_empty() { limit } else { None }; - - let (mut partitioned_file_lists, statistics) = self - .list_files_for_scan(state, &partition_filters, statistic_file_limit) - .await?; - - // if no files need to be read, return an `EmptyExec` - if partitioned_file_lists.is_empty() { - let projected_schema = project_schema(&self.schema(), projection.as_ref())?; - return Ok(ScanResult::new(Arc::new(EmptyExec::new(projected_schema)))); - } - - let output_ordering = self.try_create_output_ordering(state.execution_props())?; - match state - .config_options() - .execution - .split_file_groups_by_statistics - .then(|| { - output_ordering.first().map(|output_ordering| { - FileScanConfig::split_groups_by_statistics_with_target_partitions( - &self.table_schema, - &partitioned_file_lists, - output_ordering, - self.options.target_partitions, - ) - }) - }) - .flatten() - { - Some(Err(e)) => log::debug!("failed to split file groups by statistics: {e}"), - Some(Ok(new_groups)) => { - if new_groups.len() <= self.options.target_partitions { - partitioned_file_lists = new_groups; - } else { - log::debug!("attempted to split file groups by statistics, but there were more file groups than target_partitions; falling back to unordered") - } - } - None => {} // no ordering required - }; - - let Some(object_store_url) = - self.table_paths.first().map(ListingTableUrl::object_store) - else { - return Ok(ScanResult::new(Arc::new(EmptyExec::new(Arc::new( - Schema::empty(), - ))))); - }; - - let file_source = self.create_file_source_with_schema_adapter()?; - - // create the execution plan - let plan = self - .options - .format - .create_physical_plan( - state, - FileScanConfigBuilder::new( - object_store_url, - Arc::clone(&self.file_schema), - file_source, - ) - .with_file_groups(partitioned_file_lists) - .with_constraints(self.constraints.clone()) - .with_statistics(statistics) - .with_projection(projection) - .with_limit(limit) - .with_output_ordering(output_ordering) - .with_table_partition_cols(table_partition_cols) - .with_expr_adapter(self.expr_adapter_factory.clone()) - .build(), - ) - .await?; - - Ok(ScanResult::new(plan)) - } - - fn supports_filters_pushdown( - &self, - filters: &[&Expr], - ) -> Result> { - let partition_column_names = self - .options - .table_partition_cols - .iter() - .map(|col| col.0.as_str()) - .collect::>(); - filters - .iter() - .map(|filter| { - if can_be_evaluated_for_partition_pruning(&partition_column_names, filter) - { - // if filter can be handled by partition pruning, it is exact - return Ok(TableProviderFilterPushDown::Exact); - } - - Ok(TableProviderFilterPushDown::Inexact) - }) - .collect() - } - - fn get_table_definition(&self) -> Option<&str> { - self.definition.as_deref() - } - - async fn insert_into( - &self, - state: &dyn Session, - input: Arc, - insert_op: InsertOp, - ) -> Result> { - // Check that the schema of the plan matches the schema of this table. - self.schema() - .logically_equivalent_names_and_types(&input.schema())?; - - let table_path = &self.table_paths()[0]; - if !table_path.is_collection() { - return plan_err!( - "Inserting into a ListingTable backed by a single file is not supported, URL is possibly missing a trailing `/`. \ - To append to an existing file use StreamTable, e.g. by using CREATE UNBOUNDED EXTERNAL TABLE" - ); - } - - // Get the object store for the table path. 
- let store = state.runtime_env().object_store(table_path)?; - - let file_list_stream = pruned_partition_list( - state, - store.as_ref(), - table_path, - &[], - &self.options.file_extension, - &self.options.table_partition_cols, - ) - .await?; - - let file_group = file_list_stream.try_collect::>().await?.into(); - let keep_partition_by_columns = - state.config_options().execution.keep_partition_by_columns; - - // Sink related option, apart from format - let config = FileSinkConfig { - original_url: String::default(), - object_store_url: self.table_paths()[0].object_store(), - table_paths: self.table_paths().clone(), - file_group, - output_schema: self.schema(), - table_partition_cols: self.options.table_partition_cols.clone(), - insert_op, - keep_partition_by_columns, - file_extension: self.options().format.get_ext(), - }; - - let orderings = self.try_create_output_ordering(state.execution_props())?; - // It is sufficient to pass only one of the equivalent orderings: - let order_requirements = orderings.into_iter().next().map(Into::into); - - self.options() - .format - .create_writer_physical_plan(input, state, config, order_requirements) - .await - } - - fn get_column_default(&self, column: &str) -> Option<&Expr> { - self.column_defaults.get(column) - } -} - -impl ListingTable { - /// Get the list of files for a scan as well as the file level statistics. - /// The list is grouped to let the execution plan know how the files should - /// be distributed to different threads / executors. - async fn list_files_for_scan<'a>( - &'a self, - ctx: &'a dyn Session, - filters: &'a [Expr], - limit: Option, - ) -> Result<(Vec, Statistics)> { - let store = if let Some(url) = self.table_paths.first() { - ctx.runtime_env().object_store(url)? - } else { - return Ok((vec![], Statistics::new_unknown(&self.file_schema))); - }; - // list files (with partitions) - let file_list = future::try_join_all(self.table_paths.iter().map(|table_path| { - pruned_partition_list( - ctx, - store.as_ref(), - table_path, - filters, - &self.options.file_extension, - &self.options.table_partition_cols, - ) - })) - .await?; - let meta_fetch_concurrency = - ctx.config_options().execution.meta_fetch_concurrency; - let file_list = stream::iter(file_list).flatten_unordered(meta_fetch_concurrency); - // collect the statistics if required by the config - let files = file_list - .map(|part_file| async { - let part_file = part_file?; - let statistics = if self.options.collect_stat { - self.do_collect_statistics(ctx, &store, &part_file).await? 
- } else { - Arc::new(Statistics::new_unknown(&self.file_schema)) - }; - Ok(part_file.with_statistics(statistics)) - }) - .boxed() - .buffer_unordered(ctx.config_options().execution.meta_fetch_concurrency); - - let (file_group, inexact_stats) = - get_files_with_limit(files, limit, self.options.collect_stat).await?; - - let file_groups = file_group.split_files(self.options.target_partitions); - let (mut file_groups, mut stats) = compute_all_files_statistics( - file_groups, - self.schema(), - self.options.collect_stat, - inexact_stats, - )?; - - let schema_adapter = self.create_schema_adapter(); - let (schema_mapper, _) = schema_adapter.map_schema(self.file_schema.as_ref())?; - - stats.column_statistics = - schema_mapper.map_column_statistics(&stats.column_statistics)?; - file_groups.iter_mut().try_for_each(|file_group| { - if let Some(stat) = file_group.statistics_mut() { - stat.column_statistics = - schema_mapper.map_column_statistics(&stat.column_statistics)?; - } - Ok::<_, DataFusionError>(()) - })?; - Ok((file_groups, stats)) - } - - /// Collects statistics for a given partitioned file. - /// - /// This method first checks if the statistics for the given file are already cached. - /// If they are, it returns the cached statistics. - /// If they are not, it infers the statistics from the file and stores them in the cache. - async fn do_collect_statistics( - &self, - ctx: &dyn Session, - store: &Arc, - part_file: &PartitionedFile, - ) -> Result> { - match self - .collected_statistics - .get_with_extra(&part_file.object_meta.location, &part_file.object_meta) - { - Some(statistics) => Ok(statistics), - None => { - let statistics = self - .options - .format - .infer_stats( - ctx, - store, - Arc::clone(&self.file_schema), - &part_file.object_meta, - ) - .await?; - let statistics = Arc::new(statistics); - self.collected_statistics.put_with_extra( - &part_file.object_meta.location, - Arc::clone(&statistics), - &part_file.object_meta, - ); - Ok(statistics) - } - } - } -} - -/// Processes a stream of partitioned files and returns a `FileGroup` containing the files. -/// -/// This function collects files from the provided stream until either: -/// 1. The stream is exhausted -/// 2. The accumulated number of rows exceeds the provided `limit` (if specified) -/// -/// # Arguments -/// * `files` - A stream of `Result` items to process -/// * `limit` - An optional row count limit. If provided, the function will stop collecting files -/// once the accumulated number of rows exceeds this limit -/// * `collect_stats` - Whether to collect and accumulate statistics from the files -/// -/// # Returns -/// A `Result` containing a `FileGroup` with the collected files -/// and a boolean indicating whether the statistics are inexact. -/// -/// # Note -/// The function will continue processing files if statistics are not available or if the -/// limit is not provided. If `collect_stats` is false, statistics won't be accumulated -/// but files will still be collected. -async fn get_files_with_limit( - files: impl Stream>, - limit: Option, - collect_stats: bool, -) -> Result<(FileGroup, bool)> { - let mut file_group = FileGroup::default(); - // Fusing the stream allows us to call next safely even once it is finished. 
- let mut all_files = Box::pin(files.fuse()); - enum ProcessingState { - ReadingFiles, - ReachedLimit, - } - - let mut state = ProcessingState::ReadingFiles; - let mut num_rows = Precision::Absent; - - while let Some(file_result) = all_files.next().await { - // Early exit if we've already reached our limit - if matches!(state, ProcessingState::ReachedLimit) { - break; - } - - let file = file_result?; - - // Update file statistics regardless of state - if collect_stats { - if let Some(file_stats) = &file.statistics { - num_rows = if file_group.is_empty() { - // For the first file, just take its row count - file_stats.num_rows - } else { - // For subsequent files, accumulate the counts - num_rows.add(&file_stats.num_rows) - }; - } - } - - // Always add the file to our group - file_group.push(file); - - // Check if we've hit the limit (if one was specified) - if let Some(limit) = limit { - if let Precision::Exact(row_count) = num_rows { - if row_count > limit { - state = ProcessingState::ReachedLimit; - } - } - } - } - // If we still have files in the stream, it means that the limit kicked - // in, and the statistic could have been different had we processed the - // files in a different order. - let inexact_stats = all_files.next().await.is_some(); - Ok((file_group, inexact_stats)) } #[cfg(test)] mod tests { - use super::*; #[cfg(feature = "parquet")] use crate::datasource::file_format::parquet::ParquetFormat; + use crate::datasource::listing::table::ListingTableConfigExt; use crate::prelude::*; use crate::{ datasource::{ @@ -1587,21 +123,34 @@ mod tests { }, }; use arrow::{compute::SortOptions, record_batch::RecordBatch}; + use arrow_schema::{DataType, Field, Schema, SchemaRef}; + use datafusion_catalog::TableProvider; + use datafusion_catalog_listing::{ + ListingOptions, ListingTable, ListingTableConfig, SchemaSource, + }; use datafusion_common::{ - assert_contains, + assert_contains, plan_err, stats::Precision, test_util::{batches_to_string, datafusion_test_data}, - ColumnStatistics, ScalarValue, + ColumnStatistics, DataFusionError, Result, ScalarValue, }; + use datafusion_datasource::file_compression_type::FileCompressionType; + use datafusion_datasource::file_format::FileFormat; use datafusion_datasource::schema_adapter::{ SchemaAdapter, SchemaAdapterFactory, SchemaMapper, }; + use datafusion_datasource::ListingTableUrl; + use datafusion_expr::dml::InsertOp; use datafusion_expr::{BinaryExpr, LogicalPlanBuilder, Operator}; use datafusion_physical_expr::expressions::binary; use datafusion_physical_expr::PhysicalSortExpr; + use datafusion_physical_expr_common::sort_expr::LexOrdering; + use datafusion_physical_plan::empty::EmptyExec; use datafusion_physical_plan::{collect, ExecutionPlanProperties}; use rstest::rstest; + use std::collections::HashMap; use std::io::Write; + use std::sync::Arc; use tempfile::TempDir; use url::Url; @@ -1638,10 +187,13 @@ mod tests { let ctx = SessionContext::new(); let testdata = datafusion_test_data(); let filename = format!("{testdata}/aggregate_simple.csv"); - let table_path = ListingTableUrl::parse(filename).unwrap(); + let table_path = ListingTableUrl::parse(filename)?; // Test default schema source - let config = ListingTableConfig::new(table_path.clone()); + let format = CsvFormat::default(); + let options = ListingOptions::new(Arc::new(format)); + let config = + ListingTableConfig::new(table_path.clone()).with_listing_options(options); assert_eq!(config.schema_source(), SchemaSource::Unset); // Test schema source after setting a schema explicitly @@ 
-1650,18 +202,13 @@ mod tests { assert_eq!(config_with_schema.schema_source(), SchemaSource::Specified); // Test schema source after inferring schema - let format = CsvFormat::default(); - let options = ListingOptions::new(Arc::new(format)); - let config_with_options = config.with_listing_options(options.clone()); - assert_eq!(config_with_options.schema_source(), SchemaSource::Unset); + assert_eq!(config.schema_source(), SchemaSource::Unset); - let config_with_inferred = config_with_options.infer_schema(&ctx.state()).await?; + let config_with_inferred = config.infer_schema(&ctx.state()).await?; assert_eq!(config_with_inferred.schema_source(), SchemaSource::Inferred); // Test schema preservation through operations - let config_with_schema_and_options = config_with_schema - .clone() - .with_listing_options(options.clone()); + let config_with_schema_and_options = config_with_schema.clone(); assert_eq!( config_with_schema_and_options.schema_source(), SchemaSource::Specified @@ -1836,7 +383,7 @@ mod tests { .with_table_partition_cols(vec![(String::from("p1"), DataType::Utf8)]) .with_target_partitions(4); - let table_path = ListingTableUrl::parse("test:///table/").unwrap(); + let table_path = ListingTableUrl::parse("test:///table/")?; let file_schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Boolean, false)])); let config = ListingTableConfig::new(table_path) @@ -1872,7 +419,7 @@ mod tests { ) -> Result> { let testdata = crate::test_util::parquet_test_data(); let filename = format!("{testdata}/{name}"); - let table_path = ListingTableUrl::parse(filename).unwrap(); + let table_path = ListingTableUrl::parse(filename)?; let config = ListingTableConfig::new(table_path) .infer(&ctx.state()) @@ -1899,7 +446,7 @@ mod tests { let schema = Schema::new(vec![Field::new("a", DataType::Boolean, false)]); - let table_path = ListingTableUrl::parse(table_prefix).unwrap(); + let table_path = ListingTableUrl::parse(table_prefix)?; let config = ListingTableConfig::new(table_path) .with_listing_options(opt) .with_schema(Arc::new(schema)); @@ -2458,7 +1005,7 @@ mod tests { async fn test_infer_options_compressed_csv() -> Result<()> { let testdata = crate::test_util::arrow_test_data(); let filename = format!("{testdata}/csv/aggregate_test_100.csv.gz"); - let table_path = ListingTableUrl::parse(filename).unwrap(); + let table_path = ListingTableUrl::parse(filename)?; let ctx = SessionContext::new(); @@ -2479,12 +1026,15 @@ mod tests { let testdata = datafusion_test_data(); let filename = format!("{testdata}/aggregate_simple.csv"); - let table_path = ListingTableUrl::parse(filename).unwrap(); + let table_path = ListingTableUrl::parse(filename)?; let provided_schema = create_test_schema(); - let config = - ListingTableConfig::new(table_path).with_schema(Arc::clone(&provided_schema)); + let format = CsvFormat::default(); + let options = ListingOptions::new(Arc::new(format)); + let config = ListingTableConfig::new(table_path) + .with_listing_options(options) + .with_schema(Arc::clone(&provided_schema)); let config = config.infer(&ctx.state()).await?; @@ -2549,8 +1099,8 @@ mod tests { table_path1.clone(), table_path2.clone(), ]) - .with_schema(schema_3cols) - .with_listing_options(options.clone()); + .with_listing_options(options.clone()) + .with_schema(schema_3cols); let config2 = config2.infer_schema(&ctx.state()).await?; assert_eq!(config2.schema_source(), SchemaSource::Specified); @@ -2573,8 +1123,8 @@ mod tests { table_path1.clone(), table_path2.clone(), ]) - .with_schema(schema_4cols) - 
.with_listing_options(options.clone()); + .with_listing_options(options.clone()) + .with_schema(schema_4cols); let config3 = config3.infer_schema(&ctx.state()).await?; assert_eq!(config3.schema_source(), SchemaSource::Specified); @@ -2732,6 +1282,52 @@ mod tests { Ok(()) } + #[tokio::test] + async fn test_listing_table_prunes_extra_files_in_hive() -> Result<()> { + let files = [ + "bucket/test/pid=1/file1", + "bucket/test/pid=1/file2", + "bucket/test/pid=2/file3", + "bucket/test/pid=2/file4", + "bucket/test/other/file5", + ]; + + let ctx = SessionContext::new(); + register_test_store(&ctx, &files.iter().map(|f| (*f, 10)).collect::>()); + + let opt = ListingOptions::new(Arc::new(JsonFormat::default())) + .with_file_extension_opt(Some("")) + .with_table_partition_cols(vec![("pid".to_string(), DataType::Int32)]); + + let table_path = ListingTableUrl::parse("test:///bucket/test/").unwrap(); + let schema = Schema::new(vec![Field::new("a", DataType::Boolean, false)]); + let config = ListingTableConfig::new(table_path) + .with_listing_options(opt) + .with_schema(Arc::new(schema)); + + let table = ListingTable::try_new(config)?; + + let (file_list, _) = table.list_files_for_scan(&ctx.state(), &[], None).await?; + assert_eq!(file_list.len(), 1); + + let files = file_list[0].clone(); + + assert_eq!( + files + .iter() + .map(|f| f.path().to_string()) + .collect::>(), + vec![ + "bucket/test/pid=1/file1", + "bucket/test/pid=1/file2", + "bucket/test/pid=2/file3", + "bucket/test/pid=2/file4", + ] + ); + + Ok(()) + } + #[cfg(feature = "parquet")] #[tokio::test] async fn test_table_stats_behaviors() -> Result<()> { @@ -2739,7 +1335,7 @@ mod tests { let testdata = crate::test_util::parquet_test_data(); let filename = format!("{}/{}", testdata, "alltypes_plain.parquet"); - let table_path = ListingTableUrl::parse(filename).unwrap(); + let table_path = ListingTableUrl::parse(filename)?; let ctx = SessionContext::new(); let state = ctx.state(); @@ -2750,6 +1346,7 @@ mod tests { let config_default = ListingTableConfig::new(table_path.clone()) .with_listing_options(opt_default) .with_schema(schema_default); + let table_default = ListingTable::try_new(config_default)?; let exec_default = table_default.scan(&state, None, &[], None).await?; @@ -2885,7 +1482,7 @@ mod tests { let format = JsonFormat::default(); let opt = ListingOptions::new(Arc::new(format)).with_collect_stat(false); let schema = Schema::new(vec![Field::new("a", DataType::Boolean, false)]); - let table_path = ListingTableUrl::parse("test:///table/").unwrap(); + let table_path = ListingTableUrl::parse("test:///table/")?; let config = ListingTableConfig::new(table_path) .with_listing_options(opt) @@ -3099,7 +1696,7 @@ mod tests { let format = JsonFormat::default(); let opt = ListingOptions::new(Arc::new(format)).with_collect_stat(collect_stat); let schema = Schema::new(vec![Field::new("a", DataType::Boolean, false)]); - let table_path = ListingTableUrl::parse("test:///table/").unwrap(); + let table_path = ListingTableUrl::parse("test:///table/")?; let config = ListingTableConfig::new(table_path) .with_listing_options(opt) diff --git a/datafusion/core/src/datasource/physical_plan/arrow.rs b/datafusion/core/src/datasource/physical_plan/arrow.rs new file mode 100644 index 000000000000..392eaa8c4be4 --- /dev/null +++ b/datafusion/core/src/datasource/physical_plan/arrow.rs @@ -0,0 +1,23 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Reexports the [`datafusion_datasource_arrow::source`] module, containing [Arrow] based [`FileSource`]. +//! +//! [Arrow]: https://arrow.apache.org/docs/python/ipc.html +//! [`FileSource`]: datafusion_datasource::file::FileSource + +pub use datafusion_datasource_arrow::source::*; diff --git a/datafusion/core/src/datasource/physical_plan/csv.rs b/datafusion/core/src/datasource/physical_plan/csv.rs index e33761a0abb3..b2ef51a76f89 100644 --- a/datafusion/core/src/datasource/physical_plan/csv.rs +++ b/datafusion/core/src/datasource/physical_plan/csv.rs @@ -121,7 +121,7 @@ mod tests { .with_projection(Some(vec![0, 2, 4])) .build(); - assert_eq!(13, config.file_schema.fields().len()); + assert_eq!(13, config.file_schema().fields().len()); let csv = DataSourceExec::from_data_source(config); assert_eq!(3, csv.schema().fields().len()); @@ -185,7 +185,7 @@ mod tests { .with_file_compression_type(file_compression_type.to_owned()) .with_projection(Some(vec![4, 0, 2])) .build(); - assert_eq!(13, config.file_schema.fields().len()); + assert_eq!(13, config.file_schema().fields().len()); let csv = DataSourceExec::from_data_source(config); assert_eq!(3, csv.schema().fields().len()); @@ -250,7 +250,7 @@ mod tests { .with_file_compression_type(file_compression_type.to_owned()) .with_limit(Some(5)) .build(); - assert_eq!(13, config.file_schema.fields().len()); + assert_eq!(13, config.file_schema().fields().len()); let csv = DataSourceExec::from_data_source(config); assert_eq!(13, csv.schema().fields().len()); @@ -313,7 +313,7 @@ mod tests { .with_file_compression_type(file_compression_type.to_owned()) .with_limit(Some(5)) .build(); - assert_eq!(14, config.file_schema.fields().len()); + assert_eq!(14, config.file_schema().fields().len()); let csv = DataSourceExec::from_data_source(config); assert_eq!(14, csv.schema().fields().len()); @@ -349,7 +349,7 @@ mod tests { let filename = "aggregate_test_100.csv"; let tmp_dir = TempDir::new()?; - let file_groups = partitioned_file_groups( + let mut file_groups = partitioned_file_groups( path.as_str(), filename, 1, @@ -357,30 +357,29 @@ mod tests { file_compression_type.to_owned(), tmp_dir.path(), )?; + // Add partition columns / values + file_groups[0][0].partition_values = vec![ScalarValue::from("2021-10-26")]; + + let num_file_schema_fields = file_schema.fields().len(); let source = Arc::new(CsvSource::new(true, b',', b'"')); - let mut config = FileScanConfigBuilder::from(partitioned_csv_config( + let config = FileScanConfigBuilder::from(partitioned_csv_config( file_schema, file_groups, source, )) .with_newlines_in_values(false) .with_file_compression_type(file_compression_type.to_owned()) - .build(); - - // Add partition columns - config.table_partition_cols = - vec![Arc::new(Field::new("date", DataType::Utf8, false))]; - config.file_groups[0][0].partition_values = 
vec![ScalarValue::from("2021-10-26")]; - + .with_table_partition_cols(vec![Field::new("date", DataType::Utf8, false)]) // We should be able to project on the partition column // Which is supposed to be after the file fields - config.projection = Some(vec![0, config.file_schema.fields().len()]); + .with_projection(Some(vec![0, num_file_schema_fields])) + .build(); // we don't have `/date=xx/` in the path but that is ok because // partitions are resolved during scan anyway - assert_eq!(13, config.file_schema.fields().len()); + assert_eq!(13, config.file_schema().fields().len()); let csv = DataSourceExec::from_data_source(config); assert_eq!(2, csv.schema().fields().len()); diff --git a/datafusion/core/src/datasource/physical_plan/mod.rs b/datafusion/core/src/datasource/physical_plan/mod.rs index 3a9dedaa028f..1ac292e260fd 100644 --- a/datafusion/core/src/datasource/physical_plan/mod.rs +++ b/datafusion/core/src/datasource/physical_plan/mod.rs @@ -17,7 +17,7 @@ //! Execution plans that read file formats -mod arrow_file; +pub mod arrow; pub mod csv; pub mod json; @@ -35,10 +35,9 @@ pub use datafusion_datasource_parquet::source::ParquetSource; #[cfg(feature = "parquet")] pub use datafusion_datasource_parquet::{ParquetFileMetrics, ParquetFileReaderFactory}; -pub use arrow_file::ArrowSource; - pub use json::{JsonOpener, JsonSource}; +pub use arrow::{ArrowOpener, ArrowSource}; pub use csv::{CsvOpener, CsvSource}; pub use datafusion_datasource::file::FileSource; pub use datafusion_datasource::file_groups::FileGroup; diff --git a/datafusion/core/src/datasource/physical_plan/parquet.rs b/datafusion/core/src/datasource/physical_plan/parquet.rs index d0774e57174e..10a475c1cc9a 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet.rs @@ -64,7 +64,9 @@ mod tests { use datafusion_physical_expr::planner::logical2physical; use datafusion_physical_plan::analyze::AnalyzeExec; use datafusion_physical_plan::collect; - use datafusion_physical_plan::metrics::{ExecutionPlanMetricsSet, MetricsSet}; + use datafusion_physical_plan::metrics::{ + ExecutionPlanMetricsSet, MetricType, MetricsSet, + }; use datafusion_physical_plan::{ExecutionPlan, ExecutionPlanProperties}; use chrono::{TimeZone, Utc}; @@ -238,6 +240,7 @@ mod tests { let analyze_exec = Arc::new(AnalyzeExec::new( false, false, + vec![MetricType::SUMMARY, MetricType::DEV], // use a new ParquetSource to avoid sharing execution metrics self.build_parquet_exec( Arc::clone(table_schema), diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index a8148b80495e..448ee5264afd 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -64,12 +64,13 @@ use datafusion_catalog::{ DynamicFileCatalog, TableFunction, TableFunctionImpl, UrlTableFactory, }; use datafusion_common::config::ConfigOptions; +use datafusion_common::metadata::ScalarAndMetadata; use datafusion_common::{ config::{ConfigExtension, TableOptions}, exec_datafusion_err, exec_err, internal_datafusion_err, not_impl_err, plan_datafusion_err, plan_err, tree_node::{TreeNodeRecursion, TreeNodeVisitor}, - DFSchema, DataFusionError, ParamValues, ScalarValue, SchemaReference, TableReference, + DFSchema, DataFusionError, ParamValues, SchemaReference, TableReference, }; pub use datafusion_execution::config::SessionConfig; use datafusion_execution::registry::SerializerRegistry; @@ -505,6 +506,13 @@ impl SessionContext { 
self.runtime_env().register_object_store(url, object_store) } + /// Deregisters an [`ObjectStore`] associated with the specific URL prefix. + /// + /// See [`RuntimeEnv::deregister_object_store`] for more details. + pub fn deregister_object_store(&self, url: &Url) -> Result> { + self.runtime_env().deregister_object_store(url) + } + /// Registers the [`RecordBatch`] as the specified table name pub fn register_batch( &self, @@ -708,15 +716,15 @@ impl SessionContext { LogicalPlan::Statement(Statement::Prepare(Prepare { name, input, - data_types, + fields, })) => { // The number of parameters must match the specified data types length. - if !data_types.is_empty() { + if !fields.is_empty() { let param_names = input.get_parameter_names()?; - if param_names.len() != data_types.len() { + if param_names.len() != fields.len() { return plan_err!( "Prepare specifies {} data types but query has {} parameters", - data_types.len(), + fields.len(), param_names.len() ); } @@ -726,7 +734,7 @@ impl SessionContext { // not currently feasible. This is because `now()` would be optimized to a // constant value, causing each EXECUTE to yield the same result, which is // incorrect behavior. - self.state.write().store_prepared(name, data_types, input)?; + self.state.write().store_prepared(name, fields, input)?; self.return_empty_dataframe() } LogicalPlan::Statement(Statement::Execute(execute)) => { @@ -1072,6 +1080,26 @@ impl SessionContext { } else { let mut state = self.state.write(); state.config_mut().options_mut().set(&variable, &value)?; + + // Re-initialize any UDFs that depend on configuration + // This allows both built-in and custom functions to respond to configuration changes + let config_options = state.config().options(); + + // Collect updated UDFs in a separate vector + let udfs_to_update: Vec<_> = state + .scalar_functions() + .values() + .filter_map(|udf| { + udf.inner() + .with_updated_config(config_options) + .map(Arc::new) + }) + .collect(); + + for udf in udfs_to_update { + state.register_udf(udf)?; + } + drop(state); } @@ -1238,28 +1266,30 @@ impl SessionContext { })?; // Only allow literals as parameters for now. - let mut params: Vec = parameters + let mut params: Vec = parameters .into_iter() .map(|e| match e { - Expr::Literal(scalar, _) => Ok(scalar), + Expr::Literal(scalar, metadata) => { + Ok(ScalarAndMetadata::new(scalar, metadata)) + } _ => not_impl_err!("Unsupported parameter type: {}", e), }) .collect::>()?; // If the prepared statement provides data types, cast the params to those types. 
- if !prepared.data_types.is_empty() { - if params.len() != prepared.data_types.len() { + if !prepared.fields.is_empty() { + if params.len() != prepared.fields.len() { return exec_err!( "Prepared statement '{}' expects {} parameters, but {} provided", name, - prepared.data_types.len(), + prepared.fields.len(), params.len() ); } params = params .into_iter() - .zip(prepared.data_types.iter()) - .map(|(e, dt)| e.cast_to(dt)) + .zip(prepared.fields.iter()) + .map(|(e, dt)| -> Result<_> { e.cast_storage_to(dt.data_type()) }) .collect::>()?; } diff --git a/datafusion/core/src/execution/session_state.rs b/datafusion/core/src/execution/session_state.rs index b04004dd495c..561e0c363a37 100644 --- a/datafusion/core/src/execution/session_state.rs +++ b/datafusion/core/src/execution/session_state.rs @@ -30,15 +30,14 @@ use crate::datasource::provider_as_source; use crate::execution::context::{EmptySerializerRegistry, FunctionFactory, QueryPlanner}; use crate::execution::SessionStateDefaults; use crate::physical_planner::{DefaultPhysicalPlanner, PhysicalPlanner}; +use arrow_schema::{DataType, FieldRef}; use datafusion_catalog::information_schema::{ InformationSchemaProvider, INFORMATION_SCHEMA, }; - -use arrow::datatypes::DataType; use datafusion_catalog::MemoryCatalogProviderList; use datafusion_catalog::{TableFunction, TableFunctionImpl}; use datafusion_common::alias::AliasGenerator; -use datafusion_common::config::{ConfigExtension, ConfigOptions, TableOptions}; +use datafusion_common::config::{ConfigExtension, ConfigOptions, Dialect, TableOptions}; use datafusion_common::display::{PlanType, StringifiedPlan, ToStringifiedPlan}; use datafusion_common::tree_node::TreeNode; use datafusion_common::{ @@ -116,11 +115,11 @@ use uuid::Uuid; /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let state = SessionStateBuilder::new() -/// .with_config(SessionConfig::new()) +/// .with_config(SessionConfig::new()) /// .with_runtime_env(Arc::new(RuntimeEnv::default())) /// .with_default_features() /// .build(); -/// Ok(()) +/// Ok(()) /// # } /// ``` /// @@ -374,7 +373,7 @@ impl SessionState { pub fn sql_to_statement( &self, sql: &str, - dialect: &str, + dialect: &Dialect, ) -> datafusion_common::Result { let dialect = dialect_from_str(dialect).ok_or_else(|| { plan_datafusion_err!( @@ -411,7 +410,7 @@ impl SessionState { pub fn sql_to_expr( &self, sql: &str, - dialect: &str, + dialect: &Dialect, ) -> datafusion_common::Result { self.sql_to_expr_with_alias(sql, dialect).map(|x| x.expr) } @@ -423,7 +422,7 @@ impl SessionState { pub fn sql_to_expr_with_alias( &self, sql: &str, - dialect: &str, + dialect: &Dialect, ) -> datafusion_common::Result { let dialect = dialect_from_str(dialect).ok_or_else(|| { plan_datafusion_err!( @@ -527,8 +526,8 @@ impl SessionState { &self, sql: &str, ) -> datafusion_common::Result { - let dialect = self.config.options().sql_parser.dialect.as_str(); - let statement = self.sql_to_statement(sql, dialect)?; + let dialect = self.config.options().sql_parser.dialect; + let statement = self.sql_to_statement(sql, &dialect)?; let plan = self.statement_to_plan(statement).await?; Ok(plan) } @@ -542,9 +541,9 @@ impl SessionState { sql: &str, df_schema: &DFSchema, ) -> datafusion_common::Result { - let dialect = self.config.options().sql_parser.dialect.as_str(); + let dialect = self.config.options().sql_parser.dialect; - let sql_expr = self.sql_to_expr_with_alias(sql, dialect)?; + let sql_expr = self.sql_to_expr_with_alias(sql, &dialect)?; let provider = SessionContextProvider { state: self, 
@@ -873,12 +872,12 @@ impl SessionState { pub(crate) fn store_prepared( &mut self, name: String, - data_types: Vec, + fields: Vec, plan: Arc, ) -> datafusion_common::Result<()> { match self.prepared_plans.entry(name) { Entry::Vacant(e) => { - e.insert(Arc::new(PreparedPlan { data_types, plan })); + e.insert(Arc::new(PreparedPlan { fields, plan })); Ok(()) } Entry::Occupied(e) => { @@ -1323,7 +1322,7 @@ impl SessionStateBuilder { /// let url = Url::try_from("file://").unwrap(); /// let object_store = object_store::local::LocalFileSystem::new(); /// let state = SessionStateBuilder::new() - /// .with_config(SessionConfig::new()) + /// .with_config(SessionConfig::new()) /// .with_object_store(&url, Arc::new(object_store)) /// .with_default_features() /// .build(); @@ -1419,12 +1418,31 @@ impl SessionStateBuilder { } if let Some(scalar_functions) = scalar_functions { - scalar_functions.into_iter().for_each(|udf| { - let existing_udf = state.register_udf(udf); - if let Ok(Some(existing_udf)) = existing_udf { - debug!("Overwrote an existing UDF: {}", existing_udf.name()); + for udf in scalar_functions { + let config_options = state.config().options(); + match udf.inner().with_updated_config(config_options) { + Some(new_udf) => { + if let Err(err) = state.register_udf(Arc::new(new_udf)) { + debug!( + "Failed to re-register updated UDF '{}': {}", + udf.name(), + err + ); + } + } + None => match state.register_udf(Arc::clone(&udf)) { + Ok(Some(existing)) => { + debug!("Overwrote existing UDF '{}'", existing.name()); + } + Ok(None) => { + debug!("Registered UDF '{}'", udf.name()); + } + Err(err) => { + debug!("Failed to register UDF '{}': {}", udf.name(), err); + } + }, } - }); + } } if let Some(aggregate_functions) = aggregate_functions { @@ -2012,7 +2030,7 @@ impl SimplifyInfo for SessionSimplifyProvider<'_> { #[derive(Debug)] pub(crate) struct PreparedPlan { /// Data types of the parameters - pub(crate) data_types: Vec, + pub(crate) fields: Vec, /// The prepared logical plan pub(crate) plan: Arc, } @@ -2034,6 +2052,7 @@ mod tests { use arrow::array::{ArrayRef, Int32Array, RecordBatch, StringArray}; use arrow::datatypes::{DataType, Field, Schema}; use datafusion_catalog::MemoryCatalogProviderList; + use datafusion_common::config::Dialect; use datafusion_common::DFSchema; use datafusion_common::Result; use datafusion_execution::config::SessionConfig; @@ -2059,8 +2078,8 @@ mod tests { let sql = "[1,2,3]"; let schema = Schema::new(vec![Field::new("a", DataType::Int32, true)]); let df_schema = DFSchema::try_from(schema)?; - let dialect = state.config.options().sql_parser.dialect.as_str(); - let sql_expr = state.sql_to_expr(sql, dialect)?; + let dialect = state.config.options().sql_parser.dialect; + let sql_expr = state.sql_to_expr(sql, &dialect)?; let query = SqlToRel::new_with_options(&provider, state.get_parser_options()); query.sql_to_expr(sql_expr, &df_schema, &mut PlannerContext::new()) @@ -2218,7 +2237,8 @@ mod tests { } let state = &context_provider.state; - let statement = state.sql_to_statement("select count(*) from t", "mysql")?; + let statement = + state.sql_to_statement("select count(*) from t", &Dialect::MySQL)?; let plan = SqlToRel::new(&context_provider).statement_to_plan(statement)?; state.create_physical_plan(&plan).await } diff --git a/datafusion/core/src/lib.rs b/datafusion/core/src/lib.rs index e7ace544a11c..78db28eaacc7 100644 --- a/datafusion/core/src/lib.rs +++ b/datafusion/core/src/lib.rs @@ -443,7 +443,30 @@ //! 
other operators read a single [`RecordBatch`] from their input to produce a //! single [`RecordBatch`] as output. //! -//! For example, given this SQL query: +//! For example, given this SQL: +//! +//! ```sql +//! SELECT name FROM 'data.parquet' WHERE id > 10 +//! ``` +//! +//! A simplified DataFusion execution plan is shown below. It first reads +//! data from the Parquet file, then applies the filter, then the projection, +//! and finally produces output. Each step processes one [`RecordBatch`] at a +//! time. Multiple batches are processed concurrently on different CPU cores +//! for plans with multiple partitions. +//! +//! ```text +//! ┌─────────────┐ ┌──────────────┐ ┌────────────────┐ ┌──────────────────┐ ┌──────────┐ +//! │ Parquet │───▶│ DataSource │───▶│ FilterExec │───▶│ ProjectionExec │───▶│ Results │ +//! │ File │ │ │ │ │ │ │ │ │ +//! └─────────────┘ └──────────────┘ └────────────────┘ └──────────────────┘ └──────────┘ +//! (reads data) (id > 10) (keeps "name" col) +//! RecordBatch ───▶ RecordBatch ────▶ RecordBatch ────▶ RecordBatch +//! ``` +//! +//! DataFusion uses the classic "pull" based control flow (explained more in the +//! next section) to implement streaming execution. As an example, +//! consider the following SQL query: //! //! ```sql //! SELECT date_trunc('month', time) FROM data WHERE id IN (10,20,30);
@@ -897,6 +920,12 @@ doc_comment::doctest!("../../../README.md", readme_example_test); // For example, if `user_guide_expressions(line 123)` fails, // go to `docs/source/user-guide/expressions.md` to find the relevant problem. // +#[cfg(doctest)] +doc_comment::doctest!( + "../../../docs/source/user-guide/arrow-introduction.md", + user_guide_arrow_introduction +); + #[cfg(doctest)] doc_comment::doctest!( "../../../docs/source/user-guide/concepts-readings-events.md",
diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index c28e56790e66..c280b50a9f07 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs
@@ -62,6 +62,7 @@ use arrow::compute::SortOptions; use arrow::datatypes::Schema; use datafusion_catalog::ScanArgs; use datafusion_common::display::ToStringifiedPlan; +use datafusion_common::format::ExplainAnalyzeLevel; use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion, TreeNodeVisitor}; use datafusion_common::TableReference; use datafusion_common::{
@@ -77,10 +78,11 @@ use datafusion_expr::expr::{ }; use datafusion_expr::expr_rewriter::unnormalize_cols; use datafusion_expr::logical_plan::builder::wrap_projection_for_join_if_necessary; +use datafusion_expr::utils::split_conjunction; use datafusion_expr::{ - Analyze, DescribeTable, DmlStatement, Explain, ExplainFormat, Extension, FetchType, - Filter, JoinType, RecursiveQuery, SkipType, StringifiedPlan, WindowFrame, - WindowFrameBound, WriteOp, + Analyze, BinaryExpr, DescribeTable, DmlStatement, Explain, ExplainFormat, Extension, + FetchType, Filter, JoinType, Operator, RecursiveQuery, SkipType, StringifiedPlan, + WindowFrame, WindowFrameBound, WriteOp, }; use datafusion_physical_expr::aggregate::{AggregateExprBuilder, AggregateFunctionExpr}; use datafusion_physical_expr::expressions::Literal;
@@ -90,6 +92,8 @@ use datafusion_physical_expr::{ }; use datafusion_physical_optimizer::PhysicalOptimizerRule; use datafusion_physical_plan::empty::EmptyExec; use datafusion_physical_plan::execution_plan::InvariantLevel; +use datafusion_physical_plan::joins::PiecewiseMergeJoinExec; +use datafusion_physical_plan::metrics::MetricType;
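// A minimal sketch of the "pull" based streaming execution described in the
// lib.rs documentation added above: the caller repeatedly polls the output
// stream and each poll pulls one RecordBatch through the plan. The file name
// `data.parquet`, the `id`/`name` columns, and the use of `#[tokio::main]`
// are illustrative assumptions for this sketch; only the SessionContext and
// DataFrame calls shown are existing DataFusion APIs.
use datafusion::error::Result;
use datafusion::prelude::{ParquetReadOptions, SessionContext};
use futures::StreamExt;

#[tokio::main]
async fn main() -> Result<()> {
    let ctx = SessionContext::new();
    // Assumes a local Parquet file containing `id` and `name` columns
    ctx.register_parquet("data", "data.parquet", ParquetReadOptions::default())
        .await?;

    let df = ctx.sql("SELECT name FROM data WHERE id > 10").await?;

    // `execute_stream` returns a stream of RecordBatches; each `next().await`
    // pulls one batch through DataSource -> FilterExec -> ProjectionExec,
    // matching the streaming model sketched in the diagram above
    let mut stream = df.execute_stream().await?;
    while let Some(batch) = stream.next().await {
        println!("got {} rows", batch?.num_rows());
    }
    Ok(())
}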
use datafusion_physical_plan::placeholder_row::PlaceholderRowExec; use datafusion_physical_plan::recursive_query::RecursiveQueryExec; use datafusion_physical_plan::unnest::ListUnnest;
@@ -985,7 +989,7 @@ impl DefaultPhysicalPlanner { struct_type_columns.clone(), schema, options.clone(), - )) + )?) } // 2 Children
@@ -1131,8 +1135,42 @@ impl DefaultPhysicalPlanner { }) .collect::>()?; + // TODO: `num_range_filters` can be used later on for ASOF joins (`num_range_filters > 1`) + let mut num_range_filters = 0; + let mut range_filters: Vec<Expr> = Vec::new(); + let mut total_filters = 0; + + let join_filter = match filter { Some(expr) => { + let split_expr = split_conjunction(expr); + for expr in split_expr.iter() { + match *expr { + Expr::BinaryExpr(BinaryExpr { + left: _, + right: _, + op, + }) => { + if matches!( + op, + Operator::Lt + | Operator::LtEq + | Operator::Gt + | Operator::GtEq + ) { + range_filters.push((**expr).clone()); + num_range_filters += 1; + } + total_filters += 1; + } + // TODO: Handle `Expr::Between` for IEJoins; it counts as two range predicates, + // which is why it is not handled by PWMJ + // Expr::Between(_) => {}, + _ => { + total_filters += 1; + } + } + } + + // Extract columns from filter expression and saved in a HashSet let cols = expr.column_refs();
@@ -1188,6 +1226,7 @@ impl DefaultPhysicalPlanner { )?; let filter_schema = Schema::new_with_metadata(filter_fields, metadata); + let filter_expr = create_physical_expr( expr, &filter_df_schema,
@@ -1210,10 +1249,125 @@ impl DefaultPhysicalPlanner { let prefer_hash_join = session_state.config_options().optimizer.prefer_hash_join; + // TODO: Allow PWMJ to deal with residual equijoin conditions let join: Arc<dyn ExecutionPlan> = if join_on.is_empty() { if join_filter.is_none() && matches!(join_type, JoinType::Inner) { // cross join if there is no join conditions and no join filter set Arc::new(CrossJoinExec::new(physical_left, physical_right)) + } else if num_range_filters == 1 + && total_filters == 1 + && !matches!( + join_type, + JoinType::LeftSemi + | JoinType::RightSemi + | JoinType::LeftAnti + | JoinType::RightAnti + | JoinType::LeftMark + | JoinType::RightMark + ) + && session_state + .config_options() + .optimizer + .enable_piecewise_merge_join + { + let Expr::BinaryExpr(be) = &range_filters[0] else { + return plan_err!( + "Unsupported expression for PWMJ: Expected `Expr::BinaryExpr`" + ); + }; + + let mut op = be.op; + if !matches!( + op, + Operator::Lt | Operator::LtEq | Operator::Gt | Operator::GtEq + ) { + return plan_err!( + "Unsupported operator for PWMJ: {:?}. Expected one of <, <=, >, >=", + op + ); + } + + fn reverse_ineq(op: Operator) -> Operator { + match op { + Operator::Lt => Operator::Gt, + Operator::LtEq => Operator::GtEq, + Operator::Gt => Operator::Lt, + Operator::GtEq => Operator::LtEq, + _ => op, + } + } + + #[derive(Clone, Copy, Debug, PartialEq, Eq)] + enum Side { + Left, + Right, + Both, + } + + let side_of = |e: &Expr| -> Result<Side> { + let cols = e.column_refs(); + let any_left = cols + .iter() + .any(|c| left_df_schema.index_of_column(c).is_ok()); + let any_right = cols + .iter() + .any(|c| right_df_schema.index_of_column(c).is_ok()); + + Ok(match (any_left, any_right) { + (true, false) => Side::Left, + (false, true) => Side::Right, + (true, true) => Side::Both, + _ => unreachable!(), + }) + }; + + let mut lhs_logical = &be.left; + let mut rhs_logical = &be.right; + + let left_side = side_of(lhs_logical)?; + let right_side = side_of(rhs_logical)?; + if matches!(left_side, Side::Both) + || matches!(right_side, Side::Both) + { + return Ok(Arc::new(NestedLoopJoinExec::try_new( + physical_left, + physical_right, + join_filter, + join_type, + None, + )?)); + } + + if left_side == Side::Right && right_side == Side::Left { + std::mem::swap(&mut lhs_logical, &mut rhs_logical); + op = reverse_ineq(op); + } else if !(left_side == Side::Left && right_side == Side::Right) + { + return plan_err!( + "Unsupported range predicate for PWMJ: each side of the comparison must reference columns from only one join input" + ); + } + + let on_left = create_physical_expr( + lhs_logical, + left_df_schema, + session_state.execution_props(), + )?; + let on_right = create_physical_expr( + rhs_logical, + right_df_schema, + session_state.execution_props(), + )?; + + Arc::new(PiecewiseMergeJoinExec::try_new( + physical_left, + physical_right, + (on_left, on_right), + op, + *join_type, + session_state.config().target_partitions(), + )?)
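// A standalone sketch of the operand normalization performed above, using the
// hypothetical columns `l.a` (from the left input) and `r.b` (from the right
// input): when the single range predicate is written as `r.b < l.a`, the planner
// swaps the operands and reverses the inequality so that the left-hand expression
// always comes from the left input before building the PiecewiseMergeJoin keys.
use datafusion_expr::{col, Expr, Operator};

fn reverse_ineq(op: Operator) -> Operator {
    match op {
        Operator::Lt => Operator::Gt,
        Operator::LtEq => Operator::GtEq,
        Operator::Gt => Operator::Lt,
        Operator::GtEq => Operator::LtEq,
        _ => op,
    }
}

fn main() {
    // Predicate as written in the query: r.b < l.a
    let predicate = col("r.b").lt(col("l.a"));
    let Expr::BinaryExpr(be) = predicate else {
        unreachable!("comparison builders always produce Expr::BinaryExpr")
    };
    // The left operand references the right join input, so swap and reverse:
    // `r.b < l.a` becomes `l.a > r.b`
    let (lhs, rhs, op) = (be.right, be.left, reverse_ineq(be.op));
    assert_eq!(op, Operator::Gt);
    println!("normalized range predicate: {lhs} {op} {rhs}");
}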
} else { // there is no equal join condition, use the nested loop join Arc::new(NestedLoopJoinExec::try_new( @@ -2073,9 +2227,15 @@ impl DefaultPhysicalPlanner { let input = self.create_physical_plan(&a.input, session_state).await?; let schema = Arc::clone(a.schema.inner()); let show_statistics = session_state.config_options().explain.show_statistics; + let analyze_level = session_state.config_options().explain.analyze_level; + let metric_types = match analyze_level { + ExplainAnalyzeLevel::Summary => vec![MetricType::SUMMARY], + ExplainAnalyzeLevel::Dev => vec![MetricType::SUMMARY, MetricType::DEV], + }; Ok(Arc::new(AnalyzeExec::new( a.verbose, show_statistics, + metric_types, input, schema, ))) @@ -2484,7 +2644,7 @@ mod tests { // verify that the plan correctly casts u8 to i64 // the cast from u8 to i64 for literal will be simplified, and get lit(int64(5)) // the cast here is implicit so has CastOptions with safe=true - let expected = r#"BinaryExpr { left: Column { name: "c7", index: 2 }, op: Lt, right: Literal { value: Int64(5), field: Field { name: "lit", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} } }, fail_on_overflow: false }"#; + let expected = r#"BinaryExpr { left: Column { name: "c7", index: 2 }, op: Lt, right: Literal { value: Int64(5), field: Field { name: "lit", data_type: Int64 } }, fail_on_overflow: false"#; assert_contains!(format!("{exec_plan:?}"), expected); Ok(()) @@ -2544,9 +2704,6 @@ mod tests { name: "lit", data_type: Utf8, nullable: true, - dict_id: 0, - dict_is_ordered: false, - metadata: {}, }, }, "c1", @@ -2558,9 +2715,6 @@ mod tests { name: "lit", data_type: Int64, nullable: true, - dict_id: 0, - dict_is_ordered: false, - metadata: {}, }, }, "c2", @@ -2572,9 +2726,6 @@ mod tests { name: "lit", data_type: Int64, nullable: true, - dict_id: 0, - dict_is_ordered: false, - metadata: {}, }, }, "c3", @@ -2683,9 +2834,6 @@ mod tests { name: "lit", data_type: Utf8, nullable: true, - dict_id: 0, - dict_is_ordered: false, - metadata: {}, }, }, "c1", @@ -2697,9 +2845,6 @@ mod tests { name: "lit", data_type: Int64, nullable: true, - dict_id: 0, - dict_is_ordered: false, - metadata: {}, }, }, "c2", @@ -2711,9 +2856,6 @@ mod tests { name: "lit", data_type: Int64, nullable: true, - dict_id: 0, - dict_is_ordered: false, - metadata: {}, }, }, "c3", @@ -2887,7 +3029,7 @@ mod tests { .expect_err("planning error") .strip_backtrace(); - insta::assert_snapshot!(e, @r#"Error during planning: Extension planner for NoOp created an ExecutionPlan with mismatched schema. LogicalPlan schema: DFSchema { inner: Schema { fields: [Field { name: "a", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }], metadata: {} }, field_qualifiers: [None], functional_dependencies: FunctionalDependencies { deps: [] } }, ExecutionPlan schema: Schema { fields: [Field { name: "b", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }], metadata: {} }"#); + insta::assert_snapshot!(e, @r#"Error during planning: Extension planner for NoOp created an ExecutionPlan with mismatched schema. 
LogicalPlan schema: DFSchema { inner: Schema { fields: [Field { name: "a", data_type: Int32 }], metadata: {} }, field_qualifiers: [None], functional_dependencies: FunctionalDependencies { deps: [] } }, ExecutionPlan schema: Schema { fields: [Field { name: "b", data_type: Int32 }], metadata: {} }"#); } #[tokio::test] @@ -2903,7 +3045,7 @@ mod tests { let execution_plan = plan(&logical_plan).await?; // verify that the plan correctly adds cast from Int64(1) to Utf8, and the const will be evaluated. - let expected = "expr: [ProjectionExpr { expr: BinaryExpr { left: BinaryExpr { left: Column { name: \"c1\", index: 0 }, op: Eq, right: Literal { value: Utf8(\"a\"), field: Field { name: \"lit\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} } }, fail_on_overflow: false }, op: Or, right: BinaryExpr { left: Column { name: \"c1\", index: 0 }, op: Eq, right: Literal { value: Utf8(\"1\"), field: Field { name: \"lit\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} } }, fail_on_overflow: false }, fail_on_overflow: false }"; + let expected = r#"expr: BinaryExpr { left: BinaryExpr { left: Column { name: "c1", index: 0 }, op: Eq, right: Literal { value: Utf8("a"), field: Field { name: "lit", data_type: Utf8 } }, fail_on_overflow: false }"#; assert_contains!(format!("{execution_plan:?}"), expected); @@ -2925,7 +3067,7 @@ mod tests { assert_contains!( &e, - r#"Error during planning: Can not find compatible types to compare Boolean with [Struct(foo Boolean), Utf8]"# + r#"Error during planning: Can not find compatible types to compare Boolean with [Struct("foo": Boolean), Utf8]"# ); Ok(()) diff --git a/datafusion/core/tests/catalog/memory.rs b/datafusion/core/tests/catalog/memory.rs index ea9e71fc3746..06ed141b2e8b 100644 --- a/datafusion/core/tests/catalog/memory.rs +++ b/datafusion/core/tests/catalog/memory.rs @@ -19,7 +19,7 @@ use arrow::datatypes::Schema; use datafusion::catalog::CatalogProvider; use datafusion::datasource::empty::EmptyTable; use datafusion::datasource::listing::{ - ListingTable, ListingTableConfig, ListingTableUrl, + ListingTable, ListingTableConfig, ListingTableConfigExt, ListingTableUrl, }; use datafusion::prelude::SessionContext; use datafusion_catalog::memory::*; diff --git a/datafusion/core/tests/core_integration.rs b/datafusion/core/tests/core_integration.rs index e37a368f0771..edcf039e4e70 100644 --- a/datafusion/core/tests/core_integration.rs +++ b/datafusion/core/tests/core_integration.rs @@ -21,6 +21,9 @@ mod sql; /// Run all tests that are found in the `dataframe` directory mod dataframe; +/// Run all tests that are found in the `datasource` directory +mod datasource; + /// Run all tests that are found in the `macro_hygiene` directory mod macro_hygiene; diff --git a/datafusion/core/tests/dataframe/dataframe_functions.rs b/datafusion/core/tests/dataframe/dataframe_functions.rs index b664fccdfa80..265862ff9af8 100644 --- a/datafusion/core/tests/dataframe/dataframe_functions.rs +++ b/datafusion/core/tests/dataframe/dataframe_functions.rs @@ -274,6 +274,33 @@ async fn test_nvl2() -> Result<()> { Ok(()) } + +#[tokio::test] +async fn test_nvl2_short_circuit() -> Result<()> { + let expr = nvl2( + col("a"), + arrow_cast(lit("1"), lit("Int32")), + arrow_cast(col("a"), lit("Int32")), + ); + + let batches = get_batches(expr).await?; + + assert_snapshot!( + batches_to_string(&batches), + @r#" + +-----------------------------------------------------------------------------------+ + | 
nvl2(test.a,arrow_cast(Utf8("1"),Utf8("Int32")),arrow_cast(test.a,Utf8("Int32"))) | + +-----------------------------------------------------------------------------------+ + | 1 | + | 1 | + | 1 | + | 1 | + +-----------------------------------------------------------------------------------+ + "# + ); + + Ok(()) +} #[tokio::test] async fn test_fn_arrow_typeof() -> Result<()> { let expr = arrow_typeof(col("l")); @@ -282,16 +309,16 @@ async fn test_fn_arrow_typeof() -> Result<()> { assert_snapshot!( batches_to_string(&batches), - @r#" - +------------------------------------------------------------------------------------------------------------------+ - | arrow_typeof(test.l) | - +------------------------------------------------------------------------------------------------------------------+ - | List(Field { name: "item", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) | - | List(Field { name: "item", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) | - | List(Field { name: "item", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) | - | List(Field { name: "item", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) | - +------------------------------------------------------------------------------------------------------------------+ - "#); + @r" + +----------------------+ + | arrow_typeof(test.l) | + +----------------------+ + | List(nullable Int32) | + | List(nullable Int32) | + | List(nullable Int32) | + | List(nullable Int32) | + +----------------------+ + "); Ok(()) } diff --git a/datafusion/core/tests/dataframe/mod.rs b/datafusion/core/tests/dataframe/mod.rs index aa538f6dee81..17d1695478a5 100644 --- a/datafusion/core/tests/dataframe/mod.rs +++ b/datafusion/core/tests/dataframe/mod.rs @@ -33,6 +33,7 @@ use arrow::error::ArrowError; use arrow::util::pretty::pretty_format_batches; use arrow_schema::{SortOptions, TimeUnit}; use datafusion::{assert_batches_eq, dataframe}; +use datafusion_common::metadata::FieldMetadata; use datafusion_functions_aggregate::count::{count_all, count_all_window}; use datafusion_functions_aggregate::expr_fn::{ array_agg, avg, avg_distinct, count, count_distinct, max, median, min, sum, @@ -65,15 +66,13 @@ use datafusion_catalog::TableProvider; use datafusion_common::test_util::{batches_to_sort_string, batches_to_string}; use datafusion_common::{ assert_contains, internal_datafusion_err, Constraint, Constraints, DFSchema, - DataFusionError, ParamValues, ScalarValue, TableReference, UnnestOptions, + DataFusionError, ScalarValue, TableReference, UnnestOptions, }; use datafusion_common_runtime::SpawnedTask; use datafusion_datasource::file_format::format_as_file_type; use datafusion_execution::config::SessionConfig; use datafusion_execution::runtime_env::RuntimeEnv; -use datafusion_expr::expr::{ - FieldMetadata, GroupingSet, NullTreatment, Sort, WindowFunction, -}; +use datafusion_expr::expr::{GroupingSet, NullTreatment, Sort, WindowFunction}; use datafusion_expr::var_provider::{VarProvider, VarType}; use datafusion_expr::{ cast, col, create_udf, exists, in_subquery, lit, out_ref_col, placeholder, @@ -2465,7 +2464,7 @@ async fn filtered_aggr_with_param_values() -> Result<()> { let df = ctx .sql("select count (c2) filter (where c3 > $1) from table1") .await? 
- .with_param_values(ParamValues::List(vec![ScalarValue::from(10u64)])); + .with_param_values(vec![ScalarValue::from(10u64)]); let df_results = df?.collect().await?; assert_snapshot!( @@ -2945,18 +2944,18 @@ async fn test_count_wildcard_on_window() -> Result<()> { assert_snapshot!( pretty_format_batches(&sql_results).unwrap(), @r#" - +---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | plan_type | plan | - +---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | logical_plan | Projection: count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING AS count(*) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING | - | | WindowAggr: windowExpr=[[count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING]] | - | | TableScan: t1 projection=[a] | - | physical_plan | ProjectionExec: expr=[count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING@1 as count(*) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING] | - | | BoundedWindowAggExec: wdw=[count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING: Field { name: "count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING], mode=[Sorted] | - | | SortExec: expr=[a@0 DESC], preserve_partitioning=[false] | - | | DataSourceExec: partitions=1, partition_sizes=[1] | - | | | - +---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + +---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | plan_type | plan | + +---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | logical_plan | Projection: count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING AS count(*) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING | 
+ | | WindowAggr: windowExpr=[[count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING]] | + | | TableScan: t1 projection=[a] | + | physical_plan | ProjectionExec: expr=[count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING@1 as count(*) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING] | + | | BoundedWindowAggExec: wdw=[count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING: Field { "count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING": Int64 }, frame: RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING], mode=[Sorted] | + | | SortExec: expr=[a@0 DESC], preserve_partitioning=[false] | + | | DataSourceExec: partitions=1, partition_sizes=[1] | + | | | + +---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ "# ); @@ -2979,18 +2978,18 @@ async fn test_count_wildcard_on_window() -> Result<()> { assert_snapshot!( pretty_format_batches(&df_results).unwrap(), @r#" - +---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | plan_type | plan | - +---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | logical_plan | Projection: count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING | - | | WindowAggr: windowExpr=[[count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING]] | - | | TableScan: t1 projection=[a] | - | physical_plan | ProjectionExec: expr=[count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING@1 as count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING] | - | | BoundedWindowAggExec: wdw=[count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING: Field { name: "count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING], mode=[Sorted] | - | | SortExec: expr=[a@0 DESC], preserve_partitioning=[false] | - | | DataSourceExec: partitions=1, partition_sizes=[1] | - | | | - 
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + +---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | plan_type | plan | + +---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | logical_plan | Projection: count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING | + | | WindowAggr: windowExpr=[[count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING]] | + | | TableScan: t1 projection=[a] | + | physical_plan | ProjectionExec: expr=[count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING@1 as count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING] | + | | BoundedWindowAggExec: wdw=[count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING: Field { "count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING": Int64 }, frame: RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING], mode=[Sorted] | + | | SortExec: expr=[a@0 DESC], preserve_partitioning=[false] | + | | DataSourceExec: partitions=1, partition_sizes=[1] | + | | | + +---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ "# ); @@ -4436,12 +4435,12 @@ async fn unnest_with_redundant_columns() -> Result<()> { let actual = formatted.trim(); assert_snapshot!( actual, - @r###" + @r" Projection: shapes.shape_id [shape_id:UInt32] Unnest: lists[shape_id2|depth=1] structs[] [shape_id:UInt32, shape_id2:UInt32;N] - Aggregate: groupBy=[[shapes.shape_id]], aggr=[[array_agg(shapes.shape_id) AS shape_id2]] [shape_id:UInt32, shape_id2:List(Field { name: "item", data_type: UInt32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N] + Aggregate: groupBy=[[shapes.shape_id]], aggr=[[array_agg(shapes.shape_id) AS shape_id2]] [shape_id:UInt32, shape_id2:List(Field { data_type: UInt32, nullable: true });N] TableScan: shapes projection=[shape_id] [shape_id:UInt32] - "### + " ); let results = df.collect().await?; diff --git a/datafusion/core/tests/csv_schema_fix_test.rs b/datafusion/core/tests/datasource/csv.rs similarity index 100% rename from datafusion/core/tests/csv_schema_fix_test.rs rename to datafusion/core/tests/datasource/csv.rs diff --git a/datafusion/core/tests/datasource/mod.rs b/datafusion/core/tests/datasource/mod.rs new file mode 100644 index 000000000000..3785aa076618 --- /dev/null +++ 
b/datafusion/core/tests/datasource/mod.rs @@ -0,0 +1,24 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Tests for various DataSources +//! +//! Note tests for the Parquet format are in `parquet_integration` binary + +// Include tests in csv module +mod csv; +mod object_store_access; diff --git a/datafusion/core/tests/datasource/object_store_access.rs b/datafusion/core/tests/datasource/object_store_access.rs new file mode 100644 index 000000000000..6b9585f408a1 --- /dev/null +++ b/datafusion/core/tests/datasource/object_store_access.rs @@ -0,0 +1,616 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Tests for object store access patterns with [`ListingTable`]\ +//! +//! These tests setup a `ListingTable` backed by an in-memory object store +//! that counts the number of requests made against it and then do +//! various operations (table creation, queries with and without predicates) +//! to verify the expected object store access patterns. +//! +//! 
[`ListingTable`]: datafusion::datasource::listing::ListingTable + +use arrow::array::{ArrayRef, Int32Array, RecordBatch}; +use async_trait::async_trait; +use bytes::Bytes; +use datafusion::prelude::{CsvReadOptions, SessionContext}; +use futures::stream::BoxStream; +use insta::assert_snapshot; +use object_store::memory::InMemory; +use object_store::path::Path; +use object_store::{ + GetOptions, GetRange, GetResult, ListResult, MultipartUpload, ObjectMeta, + ObjectStore, PutMultipartOptions, PutOptions, PutPayload, PutResult, +}; +use parking_lot::Mutex; +use std::fmt; +use std::fmt::{Display, Formatter}; +use std::ops::Range; +use std::sync::Arc; +use url::Url; + +#[tokio::test] +async fn create_single_csv_file() { + assert_snapshot!( + single_file_csv_test().await.requests(), + @r" + RequestCountingObjectStore() + Total Requests: 2 + - HEAD path=csv_table.csv + - GET path=csv_table.csv + " + ); +} + +#[tokio::test] +async fn query_single_csv_file() { + assert_snapshot!( + single_file_csv_test().await.query("select * from csv_table").await, + @r" + ------- Query Output (2 rows) ------- + +---------+-------+-------+ + | c1 | c2 | c3 | + +---------+-------+-------+ + | 0.00001 | 5e-12 | true | + | 0.00002 | 4e-12 | false | + +---------+-------+-------+ + ------- Object Store Request Summary ------- + RequestCountingObjectStore() + Total Requests: 2 + - HEAD path=csv_table.csv + - GET (opts) path=csv_table.csv + " + ); +} + +#[tokio::test] +async fn create_multi_file_csv_file() { + assert_snapshot!( + multi_file_csv_test().await.requests(), + @r" + RequestCountingObjectStore() + Total Requests: 4 + - LIST prefix=data + - GET path=data/file_0.csv + - GET path=data/file_1.csv + - GET path=data/file_2.csv + " + ); +} + +#[tokio::test] +async fn query_multi_csv_file() { + assert_snapshot!( + multi_file_csv_test().await.query("select * from csv_table").await, + @r" + ------- Query Output (6 rows) ------- + +---------+-------+-------+ + | c1 | c2 | c3 | + +---------+-------+-------+ + | 0.0 | 0.0 | true | + | 0.00003 | 5e-12 | false | + | 0.00001 | 1e-12 | true | + | 0.00003 | 5e-12 | false | + | 0.00002 | 2e-12 | true | + | 0.00003 | 5e-12 | false | + +---------+-------+-------+ + ------- Object Store Request Summary ------- + RequestCountingObjectStore() + Total Requests: 4 + - LIST prefix=data + - GET (opts) path=data/file_0.csv + - GET (opts) path=data/file_1.csv + - GET (opts) path=data/file_2.csv + " + ); +} + +#[tokio::test] +async fn create_single_parquet_file() { + assert_snapshot!( + single_file_parquet_test().await.requests(), + @r" + RequestCountingObjectStore() + Total Requests: 4 + - HEAD path=parquet_table.parquet + - GET (range) range=2986-2994 path=parquet_table.parquet + - GET (range) range=2264-2986 path=parquet_table.parquet + - GET (range) range=2124-2264 path=parquet_table.parquet + " + ); +} + +#[tokio::test] +async fn query_single_parquet_file() { + assert_snapshot!( + single_file_parquet_test().await.query("select count(distinct a), count(b) from parquet_table").await, + @r" + ------- Query Output (1 rows) ------- + +---------------------------------+------------------------+ + | count(DISTINCT parquet_table.a) | count(parquet_table.b) | + +---------------------------------+------------------------+ + | 200 | 200 | + +---------------------------------+------------------------+ + ------- Object Store Request Summary ------- + RequestCountingObjectStore() + Total Requests: 3 + - HEAD path=parquet_table.parquet + - GET (ranges) path=parquet_table.parquet ranges=4-534,534-1064 + 
- GET (ranges) path=parquet_table.parquet ranges=1064-1594,1594-2124 + " + ); +} + +#[tokio::test] +async fn query_single_parquet_file_with_single_predicate() { + // Note that evaluating predicates requires additional object store requests + // to fetch the data used to evaluate the predicates + assert_snapshot!( + single_file_parquet_test().await.query("select min(a), max(b) from parquet_table WHERE a > 150").await, + @r" + ------- Query Output (1 rows) ------- + +----------------------+----------------------+ + | min(parquet_table.a) | max(parquet_table.b) | + +----------------------+----------------------+ + | 151 | 1199 | + +----------------------+----------------------+ + ------- Object Store Request Summary ------- + RequestCountingObjectStore() + Total Requests: 2 + - HEAD path=parquet_table.parquet + - GET (ranges) path=parquet_table.parquet ranges=1064-1481,1481-1594,1594-2011,2011-2124 + " + ); +} + +#[tokio::test] +async fn query_single_parquet_file_multi_row_groups_multiple_predicates() { + // Note that evaluating predicates requires additional object store requests + // to fetch the data used to evaluate the predicates + assert_snapshot!( + single_file_parquet_test().await.query("select min(a), max(b) from parquet_table WHERE a > 50 AND b < 1150").await, + @r" + ------- Query Output (1 rows) ------- + +----------------------+----------------------+ + | min(parquet_table.a) | max(parquet_table.b) | + +----------------------+----------------------+ + | 51 | 1149 | + +----------------------+----------------------+ + ------- Object Store Request Summary ------- + RequestCountingObjectStore() + Total Requests: 3 + - HEAD path=parquet_table.parquet + - GET (ranges) path=parquet_table.parquet ranges=4-421,421-534,534-951,951-1064 + - GET (ranges) path=parquet_table.parquet ranges=1064-1481,1481-1594,1594-2011,2011-2124 + " + ); +} + +/// Create a test with a single CSV file with three columns and two rows +async fn single_file_csv_test() -> Test { + // upload CSV data to object store + let csv_data = r#"c1,c2,c3 +0.00001,5e-12,true +0.00002,4e-12,false +"#; + + Test::new() + .with_bytes("/csv_table.csv", csv_data) + .await + .register_csv("csv_table", "/csv_table.csv") + .await +} + +/// Create a test with three CSV files in a directory +async fn multi_file_csv_test() -> Test { + let mut test = Test::new(); + // upload CSV data to object store + for i in 0..3 { + let csv_data1 = format!( + r#"c1,c2,c3 +0.0000{i},{i}e-12,true +0.00003,5e-12,false +"# + ); + test = test + .with_bytes(&format!("/data/file_{i}.csv"), csv_data1) + .await; + } + // register table + test.register_csv("csv_table", "/data/").await +} + +/// Create a test with a single parquet file that has two +/// columns and two row groups +/// +/// Column "a": Int32 with values [0-99] in row group 1 +/// and [100-199] in row group 2 +/// +/// Column "b": Int32 with values [1000-1099] in row group 1 +/// and [1100-1199] in row group 2 +async fn single_file_parquet_test() -> Test { + // Create parquet bytes + let a: ArrayRef = Arc::new(Int32Array::from_iter_values(0..200)); + let b: ArrayRef = Arc::new(Int32Array::from_iter_values(1000..1200)); + let batch = RecordBatch::try_from_iter([("a", a), ("b", b)]).unwrap(); + + let mut buffer = vec![]; + let props = parquet::file::properties::WriterProperties::builder() + .set_max_row_group_size(100) + .build(); + let mut writer = + parquet::arrow::ArrowWriter::try_new(&mut buffer, batch.schema(), Some(props)) + .unwrap(); + writer.write(&batch).unwrap(); + writer.close().unwrap(); + + Test::new() 
.with_bytes("/parquet_table.parquet", buffer) + .await + .register_parquet("parquet_table", "/parquet_table.parquet") + .await +} + +/// Runs tests with a request counting object store +struct Test { + object_store: Arc<RequestCountingObjectStore>, + session_context: SessionContext, +} + +impl Test { + fn new() -> Self { + let object_store = Arc::new(RequestCountingObjectStore::new()); + let session_context = SessionContext::new(); + session_context + .runtime_env() + .register_object_store(&Url::parse("mem://").unwrap(), object_store.clone()); + Self { + object_store, + session_context, + } + } + + /// Returns a string representation of all recorded requests thus far + fn requests(&self) -> String { + format!("{}", self.object_store) + } + + /// Store the specified bytes at the given path + async fn with_bytes(self, path: &str, bytes: impl Into<Bytes>) -> Self { + let path = Path::from(path); + self.object_store + .inner + .put(&path, PutPayload::from(bytes.into())) + .await + .unwrap(); + self + } + + /// Register a CSV table at the given path in the in-memory object store + async fn register_csv(self, table_name: &str, path: &str) -> Self { + let mut options = CsvReadOptions::new(); + options.has_header = true; + let url = format!("mem://{path}"); + self.session_context + .register_csv(table_name, url, options) + .await + .unwrap(); + self + } + + /// Register a Parquet table at the given path in the in-memory object store + async fn register_parquet(self, table_name: &str, path: &str) -> Self { + let path = format!("mem://{path}"); + self.session_context + .register_parquet(table_name, path, Default::default()) + .await + .unwrap(); + self + } + + /// Runs the specified query and returns a string representation of the results + /// suitable for comparison with insta snapshots + /// + /// Clears all recorded requests before running the query + async fn query(&self, sql: &str) -> String { + self.object_store.clear_requests(); + let results = self + .session_context + .sql(sql) + .await + .unwrap() + .collect() + .await + .unwrap(); + + let num_rows = results.iter().map(|batch| batch.num_rows()).sum::<usize>(); + let formatted_result = + arrow::util::pretty::pretty_format_batches(&results).unwrap(); + + let object_store = &self.object_store; + + format!( + r#"------- Query Output ({num_rows} rows) ------- +{formatted_result} +------- Object Store Request Summary ------- +{object_store} +"# + ) + } +} + +/// Details of individual requests made through the [`RequestCountingObjectStore`] +#[derive(Clone, Debug)] +enum RequestDetails { + Get { path: Path }, + GetOpts { path: Path, get_options: GetOptions }, + GetRanges { path: Path, ranges: Vec<Range<u64>> }, + GetRange { path: Path, range: Range<u64> }, + Head { path: Path }, + List { prefix: Option<Path> }, + ListWithDelimiter { prefix: Option<Path> }, + ListWithOffset { prefix: Option<Path>, offset: Path }, +} + +fn display_range(range: &Range<u64>) -> impl Display + '_ { + struct Wrapper<'a>(&'a Range<u64>); + impl Display for Wrapper<'_> { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "{}-{}", self.0.start, self.0.end) + } + } + Wrapper(range) +} +impl Display for RequestDetails { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match self { + RequestDetails::Get { path } => { + write!(f, "GET path={path}") + } + RequestDetails::GetOpts { path, get_options } => { + write!(f, "GET (opts) path={path}")?; + if let Some(range) = &get_options.range { + match range { + GetRange::Bounded(range) => { + let range = display_range(range); + write!(f, " 
range={range}")?; + } + GetRange::Offset(offset) => { + write!(f, " range=offset:{offset}")?; + } + GetRange::Suffix(suffix) => { + write!(f, " range=suffix:{suffix}")?; + } + } + } + if let Some(version) = &get_options.version { + write!(f, " version={version}")?; + } + if get_options.head { + write!(f, " head=true")?; + } + Ok(()) + } + RequestDetails::GetRanges { path, ranges } => { + write!(f, "GET (ranges) path={path}")?; + if !ranges.is_empty() { + write!(f, " ranges=")?; + for (i, range) in ranges.iter().enumerate() { + if i > 0 { + write!(f, ",")?; + } + write!(f, "{}", display_range(range))?; + } + } + Ok(()) + } + RequestDetails::GetRange { path, range } => { + let range = display_range(range); + write!(f, "GET (range) range={range} path={path}") + } + RequestDetails::Head { path } => { + write!(f, "HEAD path={path}") + } + RequestDetails::List { prefix } => { + write!(f, "LIST")?; + if let Some(prefix) = prefix { + write!(f, " prefix={prefix}")?; + } + Ok(()) + } + RequestDetails::ListWithDelimiter { prefix } => { + write!(f, "LIST (with delimiter)")?; + if let Some(prefix) = prefix { + write!(f, " prefix={prefix}")?; + } + Ok(()) + } + RequestDetails::ListWithOffset { prefix, offset } => { + write!(f, "LIST (with offset) offset={offset}")?; + if let Some(prefix) = prefix { + write!(f, " prefix={prefix}")?; + } + Ok(()) + } + } + } +} + +#[derive(Debug)] +struct RequestCountingObjectStore { + /// Inner (memory) store + inner: Arc<InMemory>, + requests: Mutex<Vec<RequestDetails>>, +} + +impl Display for RequestCountingObjectStore { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "RequestCountingObjectStore()")?; + let requests = self.recorded_requests(); + write!(f, "\nTotal Requests: {}", requests.len())?; + for request in requests { + write!(f, "\n- {request}")?; + } + Ok(()) + } +} + +impl RequestCountingObjectStore { + pub fn new() -> Self { + let inner = Arc::new(InMemory::new()); + Self { + inner, + requests: Mutex::new(vec![]), + } + } + + pub fn clear_requests(&self) { + self.requests.lock().clear(); + } + + /// Return a copy of the recorded requests + pub fn recorded_requests(&self) -> Vec<RequestDetails> { + self.requests.lock().to_vec() + } +} + +#[async_trait] +impl ObjectStore for RequestCountingObjectStore { + async fn put_opts( + &self, + _location: &Path, + _payload: PutPayload, + _opts: PutOptions, + ) -> object_store::Result<PutResult> { + Err(object_store::Error::NotImplemented) + } + + async fn put_multipart_opts( + &self, + _location: &Path, + _opts: PutMultipartOptions, + ) -> object_store::Result<Box<dyn MultipartUpload>> { + Err(object_store::Error::NotImplemented) + } + + async fn get(&self, location: &Path) -> object_store::Result<GetResult> { + let result = self.inner.get(location).await?; + self.requests.lock().push(RequestDetails::Get { + path: location.to_owned(), + }); + Ok(result) + } + + async fn get_opts( + &self, + location: &Path, + options: GetOptions, + ) -> object_store::Result<GetResult> { + let result = self.inner.get_opts(location, options.clone()).await?; + self.requests.lock().push(RequestDetails::GetOpts { + path: location.to_owned(), + get_options: options, + }); + Ok(result) + } + + async fn get_range( + &self, + location: &Path, + range: Range<u64>, + ) -> object_store::Result<Bytes> { + let result = self.inner.get_range(location, range.clone()).await?; + self.requests.lock().push(RequestDetails::GetRange { + path: location.to_owned(), + range: range.clone(), + }); + Ok(result) + } + + async fn get_ranges( + &self, + location: &Path, + ranges: &[Range<u64>], + ) -> object_store::Result<Vec<Bytes>> 
{ + let result = self.inner.get_ranges(location, ranges).await?; + self.requests.lock().push(RequestDetails::GetRanges { + path: location.to_owned(), + ranges: ranges.to_vec(), + }); + Ok(result) + } + + async fn head(&self, location: &Path) -> object_store::Result<ObjectMeta> { + let result = self.inner.head(location).await?; + self.requests.lock().push(RequestDetails::Head { + path: location.to_owned(), + }); + Ok(result) + } + + async fn delete(&self, _location: &Path) -> object_store::Result<()> { + Err(object_store::Error::NotImplemented) + } + + fn list( + &self, + prefix: Option<&Path>, + ) -> BoxStream<'static, object_store::Result<ObjectMeta>> { + self.requests.lock().push(RequestDetails::List { + prefix: prefix.map(|p| p.to_owned()), + }); + + self.inner.list(prefix) + } + + fn list_with_offset( + &self, + prefix: Option<&Path>, + offset: &Path, + ) -> BoxStream<'static, object_store::Result<ObjectMeta>> { + self.requests.lock().push(RequestDetails::ListWithOffset { + prefix: prefix.map(|p| p.to_owned()), + offset: offset.to_owned(), + }); + self.inner.list_with_offset(prefix, offset) + } + + async fn list_with_delimiter( + &self, + prefix: Option<&Path>, + ) -> object_store::Result<ListResult> { + self.requests + .lock() + .push(RequestDetails::ListWithDelimiter { + prefix: prefix.map(|p| p.to_owned()), + }); + self.inner.list_with_delimiter(prefix).await + } + + async fn copy(&self, _from: &Path, _to: &Path) -> object_store::Result<()> { + Err(object_store::Error::NotImplemented) + } + + async fn copy_if_not_exists( + &self, + _from: &Path, + _to: &Path, + ) -> object_store::Result<()> { + Err(object_store::Error::NotImplemented) + } +} diff --git a/datafusion/core/tests/expr_api/mod.rs b/datafusion/core/tests/expr_api/mod.rs index 4aee274de908..84e644480a4f 100644 --- a/datafusion/core/tests/expr_api/mod.rs +++ b/datafusion/core/tests/expr_api/mod.rs @@ -320,6 +320,26 @@ async fn test_create_physical_expr() { create_simplified_expr_test(lit(1i32) + lit(2i32), "3"); } +#[test] +fn test_create_physical_expr_nvl2() { + let batch = &TEST_BATCH; + let df_schema = DFSchema::try_from(batch.schema()).unwrap(); + let ctx = SessionContext::new(); + + let expect_err = |expr| { + let physical_expr = ctx.create_physical_expr(expr, &df_schema).unwrap(); + let err = physical_expr.evaluate(batch).unwrap_err(); + assert!( + err.to_string() + .contains("nvl2 should have been simplified to case"), + "unexpected error: {err:?}" + ); + }; + + expect_err(nvl2(col("i"), lit(1i64), lit(0i64))); + expect_err(nvl2(lit(1i64), col("i"), lit(0i64))); +} + #[tokio::test] async fn test_create_physical_expr_coercion() { // create_physical_expr does apply type coercion and unwrapping in cast diff --git a/datafusion/core/tests/expr_api/simplification.rs b/datafusion/core/tests/expr_api/simplification.rs index 89651726a69a..572a7e2b335c 100644 --- a/datafusion/core/tests/expr_api/simplification.rs +++ b/datafusion/core/tests/expr_api/simplification.rs @@ -514,8 +514,7 @@ fn multiple_now() -> Result<()> { // expect the same timestamp appears in both exprs let actual = get_optimized_plan_formatted(plan, &time); let expected = format!( - "Projection: TimestampNanosecond({}, Some(\"+00:00\")) AS now(), TimestampNanosecond({}, Some(\"+00:00\")) AS t2\ - \n TableScan: test", + "Projection: TimestampNanosecond({}, Some(\"+00:00\")) AS now(), TimestampNanosecond({}, Some(\"+00:00\")) AS t2\n TableScan: test", time.timestamp_nanos_opt().unwrap(), time.timestamp_nanos_opt().unwrap() ); diff --git a/datafusion/core/tests/memory_limit/mod.rs 
b/datafusion/core/tests/memory_limit/mod.rs index 89bc48b1e634..5d8a1d24181c 100644 --- a/datafusion/core/tests/memory_limit/mod.rs +++ b/datafusion/core/tests/memory_limit/mod.rs @@ -23,6 +23,7 @@ use std::sync::{Arc, LazyLock}; #[cfg(feature = "extended_tests")] mod memory_limit_validation; +mod repartition_mem_limit; use arrow::array::{ArrayRef, DictionaryArray, Int32Array, RecordBatch, StringViewArray}; use arrow::compute::SortOptions; use arrow::datatypes::{Int32Type, SchemaRef}; diff --git a/datafusion/core/tests/memory_limit/repartition_mem_limit.rs b/datafusion/core/tests/memory_limit/repartition_mem_limit.rs new file mode 100644 index 000000000000..a7af2f01d1cc --- /dev/null +++ b/datafusion/core/tests/memory_limit/repartition_mem_limit.rs @@ -0,0 +1,116 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::sync::Arc; + +use arrow::array::{ArrayRef, Int32Array, RecordBatch}; +use datafusion::{ + assert_batches_sorted_eq, + prelude::{SessionConfig, SessionContext}, +}; +use datafusion_catalog::MemTable; +use datafusion_common::tree_node::{Transformed, TreeNode}; +use datafusion_execution::runtime_env::RuntimeEnvBuilder; +use datafusion_physical_plan::{repartition::RepartitionExec, ExecutionPlanProperties}; +use futures::TryStreamExt; +use itertools::Itertools; + +/// End to end test for spilling in RepartitionExec. +/// The idea is to run a real world query with a relatively low memory limit and +/// then drive one partition at a time, simulating dissimilar execution speeds across partitions. +/// Real world scenarios where this can happen include lopsided groups in a GROUP BY +/// (especially if one partition spills and others don't), or, in distributed systems, +/// an upstream node that is slower than the others. 
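+/// The test below asserts that `RepartitionExec` reports non-zero spill metrics (spilled bytes, spilled rows, and spill count) and that the results collected from both partitions still match the expected aggregate output.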
+#[tokio::test] +async fn test_repartition_memory_limit() { + let runtime = RuntimeEnvBuilder::new() + .with_memory_limit(1024 * 1024, 1.0) + .build() + .unwrap(); + let config = SessionConfig::new() + .with_batch_size(32) + .with_target_partitions(2); + let ctx = SessionContext::new_with_config_rt(config, Arc::new(runtime)); + let batches = vec![RecordBatch::try_from_iter(vec![( + "c1", + Arc::new(Int32Array::from_iter_values((0..10).cycle().take(100_000))) as ArrayRef, + )]) + .unwrap()]; + let table = Arc::new(MemTable::try_new(batches[0].schema(), vec![batches]).unwrap()); + ctx.register_table("t", table).unwrap(); + let plan = ctx + .state() + .create_logical_plan("SELECT c1, count(*) as c FROM t GROUP BY c1;") + .await + .unwrap(); + let plan = ctx.state().create_physical_plan(&plan).await.unwrap(); + assert_eq!(plan.output_partitioning().partition_count(), 2); + // Execute partition 0. This should cause items going to the other partitions to queue up + // and, because of the low memory limit, spill to disk. + let batches0 = Arc::clone(&plan) + .execute(0, ctx.task_ctx()) + .unwrap() + .try_collect::<Vec<_>>() + .await + .unwrap(); + + let mut metrics = None; + Arc::clone(&plan) + .transform_down(|node| { + if node.as_any().is::<RepartitionExec>() { + metrics = node.metrics(); + } + Ok(Transformed::no(node)) + }) + .unwrap(); + + let metrics = metrics.unwrap(); + assert!(metrics.spilled_bytes().unwrap() > 0); + assert!(metrics.spilled_rows().unwrap() > 0); + assert!(metrics.spill_count().unwrap() > 0); + + // Execute the other partition + let batches1 = Arc::clone(&plan) + .execute(1, ctx.task_ctx()) + .unwrap() + .try_collect::<Vec<_>>() + .await + .unwrap(); + + let all_batches = batches0 + .into_iter() + .chain(batches1.into_iter()) + .collect_vec(); + #[rustfmt::skip] + let expected = &[ + "+----+-------+", + "| c1 | c |", + "+----+-------+", + "| 0 | 10000 |", + "| 1 | 10000 |", + "| 2 | 10000 |", + "| 3 | 10000 |", + "| 4 | 10000 |", + "| 5 | 10000 |", + "| 6 | 10000 |", + "| 7 | 10000 |", + "| 8 | 10000 |", + "| 9 | 10000 |", + "+----+-------+", + ]; + assert_batches_sorted_eq!(expected, &all_batches); +} diff --git a/datafusion/core/tests/optimizer/mod.rs b/datafusion/core/tests/optimizer/mod.rs index 9899a0158fb8..aec32d05624c 100644 --- a/datafusion/core/tests/optimizer/mod.rs +++ b/datafusion/core/tests/optimizer/mod.rs @@ -144,8 +144,9 @@ fn test_sql(sql: &str) -> Result { let statement = &ast[0]; // create a logical query plan + let config = ConfigOptions::default(); let context_provider = MyContextProvider::default() - .with_udf(datetime::now()) + .with_udf(datetime::now(&config)) .with_udf(datafusion_functions::core::arrow_cast()) .with_udf(datafusion_functions::string::concat()) .with_udf(datafusion_functions::string::concat_ws()); diff --git a/datafusion/core/tests/parquet/encryption.rs b/datafusion/core/tests/parquet/encryption.rs index 819d8bf3a283..09b93f06ce85 100644 --- a/datafusion/core/tests/parquet/encryption.rs +++ b/datafusion/core/tests/parquet/encryption.rs @@ -314,7 +314,7 @@ async fn verify_file_encrypted( for col in row_group.columns() { assert!(matches!( col.crypto_metadata(), - Some(ColumnCryptoMetaData::EncryptionWithFooterKey) + Some(ColumnCryptoMetaData::ENCRYPTION_WITH_FOOTER_KEY) )); } } @@ -336,7 +336,7 @@ impl EncryptionFactory for MockEncryptionFactory { config: &EncryptionFactoryOptions, _schema: &SchemaRef, file_path: &object_store::path::Path, - ) -> datafusion_common::Result> { + ) -> datafusion_common::Result>> { assert_eq!( 
config.options.get("test_key"), Some(&"test value".to_string()) @@ -353,7 +353,7 @@ impl EncryptionFactory for MockEncryptionFactory { &self, config: &EncryptionFactoryOptions, file_path: &object_store::path::Path, - ) -> datafusion_common::Result> { + ) -> datafusion_common::Result>> { assert_eq!( config.options.get("test_key"), Some(&"test value".to_string()) diff --git a/datafusion/core/tests/parquet/filter_pushdown.rs b/datafusion/core/tests/parquet/filter_pushdown.rs index b769fec7d372..226497fe5824 100644 --- a/datafusion/core/tests/parquet/filter_pushdown.rs +++ b/datafusion/core/tests/parquet/filter_pushdown.rs @@ -631,8 +631,8 @@ async fn predicate_cache_pushdown_default() -> datafusion_common::Result<()> { #[tokio::test] async fn predicate_cache_pushdown_disable() -> datafusion_common::Result<()> { - // Can disable the cache even with filter pushdown by setting the size to 0. In this case we - // expect the inner records are reported but no records are read from the cache + // Can disable the cache even with filter pushdown by setting the size to 0. + // This results in no records read from the cache and no metrics reported let mut config = SessionConfig::new(); config.options_mut().execution.parquet.pushdown_filters = true; config @@ -641,13 +641,10 @@ async fn predicate_cache_pushdown_disable() -> datafusion_common::Result<()> { .parquet .max_predicate_cache_size = Some(0); let ctx = SessionContext::new_with_config(config); + // Since the cache is disabled, there is no reporting or use of the cache PredicateCacheTest { - // file has 8 rows, which need to be read twice, one for filter, one for - // final output - expected_inner_records: 16, - // Expect this to 0 records read as the cache is disabled. However, it is - // non zero due to https://github.com/apache/arrow-rs/issues/8307 - expected_records: 3, + expected_inner_records: 0, + expected_records: 0, } .run(&ctx) .await diff --git a/datafusion/core/tests/parquet/schema_adapter.rs b/datafusion/core/tests/parquet/schema_adapter.rs index 4ae2fa9b4c39..40fc6176e212 100644 --- a/datafusion/core/tests/parquet/schema_adapter.rs +++ b/datafusion/core/tests/parquet/schema_adapter.rs @@ -23,7 +23,9 @@ use arrow_schema::{DataType, Field, FieldRef, Schema, SchemaRef}; use bytes::{BufMut, BytesMut}; use datafusion::assert_batches_eq; use datafusion::common::Result; -use datafusion::datasource::listing::{ListingTable, ListingTableConfig}; +use datafusion::datasource::listing::{ + ListingTable, ListingTableConfig, ListingTableConfigExt, +}; use datafusion::prelude::{SessionConfig, SessionContext}; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion_common::DataFusionError; diff --git a/datafusion/core/tests/parquet_config.rs b/datafusion/core/tests/parquet_integration.rs similarity index 100% rename from datafusion/core/tests/parquet_config.rs rename to datafusion/core/tests/parquet_integration.rs diff --git a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs index a2c604a84e76..620259821871 100644 --- a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs +++ b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs @@ -33,16 +33,12 @@ use arrow::compute::SortOptions; use arrow::datatypes::{DataType, SchemaRef}; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{TreeNode, TransformedResult}; -use datafusion_common::{Result, ScalarValue, TableReference}; +use datafusion_common::{Result, 
TableReference}; use datafusion_datasource::file_scan_config::FileScanConfigBuilder; use datafusion_datasource::source::DataSourceExec; use datafusion_expr_common::operator::Operator; -use datafusion_expr::{JoinType, SortExpr, WindowFrame, WindowFrameBound, WindowFrameUnits, WindowFunctionDefinition}; +use datafusion_expr::{JoinType, SortExpr}; use datafusion_execution::object_store::ObjectStoreUrl; -use datafusion_functions_aggregate::average::avg_udaf; -use datafusion_functions_aggregate::count::count_udaf; -use datafusion_functions_aggregate::min_max::{max_udaf, min_udaf}; -use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use datafusion_physical_expr_common::sort_expr::{ LexOrdering, PhysicalSortExpr, PhysicalSortRequirement, OrderingRequirements }; @@ -52,8 +48,7 @@ use datafusion_physical_plan::limit::{GlobalLimitExec, LocalLimitExec}; use datafusion_physical_plan::repartition::RepartitionExec; use datafusion_physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; use datafusion_physical_plan::sorts::sort::SortExec; -use datafusion_physical_plan::windows::{create_window_expr, BoundedWindowAggExec, WindowAggExec}; -use datafusion_physical_plan::{displayable, get_plan_string, ExecutionPlan, InputOrderMode}; +use datafusion_physical_plan::{displayable, get_plan_string, ExecutionPlan}; use datafusion::datasource::physical_plan::CsvSource; use datafusion::datasource::listing::PartitionedFile; use datafusion_physical_optimizer::enforce_sorting::{EnforceSorting, PlanWithCorrespondingCoalescePartitions, PlanWithCorrespondingSort, parallelize_sorts, ensure_sorting}; @@ -93,13 +88,13 @@ fn csv_exec_sorted( /// Runs the sort enforcement optimizer and asserts the plan /// against the original and expected plans -struct EnforceSortingTest { +pub(crate) struct EnforceSortingTest { plan: Arc, repartition_sorts: bool, } impl EnforceSortingTest { - fn new(plan: Arc) -> Self { + pub(crate) fn new(plan: Arc) -> Self { Self { plan, repartition_sorts: false, @@ -107,14 +102,14 @@ impl EnforceSortingTest { } /// Set whether to repartition sorts - fn with_repartition_sorts(mut self, repartition_sorts: bool) -> Self { + pub(crate) fn with_repartition_sorts(mut self, repartition_sorts: bool) -> Self { self.repartition_sorts = repartition_sorts; self } /// Runs the enforce sorting test and returns a string with the input and /// optimized plan as strings for snapshot comparison using insta - fn run(&self) -> String { + pub(crate) fn run(&self) -> String { let mut config = ConfigOptions::new(); config.optimizer.repartition_sorts = self.repartition_sorts; @@ -672,12 +667,12 @@ async fn test_soft_hard_requirements_remove_soft_requirement() -> Result<()> { let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true); assert_snapshot!(test.run(), @r#" Input Plan: - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet Optimized Plan: - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE 
BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet "#); @@ -721,13 +716,13 @@ async fn test_soft_hard_requirements_remove_soft_requirement_without_pushdowns( assert_snapshot!(test.run(), @r#" Input Plan: ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as count] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet Optimized Plan: ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as count] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet "#); @@ -768,13 +763,13 @@ async fn test_soft_hard_requirements_remove_soft_requirement_without_pushdowns( let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true); assert_snapshot!(test.run(), @r#" Input Plan: - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as nullable_col] SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet Optimized Plan: - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as nullable_col] SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false] @@ -829,15 +824,15 @@ async fn test_soft_hard_requirements_multiple_soft_requirements() -> Result<()> let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true); assert_snapshot!(test.run(), @r#" Input Plan: - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 
0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as nullable_col] SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet Optimized Plan: - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as nullable_col] SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false] @@ -894,17 +889,17 @@ async fn test_soft_hard_requirements_multiple_soft_requirements() -> Result<()> let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true); assert_snapshot!(test.run(), @r#" Input Plan: - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false] SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as nullable_col] SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet Optimized Plan: - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, 
metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as nullable_col] SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false] @@ -966,14 +961,14 @@ async fn test_soft_hard_requirements_multiple_sorts() -> Result<()> { Input Plan: SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false] SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as nullable_col] SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet Optimized Plan: SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as nullable_col] SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false] @@ -1028,16 +1023,16 @@ async fn test_soft_hard_requirements_with_multiple_soft_requirements_and_output_ assert_snapshot!(test.run(), @r#" Input Plan: OutputRequirementExec: order_by=[(non_nullable_col@1, asc)], dist_by=SinglePartition - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet Optimized Plan: OutputRequirementExec: order_by=[(non_nullable_col@1, asc)], dist_by=SinglePartition - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN 
UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] SortExec: expr=[non_nullable_col@1 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet "#); @@ -1086,7 +1081,7 @@ async fn test_window_multi_path_sort() -> Result<()> { let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true); assert_snapshot!(test.run(), @r#" Input Plan: - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] SortPreservingMergeExec: [nullable_col@0 DESC NULLS LAST] UnionExec SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false] @@ -1095,7 +1090,7 @@ async fn test_window_multi_path_sort() -> Result<()> { DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet Optimized Plan: - WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64 }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] SortPreservingMergeExec: [nullable_col@0 ASC] UnionExec DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC, non_nullable_col@1 ASC], file_type=parquet @@ -1127,7 +1122,7 @@ async fn test_window_multi_path_sort2() -> Result<()> { let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true); assert_snapshot!(test.run(), @r#" Input Plan: - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC] UnionExec SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false] @@ -1136,7 +1131,7 @@ async fn test_window_multi_path_sort2() -> Result<()> { DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet Optimized Plan: - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, 
frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] SortPreservingMergeExec: [nullable_col@0 ASC] UnionExec DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet @@ -1683,7 +1678,7 @@ async fn test_window_multi_layer_requirement() -> Result<()> { EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(true); assert_snapshot!(test.run(), @r#" Input Plan: - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] SortPreservingMergeExec: [a@0 ASC, b@1 ASC] RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=10, preserve_order=true, sort_exprs=a@0 ASC, b@1 ASC RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 @@ -1691,7 +1686,7 @@ async fn test_window_multi_layer_requirement() -> Result<()> { DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false Optimized Plan: - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] SortPreservingMergeExec: [a@0 ASC, b@1 ASC] SortExec: expr=[a@0 ASC, b@1 ASC], preserve_partitioning=[true] RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=10 @@ -1788,18 +1783,18 @@ async fn test_remove_unnecessary_sort_window_multilayer() -> Result<()> { EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(true); assert_snapshot!(test.run(), @r#" Input Plan: - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] FilterExec: NOT non_nullable_col@1 SortExec: expr=[non_nullable_col@1 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] CoalesceBatchesExec: target_batch_size=128 SortExec: expr=[non_nullable_col@1 DESC], preserve_partitioning=[false] DataSourceExec: partitions=1, partition_sizes=[0] Optimized Plan: - WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64 }), frame: WindowFrame { units: Range, 
start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] FilterExec: NOT non_nullable_col@1 - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] CoalesceBatchesExec: target_batch_size=128 SortExec: expr=[non_nullable_col@1 DESC], preserve_partitioning=[false] DataSourceExec: partitions=1, partition_sizes=[0] @@ -2243,17 +2238,17 @@ async fn test_multiple_sort_window_exec() -> Result<()> { EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(true); assert_snapshot!(test.run(), @r#" Input Plan: - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false] DataSourceExec: partitions=1, partition_sizes=[0] Optimized Plan: - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false] DataSourceExec: partitions=1, partition_sizes=[0] "#); @@ -2278,7 +2273,7 @@ async fn test_commutativity() -> Result<()> { assert_snapshot!(displayable(orig_plan.as_ref()).indent(true), @r#" SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false] RepartitionExec: 
partitioning=RoundRobinBatch(10), input_partitions=1 - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] DataSourceExec: partitions=1, partition_sizes=[0] "#); @@ -2487,1203 +2482,7 @@ async fn test_not_replaced_with_partial_sort_for_unbounded_input() -> Result<()> "); Ok(()) } -// aal here -#[tokio::test] -async fn test_window_partial_constant_and_set_monotonicity() -> Result<()> { - let input_schema = create_test_schema()?; - let ordering = [sort_expr_options( - "nullable_col", - &input_schema, - SortOptions { - descending: false, - nulls_first: false, - }, - )] - .into(); - let source = parquet_exec_with_sort(input_schema.clone(), vec![ordering]) as _; - - // Macro for testing window function optimization with snapshots - macro_rules! test_window_case { - ( - partition_by: $partition_by:expr, - window_frame: $window_frame:expr, - func: ($func_def:expr, $func_name:expr, $func_args:expr), - required_sort: [$($col:expr, $asc:expr, $nulls_first:expr),*], - @ $expected:literal - ) => {{ - let partition_by_exprs = if $partition_by { - vec![col("nullable_col", &input_schema)?] - } else { - vec![] - }; - - let window_expr = create_window_expr( - &$func_def, - $func_name, - &$func_args, - &partition_by_exprs, - &[], - $window_frame, - Arc::clone(&input_schema), - false, - false, - None, - )?; - - let window_exec = if window_expr.uses_bounded_memory() { - Arc::new(BoundedWindowAggExec::try_new( - vec![window_expr], - Arc::clone(&source), - InputOrderMode::Sorted, - $partition_by, - )?) as Arc<dyn ExecutionPlan> - } else { - Arc::new(WindowAggExec::try_new( - vec![window_expr], - Arc::clone(&source), - $partition_by, - )?) 
as Arc<dyn ExecutionPlan> - }; - - let output_schema = window_exec.schema(); - let sort_expr = vec![ - $( - sort_expr_options( - $col, - &output_schema, - SortOptions { - descending: !$asc, - nulls_first: $nulls_first, - }, - ) - ),* - ]; - let ordering = LexOrdering::new(sort_expr).unwrap(); - let physical_plan = sort_exec(ordering, window_exec); - - let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true); - - assert_snapshot!(test.run(), @ $expected); - - Result::<(), datafusion_common::DataFusionError>::Ok(()) - }}; - } - - // Function definition - Alias of the resulting column - Arguments of the function - #[derive(Clone)] - struct WindowFuncParam(WindowFunctionDefinition, String, Vec<Arc<dyn PhysicalExpr>>); - let function_arg_ordered = vec![col("nullable_col", &input_schema)?]; - let function_arg_unordered = vec![col("non_nullable_col", &input_schema)?]; - let fn_count_on_ordered = WindowFuncParam( - WindowFunctionDefinition::AggregateUDF(count_udaf()), - "count".to_string(), - function_arg_ordered.clone(), - ); - let fn_max_on_ordered = WindowFuncParam( - WindowFunctionDefinition::AggregateUDF(max_udaf()), - "max".to_string(), - function_arg_ordered.clone(), - ); - let fn_min_on_ordered = WindowFuncParam( - WindowFunctionDefinition::AggregateUDF(min_udaf()), - "min".to_string(), - function_arg_ordered.clone(), - ); - let fn_avg_on_ordered = WindowFuncParam( - WindowFunctionDefinition::AggregateUDF(avg_udaf()), - "avg".to_string(), - function_arg_ordered, - ); - let fn_count_on_unordered = WindowFuncParam( - WindowFunctionDefinition::AggregateUDF(count_udaf()), - "count".to_string(), - function_arg_unordered.clone(), - ); - let fn_max_on_unordered = WindowFuncParam( - WindowFunctionDefinition::AggregateUDF(max_udaf()), - "max".to_string(), - function_arg_unordered.clone(), - ); - let fn_min_on_unordered = WindowFuncParam( - WindowFunctionDefinition::AggregateUDF(min_udaf()), - "min".to_string(), - function_arg_unordered.clone(), - ); - let fn_avg_on_unordered = WindowFuncParam( - WindowFunctionDefinition::AggregateUDF(avg_udaf()), - "avg".to_string(), - function_arg_unordered, - ); - - // ============================================REGION STARTS============================================ - // WindowAggExec + Plain(unbounded preceding, unbounded following) + no partition_by + on ordered column - // Case 0: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new(None)), - func: (fn_count_on_ordered.0.clone(), fn_count_on_ordered.1.clone(), fn_count_on_ordered.2.clone()), - required_sort: ["nullable_col", true, false, "count", true, false], - @ r#" - Input Plan: - SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - - Optimized Plan: - WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], 
output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 1: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new(None)), - func: (fn_max_on_ordered.0.clone(), fn_max_on_ordered.1.clone(), fn_max_on_ordered.2.clone()), - required_sort: ["nullable_col", true, false, "max", false, false], - @ r#" - Input Plan: - SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 DESC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - - Optimized Plan: - WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 2: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new(None)), - func: (fn_min_on_ordered.0.clone(), fn_min_on_ordered.1.clone(), fn_min_on_ordered.2.clone()), - required_sort: ["min", false, false, "nullable_col", true, false], - @ r#" - Input Plan: - SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - - Optimized Plan: - WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 3: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new(None)), - func: (fn_avg_on_ordered.0.clone(), fn_avg_on_ordered.1.clone(), fn_avg_on_ordered.2.clone()), - required_sort: ["avg", true, false, "nullable_col", true, false], - @ r#" -Input Plan: -SortExec: expr=[avg@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: 
Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# - )?; - // =============================================REGION ENDS============================================= - // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = - // ============================================REGION STARTS============================================ - // WindowAggExec + Plain(unbounded preceding, unbounded following) + no partition_by + on unordered column - // Case 4: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new(None)), - func: (fn_count_on_unordered.0.clone(), fn_count_on_unordered.1.clone(), fn_count_on_unordered.2.clone()), - required_sort: ["non_nullable_col", true, false, "count", true, false], - @ r#" -Input Plan: -SortExec: expr=[non_nullable_col@1 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -SortExec: expr=[non_nullable_col@1 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# - )?; - - // Case 5: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new(None)), - func: (fn_max_on_unordered.0.clone(), fn_max_on_unordered.1.clone(), fn_max_on_unordered.2.clone()), - required_sort: ["non_nullable_col", false, false, "max", false, false], - @ r#" -Input Plan: -SortExec: expr=[non_nullable_col@1 DESC NULLS LAST, max@2 DESC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -SortExec: expr=[non_nullable_col@1 DESC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], 
file_type=parquet -"# - )?; - - // Case 6: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new(None)), - func: (fn_min_on_unordered.0.clone(), fn_min_on_unordered.1.clone(), fn_min_on_unordered.2.clone()), - required_sort: ["min", true, false, "non_nullable_col", true, false], - @ r#" -Input Plan: -SortExec: expr=[min@2 ASC NULLS LAST, non_nullable_col@1 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -SortExec: expr=[non_nullable_col@1 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# - )?; - - // Case 7: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new(None)), - func: (fn_avg_on_unordered.0.clone(), fn_avg_on_unordered.1.clone(), fn_avg_on_unordered.2.clone()), - required_sort: ["avg", false, false, "nullable_col", false, false], - @ r#" -Input Plan: -SortExec: expr=[avg@2 DESC NULLS LAST, nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# - )?; - // =============================================REGION ENDS============================================= - // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = - // ============================================REGION STARTS============================================ - // WindowAggExec + Plain(unbounded preceding, unbounded following) + partition_by + on ordered column - // Case 8: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new(None)), - func: (fn_count_on_ordered.0.clone(), fn_count_on_ordered.1.clone(), fn_count_on_ordered.2.clone()), - required_sort: ["nullable_col", true, false, "count", true, false], - @ r#" -Input Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 
ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# - )?; - - // Case 9: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new(None)), - func: (fn_max_on_ordered.0.clone(), fn_max_on_ordered.1.clone(), fn_max_on_ordered.2.clone()), - required_sort: ["nullable_col", true, false, "max", false, false], - @ r#" -Input Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 DESC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# - )?; - - // Case 10: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new(None)), - func: (fn_min_on_ordered.0.clone(), fn_min_on_ordered.1.clone(), fn_min_on_ordered.2.clone()), - required_sort: ["min", false, false, "nullable_col", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 11: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new(None)), - func: (fn_avg_on_ordered.0.clone(), fn_avg_on_ordered.1.clone(), fn_avg_on_ordered.2.clone()), - required_sort: ["avg", true, false, "nullable_col", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[avg@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: 
true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - // =============================================REGION ENDS============================================= - // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = - // ============================================REGION STARTS============================================ - // WindowAggExec + Plain(unbounded preceding, unbounded following) + partition_by + on unordered column - // Case 12: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new(None)), - func: (fn_count_on_unordered.0.clone(), fn_count_on_unordered.1.clone(), fn_count_on_unordered.2.clone()), - required_sort: ["non_nullable_col", true, false, "count", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[non_nullable_col@1 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 13: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new(None)), - func: (fn_max_on_unordered.0.clone(), fn_max_on_unordered.1.clone(), fn_max_on_unordered.2.clone()), - required_sort: ["non_nullable_col", true, false, "max", false, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[non_nullable_col@1 ASC NULLS LAST, max@2 DESC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 14: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new(None)), - func: (fn_min_on_unordered.0.clone(), fn_min_on_unordered.1.clone(), fn_min_on_unordered.2.clone()), - required_sort: ["min", false, false, "non_nullable_col", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[min@2 DESC NULLS LAST, non_nullable_col@1 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 15: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new(None)), - func: (fn_avg_on_unordered.0.clone(), 
fn_avg_on_unordered.1.clone(), fn_avg_on_unordered.2.clone()), - required_sort: ["avg", true, false, "nullable_col", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[avg@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // =============================================REGION ENDS============================================= - // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = - // ============================================REGION STARTS============================================ - // WindowAggExec + Sliding(current row, unbounded following) + no partition_by + on ordered column - // Case 16: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), - func: (fn_count_on_ordered.0.clone(), fn_count_on_ordered.1.clone(), fn_count_on_ordered.2.clone()), - required_sort: ["nullable_col", true, false, "count", false, false], - @ r#" -Input Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 DESC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# - )?; - - // Case 17: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), - func: (fn_max_on_ordered.0.clone(), fn_max_on_ordered.1.clone(), fn_max_on_ordered.2.clone()), - required_sort: ["max", false, true, "nullable_col", true, false], - @ r#" -Input Plan: -SortExec: expr=[max@2 DESC, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, 
projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# - )?; - - // Case 18: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), - func: (fn_min_on_ordered.0.clone(), fn_min_on_ordered.1.clone(), fn_min_on_ordered.2.clone()), - required_sort: ["min", true, true, "nullable_col", true, false], - @ r#" -Input Plan: -SortExec: expr=[min@2 ASC, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# - )?; - - // Case 19: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), - func: (fn_avg_on_ordered.0.clone(), fn_avg_on_ordered.1.clone(), fn_avg_on_ordered.2.clone()), - required_sort: ["avg", false, false, "nullable_col", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[avg@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - // =============================================REGION ENDS============================================= - // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = - // ============================================REGION STARTS============================================ - // WindowAggExec + Sliding(current row, unbounded following) + no partition_by + on unordered column - // Case 20: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), - func: (fn_count_on_unordered.0.clone(), fn_count_on_unordered.1.clone(), fn_count_on_unordered.2.clone()), - required_sort: ["nullable_col", true, false, "count", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 21: - 
test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), - func: (fn_max_on_unordered.0.clone(), fn_max_on_unordered.1.clone(), fn_max_on_unordered.2.clone()), - required_sort: ["nullable_col", true, false, "max", false, true], - @ r#" -Input Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 DESC], preserve_partitioning=[false] - WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# - )?; - - // Case 22: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), - func: (fn_min_on_unordered.0.clone(), fn_min_on_unordered.1.clone(), fn_min_on_unordered.2.clone()), - required_sort: ["min", true, false, "nullable_col", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[min@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 23: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), - func: (fn_avg_on_unordered.0.clone(), fn_avg_on_unordered.1.clone(), fn_avg_on_unordered.2.clone()), - required_sort: ["avg", false, false, "nullable_col", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[avg@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - // =============================================REGION ENDS============================================= - // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = - // ============================================REGION STARTS============================================ - // WindowAggExec + Sliding(current row, unbounded following) + partition_by + on ordered column - // Case 24: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), - func: (fn_count_on_ordered.0.clone(), 
fn_count_on_ordered.1.clone(), fn_count_on_ordered.2.clone()), - required_sort: ["nullable_col", true, false, "count", false, false], - @ r#" -Input Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 DESC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# - )?; - - // Case 25: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), - func: (fn_max_on_ordered.0.clone(), fn_max_on_ordered.1.clone(), fn_max_on_ordered.2.clone()), - required_sort: ["nullable_col", true, false, "max", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 26: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), - func: (fn_min_on_ordered.0.clone(), fn_min_on_ordered.1.clone(), fn_min_on_ordered.2.clone()), - required_sort: ["min", false, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[min@2 DESC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 27: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), - func: (fn_avg_on_ordered.0.clone(), fn_avg_on_ordered.1.clone(), fn_avg_on_ordered.2.clone()), - required_sort: ["avg", false, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[avg@2 DESC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - 
)?; - // =============================================REGION ENDS============================================= - // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = - // ============================================REGION STARTS============================================ - // WindowAggExec + Sliding(current row, unbounded following) + partition_by + on unordered column - // Case 28: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), - func: (fn_count_on_unordered.0.clone(), fn_count_on_unordered.1.clone(), fn_count_on_unordered.2.clone()), - required_sort: ["count", false, false, "nullable_col", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[count@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 29: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), - func: (fn_max_on_unordered.0.clone(), fn_max_on_unordered.1.clone(), fn_max_on_unordered.2.clone()), - required_sort: ["nullable_col", true, false, "max", false, true], - @ r#" -Input Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 DESC], preserve_partitioning=[false] - WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# - )?; - - // Case 30: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), - func: (fn_min_on_unordered.0.clone(), fn_min_on_unordered.1.clone(), fn_min_on_unordered.2.clone()), - required_sort: ["min", false, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[min@2 DESC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 31: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), - func: (fn_avg_on_unordered.0.clone(), fn_avg_on_unordered.1.clone(), 
fn_avg_on_unordered.2.clone()), - required_sort: ["nullable_col", true, false, "avg", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[nullable_col@0 ASC NULLS LAST, avg@2 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // =============================================REGION ENDS============================================= - // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = - // ============================================REGION STARTS============================================ - // BoundedWindowAggExec + Plain(unbounded preceding, unbounded following) + no partition_by + on ordered column - // Case 32: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new(Some(true))), - func: (fn_count_on_ordered.0.clone(), fn_count_on_ordered.1.clone(), fn_count_on_ordered.2.clone()), - required_sort: ["nullable_col", true, false, "count", true, false], - @ r#" -Input Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# - )?; - - // Case 33: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new(Some(true))), - func: (fn_max_on_ordered.0.clone(), fn_max_on_ordered.1.clone(), fn_max_on_ordered.2.clone()), - required_sort: ["max", false, false, "nullable_col", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[max@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 34: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new(Some(true))), - func: (fn_min_on_ordered.0.clone(), fn_min_on_ordered.1.clone(), fn_min_on_ordered.2.clone()), - required_sort: ["min", false, false, "nullable_col", true, false], - @ r#" -Input Plan: -SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[min: Field { name: 
"min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# - )?; - - // Case 35: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new(Some(true))), - func: (fn_avg_on_ordered.0.clone(), fn_avg_on_ordered.1.clone(), fn_avg_on_ordered.2.clone()), - required_sort: ["nullable_col", true, false, "avg", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[nullable_col@0 ASC NULLS LAST, avg@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - // =============================================REGION ENDS============================================= - // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = - // ============================================REGION STARTS============================================ - // BoundedWindowAggExec + Plain(unbounded preceding, unbounded following) + no partition_by + on unordered column - // Case 36: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new(Some(true))), - func: (fn_count_on_unordered.0.clone(), fn_count_on_unordered.1.clone(), fn_count_on_unordered.2.clone()), - required_sort: ["nullable_col", true, false, "count", true, true], - @ r#" -Input Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# - )?; - - // Case 37: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new(Some(true))), - func: (fn_max_on_unordered.0.clone(), fn_max_on_unordered.1.clone(), fn_max_on_unordered.2.clone()), - required_sort: ["max", true, false, "nullable_col", true, false], - @ r#" -Input Plan: -SortExec: expr=[max@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], 
preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# - )?; - - // Case 38: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new(Some(true))), - func: (fn_min_on_unordered.0.clone(), fn_min_on_unordered.1.clone(), fn_min_on_unordered.2.clone()), - required_sort: ["min", false, true, "nullable_col", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[min@2 DESC, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 39: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new(Some(true))), - func: (fn_avg_on_unordered.0.clone(), fn_avg_on_unordered.1.clone(), fn_avg_on_unordered.2.clone()), - required_sort: ["avg", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[avg@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - // =============================================REGION ENDS============================================= - // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = - // ============================================REGION STARTS============================================ - // BoundedWindowAggExec + Plain(unbounded preceding, unbounded following) + partition_by + on ordered column - // Case 40: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new(Some(true))), - func: (fn_count_on_ordered.0.clone(), fn_count_on_ordered.1.clone(), fn_count_on_ordered.2.clone()), - required_sort: ["nullable_col", true, false, "count", true, false], - @ r#" -Input Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], 
file_type=parquet - -Optimized Plan: -BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# - )?; - - // Case 41: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new(Some(true))), - func: (fn_max_on_ordered.0.clone(), fn_max_on_ordered.1.clone(), fn_max_on_ordered.2.clone()), - required_sort: ["max", true, false, "nullable_col", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[max@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 42: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new(Some(true))), - func: (fn_min_on_ordered.0.clone(), fn_min_on_ordered.1.clone(), fn_min_on_ordered.2.clone()), - required_sort: ["min", false, false, "nullable_col", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 43: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new(Some(true))), - func: (fn_avg_on_ordered.0.clone(), fn_avg_on_ordered.1.clone(), fn_avg_on_ordered.2.clone()), - required_sort: ["nullable_col", true, false, "avg", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[nullable_col@0 ASC NULLS LAST, avg@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // =============================================REGION ENDS============================================= - // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = - // ============================================REGION STARTS============================================ - // BoundedWindowAggExec + Plain(unbounded preceding, unbounded following) + partition_by + on unordered column - // Case 44: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new(Some(true))), - func: (fn_count_on_unordered.0.clone(), fn_count_on_unordered.1.clone(), fn_count_on_unordered.2.clone()), - required_sort: ["count", true, true], - @ r#" - Input / Optimized 
Plan: - SortExec: expr=[count@2 ASC], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 45: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new(Some(true))), - func: (fn_max_on_unordered.0.clone(), fn_max_on_unordered.1.clone(), fn_max_on_unordered.2.clone()), - required_sort: ["nullable_col", true, false, "max", false, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 DESC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 46: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new(Some(true))), - func: (fn_min_on_unordered.0.clone(), fn_min_on_unordered.1.clone(), fn_min_on_unordered.2.clone()), - required_sort: ["nullable_col", true, false, "min", false, false], - @ r#" -Input Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST, min@2 DESC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# - )?; - - // Case 47: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new(Some(true))), - func: (fn_avg_on_unordered.0.clone(), fn_avg_on_unordered.1.clone(), fn_avg_on_unordered.2.clone()), - required_sort: ["nullable_col", true, false], - @ r#" -Input Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC 
NULLS LAST], file_type=parquet -"# - )?; - - // =============================================REGION ENDS============================================= - // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = - // ============================================REGION STARTS============================================ - // BoundedWindowAggExec + Sliding(bounded preceding, bounded following) + no partition_by + on ordered column - // Case 48: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::CurrentRow)), - func: (fn_count_on_ordered.0.clone(), fn_count_on_ordered.1.clone(), fn_count_on_ordered.2.clone()), - required_sort: ["count", true, false, "nullable_col", true, false], - @ r#" -Input Plan: -SortExec: expr=[count@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# - )?; - - // Case 49: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::Following(ScalarValue::new_one(&DataType::UInt32)?))), - func: (fn_max_on_ordered.0.clone(), fn_max_on_ordered.1.clone(), fn_max_on_ordered.2.clone()), - required_sort: ["max", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[max@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 50: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::CurrentRow)), - func: (fn_min_on_ordered.0.clone(), fn_min_on_ordered.1.clone(), fn_min_on_ordered.2.clone()), - required_sort: ["nullable_col", true, false, "min", false, false], - @ r#" -Input Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST, min@2 DESC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], 
file_type=parquet - -Optimized Plan: -BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# - )?; - - // Case 51: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::CurrentRow)), - func: (fn_avg_on_ordered.0.clone(), fn_avg_on_ordered.1.clone(), fn_avg_on_ordered.2.clone()), - required_sort: ["avg", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[avg@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // =============================================REGION ENDS============================================= - // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = - // ============================================REGION STARTS============================================ - // BoundedWindowAggExec + Sliding(bounded preceding, bounded following) + no partition_by + on unordered column - // Case 52: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::Following(ScalarValue::new_one(&DataType::UInt32)?))), - func: (fn_count_on_unordered.0.clone(), fn_count_on_unordered.1.clone(), fn_count_on_unordered.2.clone()), - required_sort: ["count", true, false, "nullable_col", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[count@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 53: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::CurrentRow)), - func: (fn_max_on_unordered.0.clone(), fn_max_on_unordered.1.clone(), fn_max_on_unordered.2.clone()), - required_sort: ["nullable_col", true, false, "max", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC 
NULLS LAST], file_type=parquet - "# - )?; - - // Case 54: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::CurrentRow)), - func: (fn_min_on_unordered.0.clone(), fn_min_on_unordered.1.clone(), fn_min_on_unordered.2.clone()), - required_sort: ["min", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[min@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 55: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::Following(ScalarValue::new_one(&DataType::UInt32)?))), - func: (fn_avg_on_unordered.0.clone(), fn_avg_on_unordered.1.clone(), fn_avg_on_unordered.2.clone()), - required_sort: ["nullable_col", true, false], - @ r#" -Input Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# - )?; - - // =============================================REGION ENDS============================================= - // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = - // ============================================REGION STARTS============================================ - // BoundedWindowAggExec + Sliding(bounded preceding, bounded following) + partition_by + on ordered column - // Case 56: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::CurrentRow)), - func: (fn_count_on_ordered.0.clone(), fn_count_on_ordered.1.clone(), fn_count_on_ordered.2.clone()), - required_sort: ["count", true, false, "nullable_col", true, false], - @ r#" -Input Plan: -SortExec: expr=[count@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: 
-BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# - )?; - - // Case 57: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::Following(ScalarValue::new_one(&DataType::UInt32)?))), - func: (fn_max_on_ordered.0.clone(), fn_max_on_ordered.1.clone(), fn_max_on_ordered.2.clone()), - required_sort: ["nullable_col", true, false, "max", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 58: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::CurrentRow)), - func: (fn_min_on_ordered.0.clone(), fn_min_on_ordered.1.clone(), fn_min_on_ordered.2.clone()), - required_sort: ["min", false, false, "nullable_col", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 59: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::CurrentRow)), - func: (fn_avg_on_ordered.0.clone(), fn_avg_on_ordered.1.clone(), fn_avg_on_ordered.2.clone()), - required_sort: ["avg", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[avg@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - // =============================================REGION ENDS============================================= - // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = - // ============================================REGION STARTS============================================ - // BoundedWindowAggExec + Sliding(bounded preceding, bounded following) + partition_by + on unordered column - // Case 60: - 
test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::CurrentRow)), - func: (fn_count_on_unordered.0.clone(), fn_count_on_unordered.1.clone(), fn_count_on_unordered.2.clone()), - required_sort: ["nullable_col", true, false, "count", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 61: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::CurrentRow)), - func: (fn_max_on_unordered.0.clone(), fn_max_on_unordered.1.clone(), fn_max_on_unordered.2.clone()), - required_sort: ["nullable_col", true, false, "max", true, true], - @ r#" - Input / Optimized Plan: - SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 62: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::CurrentRow)), - func: (fn_min_on_unordered.0.clone(), fn_min_on_unordered.1.clone(), fn_min_on_unordered.2.clone()), - required_sort: ["nullable_col", true, false, "min", false, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[nullable_col@0 ASC NULLS LAST, min@2 DESC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 63: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::CurrentRow)), - func: (fn_avg_on_unordered.0.clone(), fn_avg_on_unordered.1.clone(), fn_avg_on_unordered.2.clone()), - required_sort: ["nullable_col", true, false], - @ r#" -Input Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], 
output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# - )?; - // =============================================REGION ENDS============================================= - - Ok(()) -} #[test] fn test_removes_unused_orthogonal_sort() -> Result<()> { let schema = create_test_schema3()?; diff --git a/datafusion/core/tests/physical_optimizer/enforce_sorting_monotonicity.rs b/datafusion/core/tests/physical_optimizer/enforce_sorting_monotonicity.rs new file mode 100644 index 000000000000..ef233e222912 --- /dev/null +++ b/datafusion/core/tests/physical_optimizer/enforce_sorting_monotonicity.rs @@ -0,0 +1,1715 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
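+
+//! Snapshot tests covering how the `EnforceSorting` physical optimizer rule
+//! interacts with window functions whose outputs are partially constant and/or
+//! set-monotonic (`count`, `max`, `min`, `avg`). Each case builds a
+//! `WindowAggExec` or `BoundedWindowAggExec` on top of a parquet source ordered
+//! on `nullable_col`, places a `SortExec` with the required output ordering
+//! above it, and snapshots the plan before and after optimization with `insta`.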
+
+use crate::physical_optimizer::test_utils::{
+    create_test_schema, parquet_exec_with_sort, sort_exec, sort_expr_options,
+};
+use arrow::datatypes::DataType;
+use arrow_schema::SortOptions;
+use datafusion::common::ScalarValue;
+use datafusion::logical_expr::WindowFrameBound;
+use datafusion::logical_expr::WindowFrameUnits;
+use datafusion_expr::{WindowFrame, WindowFunctionDefinition};
+use datafusion_functions_aggregate::average::avg_udaf;
+use datafusion_functions_aggregate::count::count_udaf;
+use datafusion_functions_aggregate::min_max::{max_udaf, min_udaf};
+use datafusion_physical_expr::expressions::col;
+use datafusion_physical_expr_common::physical_expr::PhysicalExpr;
+use datafusion_physical_expr_common::sort_expr::LexOrdering;
+use datafusion_physical_plan::windows::{
+    create_window_expr, BoundedWindowAggExec, WindowAggExec,
+};
+use datafusion_physical_plan::{ExecutionPlan, InputOrderMode};
+use insta::assert_snapshot;
+use std::sync::{Arc, LazyLock};
+
+// Function definition - Alias of the resulting column - Arguments of the function
+#[derive(Clone)]
+struct WindowFuncParam(
+    WindowFunctionDefinition,
+    &'static str,
+    Vec<Arc<dyn PhysicalExpr>>,
+);
+
+fn function_arg_ordered() -> Vec<Arc<dyn PhysicalExpr>> {
+    let input_schema = create_test_schema().unwrap();
+    vec![col("nullable_col", &input_schema).unwrap()]
+}
+fn function_arg_unordered() -> Vec<Arc<dyn PhysicalExpr>> {
+    let input_schema = create_test_schema().unwrap();
+    vec![col("non_nullable_col", &input_schema).unwrap()]
+}
+
+fn fn_count_on_ordered() -> WindowFuncParam {
+    WindowFuncParam(
+        WindowFunctionDefinition::AggregateUDF(count_udaf()),
+        "count",
+        function_arg_ordered(),
+    )
+}
+
+fn fn_max_on_ordered() -> WindowFuncParam {
+    WindowFuncParam(
+        WindowFunctionDefinition::AggregateUDF(max_udaf()),
+        "max",
+        function_arg_ordered(),
+    )
+}
+
+fn fn_min_on_ordered() -> WindowFuncParam {
+    WindowFuncParam(
+        WindowFunctionDefinition::AggregateUDF(min_udaf()),
+        "min",
+        function_arg_ordered(),
+    )
+}
+
+fn fn_avg_on_ordered() -> WindowFuncParam {
+    WindowFuncParam(
+        WindowFunctionDefinition::AggregateUDF(avg_udaf()),
+        "avg",
+        function_arg_ordered(),
+    )
+}
+
+fn fn_count_on_unordered() -> WindowFuncParam {
+    WindowFuncParam(
+        WindowFunctionDefinition::AggregateUDF(count_udaf()),
+        "count",
+        function_arg_unordered(),
+    )
+}
+
+fn fn_max_on_unordered() -> WindowFuncParam {
+    WindowFuncParam(
+        WindowFunctionDefinition::AggregateUDF(max_udaf()),
+        "max",
+        function_arg_unordered(),
+    )
+}
+fn fn_min_on_unordered() -> WindowFuncParam {
+    WindowFuncParam(
+        WindowFunctionDefinition::AggregateUDF(min_udaf()),
+        "min",
+        function_arg_unordered(),
+    )
+}
+
+fn fn_avg_on_unordered() -> WindowFuncParam {
+    WindowFuncParam(
+        WindowFunctionDefinition::AggregateUDF(avg_udaf()),
+        "avg",
+        function_arg_unordered(),
+    )
+}
+
+struct TestWindowCase {
+    partition_by: bool,
+    window_frame: Arc<WindowFrame>,
+    func: WindowFuncParam,
+    required_sort: Vec<(&'static str, bool, bool)>, // (column name, ascending, nulls_first)
+}
+impl TestWindowCase {
+    fn source() -> Arc<dyn ExecutionPlan> {
+        static SOURCE: LazyLock<Arc<dyn ExecutionPlan>> = LazyLock::new(|| {
+            let input_schema = create_test_schema().unwrap();
+            let ordering = [sort_expr_options(
+                "nullable_col",
+                &input_schema,
+                SortOptions {
+                    descending: false,
+                    nulls_first: false,
+                },
+            )]
+            .into();
+            parquet_exec_with_sort(input_schema.clone(), vec![ordering])
+        });
+        Arc::clone(&SOURCE)
+    }
+
+    // runs the window test case and returns the string representation of the plan
+    fn run(self) -> String {
+        let input_schema = create_test_schema().unwrap();
+        let source =
Self::source();
+
+        let Self {
+            partition_by,
+            window_frame,
+            func: WindowFuncParam(func_def, func_name, func_args),
+            required_sort,
+        } = self;
+        let partition_by_exprs = if partition_by {
+            vec![col("nullable_col", &input_schema).unwrap()]
+        } else {
+            vec![]
+        };
+
+        let window_expr = create_window_expr(
+            &func_def,
+            func_name.to_string(),
+            &func_args,
+            &partition_by_exprs,
+            &[],
+            window_frame,
+            Arc::clone(&input_schema),
+            false,
+            false,
+            None,
+        )
+        .unwrap();
+
+        let window_exec = if window_expr.uses_bounded_memory() {
+            Arc::new(
+                BoundedWindowAggExec::try_new(
+                    vec![window_expr],
+                    Arc::clone(&source),
+                    InputOrderMode::Sorted,
+                    partition_by,
+                )
+                .unwrap(),
+            ) as Arc<dyn ExecutionPlan>
+        } else {
+            Arc::new(
+                WindowAggExec::try_new(
+                    vec![window_expr],
+                    Arc::clone(&source),
+                    partition_by,
+                )
+                .unwrap(),
+            ) as Arc<dyn ExecutionPlan>
+        };
+
+        let output_schema = window_exec.schema();
+        let sort_expr = required_sort.into_iter().map(|(col, asc, nulls_first)| {
+            sort_expr_options(
+                col,
+                &output_schema,
+                SortOptions {
+                    descending: !asc,
+                    nulls_first,
+                },
+            )
+        });
+        let ordering = LexOrdering::new(sort_expr).unwrap();
+        let physical_plan = sort_exec(ordering, window_exec);
+
+        crate::physical_optimizer::enforce_sorting::EnforceSortingTest::new(physical_plan)
+            .with_repartition_sorts(true)
+            .run()
+    }
+}
+#[test]
+fn test_window_partial_constant_and_set_monotonicity_0() {
+    // ============================================REGION STARTS============================================
+    // WindowAggExec + Plain(unbounded preceding, unbounded following) + no partition_by + on ordered column
+    // Case 0:
+    assert_snapshot!(TestWindowCase {
+        partition_by: false,
+        window_frame: Arc::new(WindowFrame::new(None)),
+        func: fn_count_on_ordered(),
+        required_sort: vec![
+            ("nullable_col", true, false),
+            ("count", true, false),
+        ],
+    }.run(),
+    @ r#"
+    Input Plan:
+      SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false]
+        WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64 }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]
+          DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
+
+    Optimized Plan:
+      WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64 }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
+    "#
+    );
+}
+
+#[test]
+fn test_window_partial_constant_and_set_monotonicity_1() {
+    assert_snapshot!(TestWindowCase {
+        partition_by: false,
+        window_frame: Arc::new(WindowFrame::new(None)),
+        func: fn_max_on_ordered(),
+        required_sort: vec![
+            ("nullable_col", true, false),
+            ("max", false, false),
+        ],
+    }.run(),
+    @ r#"
+    Input Plan:
+      SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 DESC NULLS LAST], preserve_partitioning=[false]
+        WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]
+          DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
+
+ Optimized Plan: + WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +#[test] +fn test_window_partial_constant_and_set_monotonicity_2() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new(None)), + func: fn_min_on_ordered(), + required_sort: vec![ + ("min", false, false), + ("nullable_col", true, false), + ], + }.run(), + @ r#" + Input Plan: + SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + + Optimized Plan: + WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +#[test] +fn test_window_partial_constant_and_set_monotonicity_3() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new(None)), + func: fn_avg_on_ordered(), + required_sort: vec![ + ("avg", true, false), + ("nullable_col", true, false), + ], + }.run(), + @ r#" + Input Plan: + SortExec: expr=[avg@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + + Optimized Plan: + WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +#[test] +fn test_window_partial_constant_and_set_monotonicity_4() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new(None)), + func: fn_count_on_unordered(), + required_sort: vec![ + ("non_nullable_col", true, false), + ("count", true, false), + ], + }.run(), + @ r#" + Input Plan: + SortExec: expr=[non_nullable_col@1 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64 }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, 
projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + + Optimized Plan: + SortExec: expr=[non_nullable_col@1 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64 }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +#[test] +fn test_window_partial_constant_and_set_monotonicity_5() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new(None)), + func: fn_max_on_unordered(), + required_sort: vec![ + ("non_nullable_col", false, false), + ("max", false, false), + ], + }.run(), + @ r#" + Input Plan: + SortExec: expr=[non_nullable_col@1 DESC NULLS LAST, max@2 DESC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + + Optimized Plan: + SortExec: expr=[non_nullable_col@1 DESC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +#[test] +fn test_window_partial_constant_and_set_monotonicity_6() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new(None)), + func: fn_min_on_unordered(), + required_sort: vec![ + ("min", true, false), + ("non_nullable_col", true, false), + ], + }.run(), + @ r#" + Input Plan: + SortExec: expr=[min@2 ASC NULLS LAST, non_nullable_col@1 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + + Optimized Plan: + SortExec: expr=[non_nullable_col@1 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +#[test] +fn test_window_partial_constant_and_set_monotonicity_7() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new(None)), + func: fn_avg_on_unordered(), + required_sort: vec![ + ("avg", false, false), + ("nullable_col", false, false), + ], + }.run(), + @ r#" + Input Plan: + 
SortExec: expr=[avg@2 DESC NULLS LAST, nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + + Optimized Plan: + SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// =============================================REGION ENDS============================================= +// = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = +// ============================================REGION STARTS============================================ + +#[test] +fn test_window_partial_constant_and_set_monotonicity_8() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new(None)), + func: fn_count_on_ordered(), + required_sort: vec![ + ("nullable_col", true, false), + ("count", true, false), + ], + }.run(), + @ r#" + Input Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64 }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + + Optimized Plan: + WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64 }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +#[test] +fn test_window_partial_constant_and_set_monotonicity_9() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new(None)), + func: fn_max_on_ordered(), + required_sort: vec![ + ("nullable_col", true, false), + ("max", false, false), + ], + }.run(), + @ r#" + Input Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 DESC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + + Optimized Plan: + WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: 
file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +#[test] +fn test_window_partial_constant_and_set_monotonicity_10() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new(None)), + func: fn_min_on_ordered(), + required_sort: vec![ + ("min", false, false), + ("nullable_col", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +#[test] +fn test_window_partial_constant_and_set_monotonicity_11() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new(None)), + func: fn_avg_on_ordered(), + required_sort: vec![ + ("avg", true, false), + ("nullable_col", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[avg@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// =============================================REGION ENDS============================================= +// = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = +// ============================================REGION STARTS============================================ +// WindowAggExec + Plain(unbounded preceding, unbounded following) + partition_by + on unordered column +// Case 12: +#[test] +fn test_window_partial_constant_and_set_monotonicity_12() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new(None)), + func: fn_count_on_unordered(), + required_sort: vec![ + ("non_nullable_col", true, false), + ("count", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[non_nullable_col@1 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64 }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 13: +#[test] +fn test_window_partial_constant_and_set_monotonicity_13() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new(None)), + func: fn_max_on_unordered(), + required_sort: vec![ + ("non_nullable_col", true, false), + ("max", false, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[non_nullable_col@1 ASC NULLS LAST, max@2 DESC NULLS LAST], preserve_partitioning=[false] 
+ WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 14: +#[test] +fn test_window_partial_constant_and_set_monotonicity_14() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new(None)), + func: fn_min_on_unordered(), + required_sort: vec![ + ("min", false, false), + ("non_nullable_col", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[min@2 DESC NULLS LAST, non_nullable_col@1 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 15: +#[test] +fn test_window_partial_constant_and_set_monotonicity_15() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new(None)), + func: fn_avg_on_unordered(), + required_sort: vec![ + ("avg", true, false), + ("nullable_col", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[avg@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// =============================================REGION ENDS============================================= +// = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = +// ============================================REGION STARTS============================================ +// WindowAggExec + Sliding(current row, unbounded following) + no partition_by + on ordered column +// Case 16: +#[test] +fn test_window_partial_constant_and_set_monotonicity_16() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), + func: fn_count_on_ordered(), + required_sort: vec![ + ("nullable_col", true, false), + ("count", false, false), + ], + }.run(), + @ r#" + Input Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 DESC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64 }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + + Optimized Plan: + WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64 }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + 
DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 17: +#[test] +fn test_window_partial_constant_and_set_monotonicity_17() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), + func: fn_max_on_ordered(), + required_sort: vec![ + ("max", false, true), + ("nullable_col", true, false), + ], + }.run(), + @ r#" + Input Plan: + SortExec: expr=[max@2 DESC, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + + Optimized Plan: + WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 18: +#[test] +fn test_window_partial_constant_and_set_monotonicity_18() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), + func: fn_min_on_ordered(), + required_sort: vec![ + ("min", true, true), + ("nullable_col", true, false), + ], + }.run(), + @ r#" + Input Plan: + SortExec: expr=[min@2 ASC, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + + Optimized Plan: + WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 19: +#[test] +fn test_window_partial_constant_and_set_monotonicity_19() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), + func: fn_avg_on_ordered(), + required_sort: vec![ + ("avg", false, false), + ("nullable_col", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[avg@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// =============================================REGION ENDS============================================= +// = = = 
= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = +// ============================================REGION STARTS============================================ +// WindowAggExec + Sliding(current row, unbounded following) + no partition_by + on unordered column +// Case 20: +#[test] +fn test_window_partial_constant_and_set_monotonicity_20() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), + func: fn_count_on_unordered(), + required_sort: vec![ + ("nullable_col", true, false), + ("count", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64 }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 21: +#[test] +fn test_window_partial_constant_and_set_monotonicity_21() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), + func: fn_max_on_unordered(), + required_sort: vec![ + ("nullable_col", true, false), + ("max", false, true), + ], + }.run(), + @ r#" + Input Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 DESC], preserve_partitioning=[false] + WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + + Optimized Plan: + WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 22: +#[test] +fn test_window_partial_constant_and_set_monotonicity_22() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), + func: fn_min_on_unordered(), + required_sort: vec![ + ("min", true, false), + ("nullable_col", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[min@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 23: +#[test] +fn test_window_partial_constant_and_set_monotonicity_23() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), + func: fn_avg_on_unordered(), + required_sort: vec![ + ("avg", false, false), + ("nullable_col", true, false), + ], + }.run(), + @ r#" + 
Input / Optimized Plan: + SortExec: expr=[avg@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// =============================================REGION ENDS============================================= +// = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = +// ============================================REGION STARTS============================================ +// WindowAggExec + Sliding(current row, unbounded following) + partition_by + on ordered column +// Case 24: +#[test] +fn test_window_partial_constant_and_set_monotonicity_24() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), + func: fn_count_on_ordered(), + required_sort: vec![ + ("nullable_col", true, false), + ("count", false, false), + ], + }.run(), + @ r#" + Input Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 DESC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64 }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + + Optimized Plan: + WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64 }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 25: +#[test] +fn test_window_partial_constant_and_set_monotonicity_25() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), + func: fn_max_on_ordered(), + required_sort: vec![ + ("nullable_col", true, false), + ("max", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 26: +#[test] +fn test_window_partial_constant_and_set_monotonicity_26() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), + func: fn_min_on_ordered(), + required_sort: vec![ + ("min", false, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[min@2 DESC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: 
Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "#); +} + +// Case 27: +#[test] +fn test_window_partial_constant_and_set_monotonicity_27() { + assert_snapshot!( + TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), + func: fn_avg_on_ordered(), + required_sort: vec![ + ("avg", false, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[avg@2 DESC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "#); +} + +// =============================================REGION ENDS============================================= +// = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = +// ============================================REGION STARTS============================================ +// WindowAggExec + Sliding(current row, unbounded following) + partition_by + on unordered column + +// Case 28: +#[test] +fn test_window_partial_constant_and_set_monotonicity_28() { + assert_snapshot!( + TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), + func: fn_count_on_unordered(), + required_sort: vec![ + ("count", false, false), + ("nullable_col", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[count@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64 }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 29: +#[test] +fn test_window_partial_constant_and_set_monotonicity_29() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), + func: fn_max_on_unordered(), + required_sort: vec![ + ("nullable_col", true, false), + ("max", false, true), + ], + }.run(), + @ r#" + Input Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 DESC], preserve_partitioning=[false] + WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + + Optimized Plan: + WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "#) +} + +// Case 30: +#[test] +fn test_window_partial_constant_and_set_monotonicity_30() 
{ + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), + func: fn_min_on_unordered(), + required_sort: vec![ + ("min", false, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[min@2 DESC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "#); +} + +// Case 31: +#[test] +fn test_window_partial_constant_and_set_monotonicity_31() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), + func: fn_avg_on_unordered(), + required_sort: vec![ + ("nullable_col", true, false), + ("avg", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, avg@2 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// =============================================REGION ENDS============================================= +// = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = +// ============================================REGION STARTS============================================ +// BoundedWindowAggExec + Plain(unbounded preceding, unbounded following) + no partition_by + on ordered column + +// Case 32: +#[test] +fn test_window_partial_constant_and_set_monotonicity_32() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new(Some(true))), + func: fn_count_on_ordered(), + required_sort: vec![ + ("nullable_col", true, false), + ("count", true, false), + ], + }.run(), + @ r#" + Input Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + + Optimized Plan: + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 33: +#[test] +fn test_window_partial_constant_and_set_monotonicity_33() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new(Some(true))), + func: fn_max_on_ordered(), + required_sort: vec![ + ("max", false, false), + ("nullable_col", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[max@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: 
wdw=[max: Field { "max": nullable Int32 }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 34: +#[test] +fn test_window_partial_constant_and_set_monotonicity_34() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new(Some(true))), + func: fn_min_on_ordered(), + required_sort: vec![ + ("min", false, false), + ("nullable_col", true, false), + ], + }.run(), + @ r#" + Input Plan: + SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[min: Field { "min": nullable Int32 }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + + Optimized Plan: + BoundedWindowAggExec: wdw=[min: Field { "min": nullable Int32 }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} +// Case 35: +#[test] +fn test_window_partial_constant_and_set_monotonicity_35() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new(Some(true))), + func: fn_avg_on_ordered(), + required_sort: vec![ + ("nullable_col", true, false), + ("avg", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, avg@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[avg: Field { "avg": nullable Float64 }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// =============================================REGION ENDS============================================= +// = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = +// ============================================REGION STARTS============================================ +// BoundedWindowAggExec + Plain(unbounded preceding, unbounded following) + no partition_by + on unordered column + +// Case 36: +#[test] +fn test_window_partial_constant_and_set_monotonicity_36() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new(Some(true))), + func: fn_count_on_unordered(), + required_sort: vec![ + ("nullable_col", true, false), + ("count", true, true), + ], + }.run(), + @ r#" + Input Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + + Optimized Plan: + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, 
non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 37: +#[test] +fn test_window_partial_constant_and_set_monotonicity_37() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new(Some(true))), + func: fn_max_on_unordered(), + required_sort: vec![ + ("max", true, false), + ("nullable_col", true, false), + ], + }.run(), + @ r#" + Input Plan: + SortExec: expr=[max@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[max: Field { "max": nullable Int32 }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + + Optimized Plan: + BoundedWindowAggExec: wdw=[max: Field { "max": nullable Int32 }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 38: +#[test] +fn test_window_partial_constant_and_set_monotonicity_38() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new(Some(true))), + func: fn_min_on_unordered(), + required_sort: vec![ + ("min", false, true), + ("nullable_col", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[min@2 DESC, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[min: Field { "min": nullable Int32 }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 39: +#[test] +fn test_window_partial_constant_and_set_monotonicity_39() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new(Some(true))), + func: fn_avg_on_unordered(), + required_sort: vec![ + ("avg", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[avg@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[avg: Field { "avg": nullable Float64 }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// =============================================REGION ENDS============================================= +// = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = +// ============================================REGION STARTS============================================ +// BoundedWindowAggExec + Plain(unbounded preceding, unbounded following) + partition_by + on ordered column + +// Case 40: +#[test] +fn test_window_partial_constant_and_set_monotonicity_40() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new(Some(true))), + func: fn_count_on_ordered(), + required_sort: vec![ + ("nullable_col", true, false), + ("count", true, false), + ], + }.run(), + @ r#" + Input Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] + 
BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + + Optimized Plan: + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 41: +#[test] +fn test_window_partial_constant_and_set_monotonicity_41() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new(Some(true))), + func: fn_max_on_ordered(), + required_sort: vec![ + ("max", true, false), + ("nullable_col", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[max@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[max: Field { "max": nullable Int32 }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 42: +#[test] +fn test_window_partial_constant_and_set_monotonicity_42() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new(Some(true))), + func: fn_min_on_ordered(), + required_sort: vec![ + ("min", false, false), + ("nullable_col", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[min: Field { "min": nullable Int32 }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 43: +#[test] +fn test_window_partial_constant_and_set_monotonicity_43() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new(Some(true))), + func: fn_avg_on_ordered(), + required_sort: vec![ + ("nullable_col", true, false), + ("avg", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, avg@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[avg: Field { "avg": nullable Float64 }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// =============================================REGION ENDS============================================= +// = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = +// ============================================REGION STARTS============================================ +// BoundedWindowAggExec + Plain(unbounded preceding, unbounded following) + partition_by + on unordered column + +// Case 44: +#[test] +fn test_window_partial_constant_and_set_monotonicity_44() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: 
Arc::new(WindowFrame::new(Some(true))), + func: fn_count_on_unordered(), + required_sort: vec![ + ("count", true, true), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[count@2 ASC], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 45: +#[test] +fn test_window_partial_constant_and_set_monotonicity_45() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new(Some(true))), + func: fn_max_on_unordered(), + required_sort: vec![ + ("nullable_col", true, false), + ("max", false, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 DESC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[max: Field { "max": nullable Int32 }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 46: +#[test] +fn test_window_partial_constant_and_set_monotonicity_46() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new(Some(true))), + func: fn_min_on_unordered(), + required_sort: vec![ + ("nullable_col", true, false), + ("min", false, false), + ], + }.run(), + @ r#" + Input Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, min@2 DESC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[min: Field { "min": nullable Int32 }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + + Optimized Plan: + BoundedWindowAggExec: wdw=[min: Field { "min": nullable Int32 }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 47: +#[test] +fn test_window_partial_constant_and_set_monotonicity_47() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new(Some(true))), + func: fn_avg_on_unordered(), + required_sort: vec![ + ("nullable_col", true, false), + ], + }.run(), + @ r#" + Input Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[avg: Field { "avg": nullable Float64 }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + + Optimized Plan: + BoundedWindowAggExec: wdw=[avg: Field { "avg": nullable Float64 }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// =============================================REGION ENDS============================================= +// = = 
= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = +// ============================================REGION STARTS============================================ +// BoundedWindowAggExec + Sliding(bounded preceding, bounded following) + no partition_by + on ordered column + +// Case 48: +#[test] +fn test_window_partial_constant_and_set_monotonicity_48() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32).unwrap()), WindowFrameBound::CurrentRow)), + func: fn_count_on_ordered(), + required_sort: vec![ + ("count", true, false), + ("nullable_col", true, false), + ], + }.run(), + @ r#" + Input Plan: + SortExec: expr=[count@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + + Optimized Plan: + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 49: +#[test] +fn test_window_partial_constant_and_set_monotonicity_49() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32).unwrap()), WindowFrameBound::Following(ScalarValue::new_one(&DataType::UInt32).unwrap()))), + func: fn_max_on_ordered(), + required_sort: vec![ + ("max", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[max@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[max: Field { "max": nullable Int32 }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 50: +#[test] +fn test_window_partial_constant_and_set_monotonicity_50() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32).unwrap()), WindowFrameBound::CurrentRow)), + func: fn_min_on_ordered(), + required_sort: vec![ + ("nullable_col", true, false), + ("min", false, false), + ], + }.run(), + @ r#" + Input Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, min@2 DESC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[min: Field { "min": nullable Int32 }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + + Optimized Plan: + BoundedWindowAggExec: wdw=[min: Field { "min": nullable Int32 }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], 
file_type=parquet + "# + ); +} + +// Case 51: +#[test] +fn test_window_partial_constant_and_set_monotonicity_51() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32).unwrap()), WindowFrameBound::CurrentRow)), + func: fn_avg_on_ordered(), + required_sort: vec![ + ("avg", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[avg@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[avg: Field { "avg": nullable Float64 }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// =============================================REGION ENDS============================================= +// = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = +// ============================================REGION STARTS============================================ +// BoundedWindowAggExec + Sliding(bounded preceding, bounded following) + no partition_by + on unordered column + +// Case 52: +#[test] +fn test_window_partial_constant_and_set_monotonicity_52() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32).unwrap()), WindowFrameBound::Following(ScalarValue::new_one(&DataType::UInt32).unwrap()))), + func: fn_count_on_unordered(), + required_sort: vec![ + ("count", true, false), + ("nullable_col", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[count@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 53: +#[test] +fn test_window_partial_constant_and_set_monotonicity_53() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32).unwrap()), WindowFrameBound::CurrentRow)), + func: fn_max_on_unordered(), + required_sort: vec![ + ("nullable_col", true, false), + ("max", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[max: Field { "max": nullable Int32 }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 54: +#[test] +fn test_window_partial_constant_and_set_monotonicity_54() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32).unwrap()), WindowFrameBound::CurrentRow)), + func: fn_min_on_unordered(), + required_sort: vec![ + ("min", true, false), + ], + 
}.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[min@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[min: Field { "min": nullable Int32 }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 55: +#[test] +fn test_window_partial_constant_and_set_monotonicity_55() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32).unwrap()), WindowFrameBound::Following(ScalarValue::new_one(&DataType::UInt32).unwrap()))), + func: fn_avg_on_unordered(), + required_sort: vec![ + ("nullable_col", true, false), + ], + }.run(), + @ r#" + Input Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[avg: Field { "avg": nullable Float64 }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + + Optimized Plan: + BoundedWindowAggExec: wdw=[avg: Field { "avg": nullable Float64 }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// =============================================REGION ENDS============================================= +// = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = +// ============================================REGION STARTS============================================ +// BoundedWindowAggExec + Sliding(bounded preceding, bounded following) + partition_by + on ordered column + +// Case 56: +#[test] +fn test_window_partial_constant_and_set_monotonicity_56() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32).unwrap()), WindowFrameBound::CurrentRow)), + func: fn_count_on_ordered(), + required_sort: vec![ + ("count", true, false), + ("nullable_col", true, false), + ], + }.run(), + @ r#" + Input Plan: + SortExec: expr=[count@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + + Optimized Plan: + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 57: +#[test] +fn test_window_partial_constant_and_set_monotonicity_57() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32).unwrap()), 
WindowFrameBound::Following(ScalarValue::new_one(&DataType::UInt32).unwrap()))), + func: fn_max_on_ordered(), + required_sort: vec![ + ("nullable_col", true, false), + ("max", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[max: Field { "max": nullable Int32 }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 58: +#[test] +fn test_window_partial_constant_and_set_monotonicity_58() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32).unwrap()), WindowFrameBound::CurrentRow)), + func: fn_min_on_ordered(), + required_sort: vec![ + ("min", false, false), + ("nullable_col", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[min: Field { "min": nullable Int32 }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 59: +#[test] +fn test_window_partial_constant_and_set_monotonicity_59() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32).unwrap()), WindowFrameBound::CurrentRow)), + func: fn_avg_on_ordered(), + required_sort: vec![ + ("avg", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[avg@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[avg: Field { "avg": nullable Float64 }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// =============================================REGION ENDS============================================= +// = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = +// ============================================REGION STARTS============================================ +// BoundedWindowAggExec + Sliding(bounded preceding, bounded following) + partition_by + on unordered column + +// Case 60: +#[test] +fn test_window_partial_constant_and_set_monotonicity_60() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32).unwrap()), WindowFrameBound::CurrentRow)), + func: fn_count_on_unordered(), + required_sort: vec![ + ("nullable_col", true, false), + ("count", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 
group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 61: +#[test] +fn test_window_partial_constant_and_set_monotonicity_61() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32).unwrap()), WindowFrameBound::CurrentRow)), + func: fn_max_on_unordered(), + required_sort: vec![ + ("nullable_col", true, false), + ("max", true, true), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[max: Field { "max": nullable Int32 }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 62: +#[test] +fn test_window_partial_constant_and_set_monotonicity_62() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32).unwrap()), WindowFrameBound::CurrentRow)), + func: fn_min_on_unordered(), + required_sort: vec![ + ("nullable_col", true, false), + ("min", false, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, min@2 DESC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[min: Field { "min": nullable Int32 }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 63: +#[test] +fn test_window_partial_constant_and_set_monotonicity_63() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32).unwrap()), WindowFrameBound::CurrentRow)), + func: fn_avg_on_unordered(), + required_sort: vec![ + ("nullable_col", true, false), + ], + }.run(), + @ r#" + Input Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[avg: Field { "avg": nullable Float64 }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + + Optimized Plan: + BoundedWindowAggExec: wdw=[avg: Field { "avg": nullable Float64 }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} +// =============================================REGION ENDS============================================= diff --git a/datafusion/core/tests/physical_optimizer/mod.rs b/datafusion/core/tests/physical_optimizer/mod.rs index 777c26e80e90..936c02eb2a02 100644 --- a/datafusion/core/tests/physical_optimizer/mod.rs +++ b/datafusion/core/tests/physical_optimizer/mod.rs @@ -21,6 +21,7 @@ mod aggregate_statistics; mod combine_partial_final_agg; mod enforce_distribution; mod 
enforce_sorting; +mod enforce_sorting_monotonicity; mod filter_pushdown; mod join_selection; mod limit_pushdown; diff --git a/datafusion/core/tests/physical_optimizer/partition_statistics.rs b/datafusion/core/tests/physical_optimizer/partition_statistics.rs index 62ab5cbc422b..49dc5b845605 100644 --- a/datafusion/core/tests/physical_optimizer/partition_statistics.rs +++ b/datafusion/core/tests/physical_optimizer/partition_statistics.rs @@ -17,6 +17,7 @@ #[cfg(test)] mod test { + use insta::assert_snapshot; use std::sync::Arc; use arrow::array::{Int32Array, RecordBatch}; @@ -606,21 +607,21 @@ mod test { .build() .map(Arc::new)?]; - let aggregate_exec_partial = Arc::new(AggregateExec::try_new( - AggregateMode::Partial, - group_by.clone(), - aggr_expr.clone(), - vec![None], - Arc::clone(&scan), - scan_schema.clone(), - )?) as _; - - let mut plan_string = get_plan_string(&aggregate_exec_partial); - let _ = plan_string.swap_remove(1); - let expected_plan = vec![ - "AggregateExec: mode=Partial, gby=[id@0 as id, 1 + id@0 as expr], aggr=[COUNT(c)]", - ]; - assert_eq!(plan_string, expected_plan); + let aggregate_exec_partial: Arc<dyn ExecutionPlan> = + Arc::new(AggregateExec::try_new( + AggregateMode::Partial, + group_by.clone(), + aggr_expr.clone(), + vec![None], + Arc::clone(&scan), + scan_schema.clone(), + )?) as _; + + let plan_string = get_plan_string(&aggregate_exec_partial).swap_remove(0); + assert_snapshot!( + plan_string, + @"AggregateExec: mode=Partial, gby=[id@0 as id, 1 + id@0 as expr], aggr=[COUNT(c)]" + ); let p0_statistics = aggregate_exec_partial.partition_statistics(Some(0))?; @@ -710,7 +711,10 @@ mod test { )?) as _; let agg_plan = get_plan_string(&agg_partial).remove(0); - assert_eq!("AggregateExec: mode=Partial, gby=[id@0 as id, 1 + id@0 as expr], aggr=[COUNT(c)]",agg_plan); + assert_snapshot!( + agg_plan, + @"AggregateExec: mode=Partial, gby=[id@0 as id, 1 + id@0 as expr], aggr=[COUNT(c)]" + ); let empty_stat = Statistics { num_rows: Precision::Exact(0), diff --git a/datafusion/core/tests/physical_optimizer/sanity_checker.rs b/datafusion/core/tests/physical_optimizer/sanity_checker.rs index ce6eb13c86c4..9867ed173341 100644 --- a/datafusion/core/tests/physical_optimizer/sanity_checker.rs +++ b/datafusion/core/tests/physical_optimizer/sanity_checker.rs @@ -421,7 +421,7 @@ async fn test_bounded_window_agg_sort_requirement() -> Result<()> { assert_snapshot!( actual, @r#" - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] SortExec: expr=[c9@0 ASC NULLS LAST], preserve_partitioning=[false] DataSourceExec: partitions=1, partition_sizes=[0] "# @@ -449,7 +449,7 @@ async fn test_bounded_window_agg_no_sort_requirement() -> Result<()> { assert_snapshot!( actual, @r#" - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] DataSourceExec: partitions=1, partition_sizes=[0] "# ); diff --git a/datafusion/core/tests/sql/explain_analyze.rs b/datafusion/core/tests/sql/explain_analyze.rs index e082cabaadaf..43f79ead0257
100644 --- a/datafusion/core/tests/sql/explain_analyze.rs +++ b/datafusion/core/tests/sql/explain_analyze.rs @@ -22,6 +22,7 @@ use rstest::rstest; use datafusion::config::ConfigOptions; use datafusion::physical_plan::display::DisplayableExecutionPlan; use datafusion::physical_plan::metrics::Timestamp; +use datafusion_common::format::ExplainAnalyzeLevel; use object_store::path::Path; #[tokio::test] @@ -62,36 +63,59 @@ async fn explain_analyze_baseline_metrics() { "AggregateExec: mode=Partial, gby=[]", "metrics=[output_rows=3, elapsed_compute=" ); + assert_metrics!( + &formatted, + "AggregateExec: mode=Partial, gby=[]", + "output_bytes=" + ); assert_metrics!( &formatted, "AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1]", "metrics=[output_rows=5, elapsed_compute=" ); + assert_metrics!( + &formatted, + "AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1]", + "output_bytes=" + ); assert_metrics!( &formatted, "FilterExec: c13@1 != C2GT5KVyOPZpgKVl110TyZO0NcJ434", "metrics=[output_rows=99, elapsed_compute=" ); + assert_metrics!( + &formatted, + "FilterExec: c13@1 != C2GT5KVyOPZpgKVl110TyZO0NcJ434", + "output_bytes=" + ); assert_metrics!( &formatted, "ProjectionExec: expr=[]", "metrics=[output_rows=5, elapsed_compute=" ); + assert_metrics!(&formatted, "ProjectionExec: expr=[]", "output_bytes="); assert_metrics!( &formatted, "CoalesceBatchesExec: target_batch_size=4096", "metrics=[output_rows=5, elapsed_compute" ); + assert_metrics!( + &formatted, + "CoalesceBatchesExec: target_batch_size=4096", + "output_bytes=" + ); assert_metrics!( &formatted, "UnionExec", "metrics=[output_rows=3, elapsed_compute=" ); + assert_metrics!(&formatted, "UnionExec", "output_bytes="); assert_metrics!( &formatted, "WindowAggExec", "metrics=[output_rows=1, elapsed_compute=" ); + assert_metrics!(&formatted, "WindowAggExec", "output_bytes="); fn expected_to_have_metrics(plan: &dyn ExecutionPlan) -> bool { use datafusion::physical_plan; @@ -158,6 +182,81 @@ async fn explain_analyze_baseline_metrics() { fn nanos_from_timestamp(ts: &Timestamp) -> i64 { ts.value().unwrap().timestamp_nanos_opt().unwrap() } + +// Test different detail level for config `datafusion.explain.analyze_level` + +async fn collect_plan_with_context( + sql_str: &str, + ctx: &SessionContext, + level: ExplainAnalyzeLevel, +) -> String { + { + let state = ctx.state_ref(); + let mut state = state.write(); + state.config_mut().options_mut().explain.analyze_level = level; + } + let dataframe = ctx.sql(sql_str).await.unwrap(); + let batches = dataframe.collect().await.unwrap(); + arrow::util::pretty::pretty_format_batches(&batches) + .unwrap() + .to_string() +} + +async fn collect_plan(sql_str: &str, level: ExplainAnalyzeLevel) -> String { + let ctx = SessionContext::new(); + collect_plan_with_context(sql_str, &ctx, level).await +} + +#[tokio::test] +async fn explain_analyze_level() { + let sql = "EXPLAIN ANALYZE \ + SELECT * \ + FROM generate_series(10) as t1(v1) \ + ORDER BY v1 DESC"; + + for (level, needle, should_contain) in [ + (ExplainAnalyzeLevel::Summary, "spill_count", false), + (ExplainAnalyzeLevel::Summary, "output_rows", true), + (ExplainAnalyzeLevel::Dev, "spill_count", true), + (ExplainAnalyzeLevel::Dev, "output_rows", true), + ] { + let plan = collect_plan(sql, level).await; + assert_eq!( + plan.contains(needle), + should_contain, + "plan for level {level:?} unexpected content: {plan}" + ); + } +} + +#[tokio::test] +async fn explain_analyze_level_datasource_parquet() { + let table_name = "tpch_lineitem_small"; + let parquet_path = 
"tests/data/tpch_lineitem_small.parquet"; + let sql = format!("EXPLAIN ANALYZE SELECT * FROM {table_name}"); + + // Register test parquet file into context + let ctx = SessionContext::new(); + ctx.register_parquet(table_name, parquet_path, ParquetReadOptions::default()) + .await + .expect("register parquet table for explain analyze test"); + + for (level, needle, should_contain) in [ + (ExplainAnalyzeLevel::Summary, "metadata_load_time", true), + (ExplainAnalyzeLevel::Summary, "page_index_eval_time", false), + (ExplainAnalyzeLevel::Dev, "metadata_load_time", true), + (ExplainAnalyzeLevel::Dev, "page_index_eval_time", true), + ] { + let plan = collect_plan_with_context(&sql, &ctx, level).await; + + assert_eq!( + plan.contains(needle), + should_contain, + "plan for level {level:?} unexpected content: {plan}" + ); + } +} + #[tokio::test] async fn csv_explain_plans() { // This test verify the look of each plan in its full cycle plan creation diff --git a/datafusion/core/tests/sql/select.rs b/datafusion/core/tests/sql/select.rs index 98c3e3ccee8a..8a0f62062738 100644 --- a/datafusion/core/tests/sql/select.rs +++ b/datafusion/core/tests/sql/select.rs @@ -15,8 +15,11 @@ // specific language governing permissions and limitations // under the License. +use std::collections::HashMap; + use super::*; -use datafusion_common::ScalarValue; +use datafusion::assert_batches_eq; +use datafusion_common::{metadata::ScalarAndMetadata, ParamValues, ScalarValue}; use insta::assert_snapshot; #[tokio::test] @@ -219,11 +222,11 @@ async fn test_parameter_invalid_types() -> Result<()> { .collect() .await; assert_snapshot!(results.unwrap_err().strip_backtrace(), - @r#" - type_coercion - caused by - Error during planning: Cannot infer common argument type for comparison operation List(Field { name: "item", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) = Int32 - "#); + @r" + type_coercion + caused by + Error during planning: Cannot infer common argument type for comparison operation List(nullable Int32) = Int32 + "); Ok(()) } @@ -317,6 +320,53 @@ async fn test_named_parameter_not_bound() -> Result<()> { Ok(()) } +#[tokio::test] +async fn test_query_parameters_with_metadata() -> Result<()> { + let ctx = SessionContext::new(); + + let df = ctx.sql("SELECT $1, $2").await.unwrap(); + + let metadata1 = HashMap::from([("some_key".to_string(), "some_value".to_string())]); + let metadata2 = + HashMap::from([("some_other_key".to_string(), "some_other_value".to_string())]); + + let df_with_params_replaced = df + .with_param_values(ParamValues::List(vec![ + ScalarAndMetadata::new( + ScalarValue::UInt32(Some(1)), + Some(metadata1.clone().into()), + ), + ScalarAndMetadata::new( + ScalarValue::Utf8(Some("two".to_string())), + Some(metadata2.clone().into()), + ), + ])) + .unwrap(); + + // df_with_params_replaced.schema() is not correct here + // https://github.com/apache/datafusion/issues/18102 + let batches = df_with_params_replaced.clone().collect().await.unwrap(); + let schema = batches[0].schema(); + + assert_eq!(schema.field(0).data_type(), &DataType::UInt32); + assert_eq!(schema.field(0).metadata(), &metadata1); + assert_eq!(schema.field(1).data_type(), &DataType::Utf8); + assert_eq!(schema.field(1).metadata(), &metadata2); + + assert_batches_eq!( + [ + "+----+-----+", + "| $1 | $2 |", + "+----+-----+", + "| 1 | two |", + "+----+-----+", + ], + &batches + ); + + Ok(()) +} + #[tokio::test] async fn test_version_function() { let expected_version = format!( diff --git 
a/datafusion/core/tests/user_defined/insert_operation.rs b/datafusion/core/tests/user_defined/insert_operation.rs index c8a4279a4211..e0a3e98604ae 100644 --- a/datafusion/core/tests/user_defined/insert_operation.rs +++ b/datafusion/core/tests/user_defined/insert_operation.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use std::{any::Any, sync::Arc}; +use std::{any::Any, str::FromStr, sync::Arc}; use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use async_trait::async_trait; @@ -24,6 +24,7 @@ use datafusion::{ prelude::{SessionConfig, SessionContext}, }; use datafusion_catalog::{Session, TableProvider}; +use datafusion_common::config::Dialect; use datafusion_expr::{dml::InsertOp, Expr, TableType}; use datafusion_physical_expr::{EquivalenceProperties, Partitioning}; use datafusion_physical_plan::execution_plan::SchedulingType; @@ -63,7 +64,7 @@ async fn assert_insert_op(ctx: &SessionContext, sql: &str, insert_op: InsertOp) fn session_ctx_with_dialect(dialect: impl Into<String>) -> SessionContext { let mut config = SessionConfig::new(); let options = config.options_mut(); - options.sql_parser.dialect = dialect.into(); + options.sql_parser.dialect = Dialect::from_str(&dialect.into()).unwrap(); SessionContext::new_with_config(config) } diff --git a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs index f1af66de9b59..fb1371da6ceb 100644 --- a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs @@ -34,13 +34,13 @@ use datafusion::execution::context::{FunctionFactory, RegisterFunction, SessionS use datafusion::prelude::*; use datafusion::{execution::registry::FunctionRegistry, test_util}; use datafusion_common::cast::{as_float64_array, as_int32_array}; +use datafusion_common::metadata::FieldMetadata; use datafusion_common::tree_node::{Transformed, TreeNode}; use datafusion_common::utils::take_function_args; use datafusion_common::{ assert_batches_eq, assert_batches_sorted_eq, assert_contains, exec_datafusion_err, exec_err, not_impl_err, plan_err, DFSchema, DataFusionError, Result, ScalarValue, }; -use datafusion_expr::expr::FieldMetadata; use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; use datafusion_expr::{ lit_with_metadata, Accumulator, ColumnarValue, CreateFunction, CreateFunctionBody, diff --git a/datafusion/datasource-arrow/Cargo.toml b/datafusion/datasource-arrow/Cargo.toml new file mode 100644 index 000000000000..b3d1e3f2accc --- /dev/null +++ b/datafusion/datasource-arrow/Cargo.toml @@ -0,0 +1,64 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License.
+ +[package] +name = "datafusion-datasource-arrow" +description = "datafusion-datasource-arrow" +readme = "README.md" +authors.workspace = true +edition.workspace = true +homepage.workspace = true +license.workspace = true +repository.workspace = true +rust-version.workspace = true +version.workspace = true + +[package.metadata.docs.rs] +all-features = true + +[dependencies] +arrow = { workspace = true } +arrow-ipc = { workspace = true } +async-trait = { workspace = true } +bytes = { workspace = true } +datafusion-common = { workspace = true, features = ["object_store"] } +datafusion-common-runtime = { workspace = true } +datafusion-datasource = { workspace = true } +datafusion-execution = { workspace = true } +datafusion-expr = { workspace = true } +datafusion-physical-expr-common = { workspace = true } +datafusion-physical-plan = { workspace = true } +datafusion-session = { workspace = true } +futures = { workspace = true } +itertools = { workspace = true } +object_store = { workspace = true } +tokio = { workspace = true } + +[dev-dependencies] +chrono = { workspace = true } + +[lints] +workspace = true + +[lib] +name = "datafusion_datasource_arrow" +path = "src/mod.rs" + +[features] +compression = [ + "arrow-ipc/zstd", +] diff --git a/datafusion/datasource-arrow/LICENSE.txt b/datafusion/datasource-arrow/LICENSE.txt new file mode 100644 index 000000000000..d74c6b599d2a --- /dev/null +++ b/datafusion/datasource-arrow/LICENSE.txt @@ -0,0 +1,212 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. 
For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +This project includes code from Apache Aurora. + +* dev/release/{release,changelog,release-candidate} are based on the scripts from + Apache Aurora + +Copyright: 2016 The Apache Software Foundation. +Home page: https://aurora.apache.org/ +License: http://www.apache.org/licenses/LICENSE-2.0 diff --git a/datafusion/datasource-arrow/NOTICE.txt b/datafusion/datasource-arrow/NOTICE.txt new file mode 100644 index 000000000000..7f3c80d606c0 --- /dev/null +++ b/datafusion/datasource-arrow/NOTICE.txt @@ -0,0 +1,5 @@ +Apache DataFusion +Copyright 2019-2025 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). 
diff --git a/datafusion/datasource-arrow/README.md b/datafusion/datasource-arrow/README.md new file mode 100644 index 000000000000..9901b52105dd --- /dev/null +++ b/datafusion/datasource-arrow/README.md @@ -0,0 +1,34 @@ + + +# Apache DataFusion Arrow DataSource + +[Apache DataFusion] is an extensible query execution framework, written in Rust, that uses [Apache Arrow] as its in-memory format. + +This crate is a submodule of DataFusion that defines a Arrow based file source. +It works with files following the [Arrow IPC format]. + +Most projects should use the [`datafusion`] crate directly, which re-exports +this module. If you are already using the [`datafusion`] crate, there is no +reason to use this crate directly in your project as well. + +[apache arrow]: https://arrow.apache.org/ +[apache datafusion]: https://datafusion.apache.org/ +[`datafusion`]: https://crates.io/crates/datafusion +[arrow ipc format]: https://arrow.apache.org/docs/format/Columnar.html#ipc-file-format diff --git a/datafusion/datasource-arrow/src/file_format.rs b/datafusion/datasource-arrow/src/file_format.rs new file mode 100644 index 000000000000..3b8564080421 --- /dev/null +++ b/datafusion/datasource-arrow/src/file_format.rs @@ -0,0 +1,603 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! [`ArrowFormat`]: Apache Arrow [`FileFormat`] abstractions +//! +//! 
Works with files following the [Arrow IPC format](https://arrow.apache.org/docs/format/Columnar.html#ipc-file-format) + +use std::any::Any; +use std::borrow::Cow; +use std::collections::HashMap; +use std::fmt::{self, Debug}; +use std::sync::Arc; + +use arrow::datatypes::{Schema, SchemaRef}; +use arrow::error::ArrowError; +use arrow::ipc::convert::fb_to_schema; +use arrow::ipc::reader::FileReader; +use arrow::ipc::writer::IpcWriteOptions; +use arrow::ipc::{root_as_message, CompressionType}; +use datafusion_common::error::Result; +use datafusion_common::parsers::CompressionTypeVariant; +use datafusion_common::{ + internal_datafusion_err, not_impl_err, DataFusionError, GetExt, Statistics, + DEFAULT_ARROW_EXTENSION, +}; +use datafusion_common_runtime::{JoinSet, SpawnedTask}; +use datafusion_datasource::display::FileGroupDisplay; +use datafusion_datasource::file::FileSource; +use datafusion_datasource::file_scan_config::{FileScanConfig, FileScanConfigBuilder}; +use datafusion_datasource::sink::{DataSink, DataSinkExec}; +use datafusion_datasource::write::{ + get_writer_schema, ObjectWriterBuilder, SharedBuffer, +}; +use datafusion_execution::{SendableRecordBatchStream, TaskContext}; +use datafusion_expr::dml::InsertOp; +use datafusion_physical_expr_common::sort_expr::LexRequirement; + +use crate::source::ArrowSource; +use async_trait::async_trait; +use bytes::Bytes; +use datafusion_datasource::file_compression_type::FileCompressionType; +use datafusion_datasource::file_format::{FileFormat, FileFormatFactory}; +use datafusion_datasource::file_sink_config::{FileSink, FileSinkConfig}; +use datafusion_datasource::source::DataSourceExec; +use datafusion_datasource::write::demux::DemuxedStreamReceiver; +use datafusion_physical_plan::{DisplayAs, DisplayFormatType, ExecutionPlan}; +use datafusion_session::Session; +use futures::stream::BoxStream; +use futures::StreamExt; +use object_store::{GetResultPayload, ObjectMeta, ObjectStore}; +use tokio::io::AsyncWriteExt; + +/// Initial writing buffer size. Note this is just a size hint for efficiency. It +/// will grow beyond the set value if needed. +const INITIAL_BUFFER_BYTES: usize = 1048576; + +/// If the buffered Arrow data exceeds this size, it is flushed to object store +const BUFFER_FLUSH_BYTES: usize = 1024000; + +#[derive(Default, Debug)] +/// Factory struct used to create [ArrowFormat] +pub struct ArrowFormatFactory; + +impl ArrowFormatFactory { + /// Creates an instance of [ArrowFormatFactory] + pub fn new() -> Self { + Self {} + } +} + +impl FileFormatFactory for ArrowFormatFactory { + fn create( + &self, + _state: &dyn Session, + _format_options: &HashMap, + ) -> Result> { + Ok(Arc::new(ArrowFormat)) + } + + fn default(&self) -> Arc { + Arc::new(ArrowFormat) + } + + fn as_any(&self) -> &dyn Any { + self + } +} + +impl GetExt for ArrowFormatFactory { + fn get_ext(&self) -> String { + // Removes the dot, i.e. ".parquet" -> "parquet" + DEFAULT_ARROW_EXTENSION[1..].to_string() + } +} + +/// Arrow `FileFormat` implementation. 
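+/// 
+/// A sketch of plugging this format into a listing table (shown as an
+/// `ignore` doc example because it assumes the `ListingOptions` API from the
+/// higher-level `datafusion` crate, which is not a dependency of this crate):
+/// 
+/// ```ignore
+/// use std::sync::Arc;
+/// use datafusion::datasource::listing::ListingOptions;
+///
+/// // Use ArrowFormat for files ending in ".arrow"
+/// let listing_options = ListingOptions::new(Arc::new(ArrowFormat))
+///     .with_file_extension(".arrow");
+/// ```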
+#[derive(Default, Debug)] +pub struct ArrowFormat; + +#[async_trait] +impl FileFormat for ArrowFormat { + fn as_any(&self) -> &dyn Any { + self + } + + fn get_ext(&self) -> String { + ArrowFormatFactory::new().get_ext() + } + + fn get_ext_with_compression( + &self, + file_compression_type: &FileCompressionType, + ) -> Result { + let ext = self.get_ext(); + match file_compression_type.get_variant() { + CompressionTypeVariant::UNCOMPRESSED => Ok(ext), + _ => Err(internal_datafusion_err!( + "Arrow FileFormat does not support compression." + )), + } + } + + fn compression_type(&self) -> Option { + None + } + + async fn infer_schema( + &self, + _state: &dyn Session, + store: &Arc, + objects: &[ObjectMeta], + ) -> Result { + let mut schemas = vec![]; + for object in objects { + let r = store.as_ref().get(&object.location).await?; + let schema = match r.payload { + #[cfg(not(target_arch = "wasm32"))] + GetResultPayload::File(mut file, _) => { + let reader = FileReader::try_new(&mut file, None)?; + reader.schema() + } + GetResultPayload::Stream(stream) => { + infer_schema_from_file_stream(stream).await? + } + }; + schemas.push(schema.as_ref().clone()); + } + let merged_schema = Schema::try_merge(schemas)?; + Ok(Arc::new(merged_schema)) + } + + async fn infer_stats( + &self, + _state: &dyn Session, + _store: &Arc, + table_schema: SchemaRef, + _object: &ObjectMeta, + ) -> Result { + Ok(Statistics::new_unknown(&table_schema)) + } + + async fn create_physical_plan( + &self, + _state: &dyn Session, + conf: FileScanConfig, + ) -> Result> { + let source = Arc::new(ArrowSource::default()); + let config = FileScanConfigBuilder::from(conf) + .with_source(source) + .build(); + + Ok(DataSourceExec::from_data_source(config)) + } + + async fn create_writer_physical_plan( + &self, + input: Arc, + _state: &dyn Session, + conf: FileSinkConfig, + order_requirements: Option, + ) -> Result> { + if conf.insert_op != InsertOp::Append { + return not_impl_err!("Overwrites are not implemented yet for Arrow format"); + } + + let sink = Arc::new(ArrowFileSink::new(conf)); + + Ok(Arc::new(DataSinkExec::new(input, sink, order_requirements)) as _) + } + + fn file_source(&self) -> Arc { + Arc::new(ArrowSource::default()) + } +} + +/// Implements [`FileSink`] for writing to arrow_ipc files +struct ArrowFileSink { + config: FileSinkConfig, +} + +impl ArrowFileSink { + fn new(config: FileSinkConfig) -> Self { + Self { config } + } +} + +#[async_trait] +impl FileSink for ArrowFileSink { + fn config(&self) -> &FileSinkConfig { + &self.config + } + + async fn spawn_writer_tasks_and_join( + &self, + context: &Arc, + demux_task: SpawnedTask>, + mut file_stream_rx: DemuxedStreamReceiver, + object_store: Arc, + ) -> Result { + let mut file_write_tasks: JoinSet> = + JoinSet::new(); + + let ipc_options = + IpcWriteOptions::try_new(64, false, arrow_ipc::MetadataVersion::V5)? 
+ .try_with_compression(Some(CompressionType::LZ4_FRAME))?; + while let Some((path, mut rx)) = file_stream_rx.recv().await { + let shared_buffer = SharedBuffer::new(INITIAL_BUFFER_BYTES); + let mut arrow_writer = arrow_ipc::writer::FileWriter::try_new_with_options( + shared_buffer.clone(), + &get_writer_schema(&self.config), + ipc_options.clone(), + )?; + let mut object_store_writer = ObjectWriterBuilder::new( + FileCompressionType::UNCOMPRESSED, + &path, + Arc::clone(&object_store), + ) + .with_buffer_size(Some( + context + .session_config() + .options() + .execution + .objectstore_writer_buffer_size, + )) + .build()?; + file_write_tasks.spawn(async move { + let mut row_count = 0; + while let Some(batch) = rx.recv().await { + row_count += batch.num_rows(); + arrow_writer.write(&batch)?; + let mut buff_to_flush = shared_buffer.buffer.try_lock().unwrap(); + if buff_to_flush.len() > BUFFER_FLUSH_BYTES { + object_store_writer + .write_all(buff_to_flush.as_slice()) + .await?; + buff_to_flush.clear(); + } + } + arrow_writer.finish()?; + let final_buff = shared_buffer.buffer.try_lock().unwrap(); + + object_store_writer.write_all(final_buff.as_slice()).await?; + object_store_writer.shutdown().await?; + Ok(row_count) + }); + } + + let mut row_count = 0; + while let Some(result) = file_write_tasks.join_next().await { + match result { + Ok(r) => { + row_count += r?; + } + Err(e) => { + if e.is_panic() { + std::panic::resume_unwind(e.into_panic()); + } else { + unreachable!(); + } + } + } + } + + demux_task + .join_unwind() + .await + .map_err(|e| DataFusionError::ExecutionJoin(Box::new(e)))??; + Ok(row_count as u64) + } +} + +impl Debug for ArrowFileSink { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("ArrowFileSink").finish() + } +} + +impl DisplayAs for ArrowFileSink { + fn fmt_as(&self, t: DisplayFormatType, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match t { + DisplayFormatType::Default | DisplayFormatType::Verbose => { + write!(f, "ArrowFileSink(file_groups=",)?; + FileGroupDisplay(&self.config.file_group).fmt_as(t, f)?; + write!(f, ")") + } + DisplayFormatType::TreeRender => { + writeln!(f, "format: arrow")?; + write!(f, "file={}", &self.config.original_url) + } + } + } +} + +#[async_trait] +impl DataSink for ArrowFileSink { + fn as_any(&self) -> &dyn Any { + self + } + + fn schema(&self) -> &SchemaRef { + self.config.output_schema() + } + + async fn write_all( + &self, + data: SendableRecordBatchStream, + context: &Arc, + ) -> Result { + FileSink::write_all(self, data, context).await + } +} + +const ARROW_MAGIC: [u8; 6] = [b'A', b'R', b'R', b'O', b'W', b'1']; +const CONTINUATION_MARKER: [u8; 4] = [0xff; 4]; + +/// Custom implementation of inferring schema. Should eventually be moved upstream to arrow-rs. +/// See +async fn infer_schema_from_file_stream( + mut stream: BoxStream<'static, object_store::Result>, +) -> Result { + // Expected format: + // - 6 bytes + // - 2 bytes + // - 4 bytes, not present below v0.15.0 + // - 4 bytes + // + // + + // So in first read we need at least all known sized sections, + // which is 6 + 2 + 4 + 4 = 16 bytes. 
+ let bytes = collect_at_least_n_bytes(&mut stream, 16, None).await?; + + // Files should start with these magic bytes + if bytes[0..6] != ARROW_MAGIC { + return Err(ArrowError::ParseError( + "Arrow file does not contain correct header".to_string(), + ))?; + } + + // Since continuation marker bytes added in later versions + let (meta_len, rest_of_bytes_start_index) = if bytes[8..12] == CONTINUATION_MARKER { + (&bytes[12..16], 16) + } else { + (&bytes[8..12], 12) + }; + + let meta_len = [meta_len[0], meta_len[1], meta_len[2], meta_len[3]]; + let meta_len = i32::from_le_bytes(meta_len); + + // Read bytes for Schema message + let block_data = if bytes[rest_of_bytes_start_index..].len() < meta_len as usize { + // Need to read more bytes to decode Message + let mut block_data = Vec::with_capacity(meta_len as usize); + // In case we had some spare bytes in our initial read chunk + block_data.extend_from_slice(&bytes[rest_of_bytes_start_index..]); + let size_to_read = meta_len as usize - block_data.len(); + let block_data = + collect_at_least_n_bytes(&mut stream, size_to_read, Some(block_data)).await?; + Cow::Owned(block_data) + } else { + // Already have the bytes we need + let end_index = meta_len as usize + rest_of_bytes_start_index; + let block_data = &bytes[rest_of_bytes_start_index..end_index]; + Cow::Borrowed(block_data) + }; + + // Decode Schema message + let message = root_as_message(&block_data).map_err(|err| { + ArrowError::ParseError(format!("Unable to read IPC message as metadata: {err:?}")) + })?; + let ipc_schema = message.header_as_schema().ok_or_else(|| { + ArrowError::IpcError("Unable to read IPC message as schema".to_string()) + })?; + let schema = fb_to_schema(ipc_schema); + + Ok(Arc::new(schema)) +} + +async fn collect_at_least_n_bytes( + stream: &mut BoxStream<'static, object_store::Result>, + n: usize, + extend_from: Option>, +) -> Result> { + let mut buf = extend_from.unwrap_or_else(|| Vec::with_capacity(n)); + // If extending existing buffer then ensure we read n additional bytes + let n = n + buf.len(); + while let Some(bytes) = stream.next().await.transpose()? 
{ + buf.extend_from_slice(&bytes); + if buf.len() >= n { + break; + } + } + if buf.len() < n { + return Err(ArrowError::ParseError( + "Unexpected end of byte stream for Arrow IPC file".to_string(), + ))?; + } + Ok(buf) +} + +#[cfg(test)] +mod tests { + use super::*; + + use chrono::DateTime; + use datafusion_common::config::TableOptions; + use datafusion_common::DFSchema; + use datafusion_execution::config::SessionConfig; + use datafusion_execution::runtime_env::RuntimeEnv; + use datafusion_expr::execution_props::ExecutionProps; + use datafusion_expr::{AggregateUDF, Expr, LogicalPlan, ScalarUDF, WindowUDF}; + use datafusion_physical_expr_common::physical_expr::PhysicalExpr; + use object_store::{chunked::ChunkedStore, memory::InMemory, path::Path}; + + struct MockSession { + config: SessionConfig, + runtime_env: Arc, + } + + impl MockSession { + fn new() -> Self { + Self { + config: SessionConfig::new(), + runtime_env: Arc::new(RuntimeEnv::default()), + } + } + } + + #[async_trait::async_trait] + impl Session for MockSession { + fn session_id(&self) -> &str { + unimplemented!() + } + + fn config(&self) -> &SessionConfig { + &self.config + } + + async fn create_physical_plan( + &self, + _logical_plan: &LogicalPlan, + ) -> Result> { + unimplemented!() + } + + fn create_physical_expr( + &self, + _expr: Expr, + _df_schema: &DFSchema, + ) -> Result> { + unimplemented!() + } + + fn scalar_functions(&self) -> &HashMap> { + unimplemented!() + } + + fn aggregate_functions(&self) -> &HashMap> { + unimplemented!() + } + + fn window_functions(&self) -> &HashMap> { + unimplemented!() + } + + fn runtime_env(&self) -> &Arc { + &self.runtime_env + } + + fn execution_props(&self) -> &ExecutionProps { + unimplemented!() + } + + fn as_any(&self) -> &dyn Any { + unimplemented!() + } + + fn table_options(&self) -> &TableOptions { + unimplemented!() + } + + fn table_options_mut(&mut self) -> &mut TableOptions { + unimplemented!() + } + + fn task_ctx(&self) -> Arc { + unimplemented!() + } + } + + #[tokio::test] + async fn test_infer_schema_stream() -> Result<()> { + let mut bytes = std::fs::read("tests/data/example.arrow")?; + bytes.truncate(bytes.len() - 20); // mangle end to show we don't need to read whole file + let location = Path::parse("example.arrow")?; + let in_memory_store: Arc = Arc::new(InMemory::new()); + in_memory_store.put(&location, bytes.into()).await?; + + let state = MockSession::new(); + let object_meta = ObjectMeta { + location, + last_modified: DateTime::default(), + size: u64::MAX, + e_tag: None, + version: None, + }; + + let arrow_format = ArrowFormat {}; + let expected = vec!["f0: Int64", "f1: Utf8", "f2: Boolean"]; + + // Test chunk sizes where too small so we keep having to read more bytes + // And when large enough that first read contains all we need + for chunk_size in [7, 3000] { + let store = Arc::new(ChunkedStore::new(in_memory_store.clone(), chunk_size)); + let inferred_schema = arrow_format + .infer_schema( + &state, + &(store.clone() as Arc), + std::slice::from_ref(&object_meta), + ) + .await?; + let actual_fields = inferred_schema + .fields() + .iter() + .map(|f| format!("{}: {:?}", f.name(), f.data_type())) + .collect::>(); + assert_eq!(expected, actual_fields); + } + + Ok(()) + } + + #[tokio::test] + async fn test_infer_schema_short_stream() -> Result<()> { + let mut bytes = std::fs::read("tests/data/example.arrow")?; + bytes.truncate(20); // should cause error that file shorter than expected + let location = Path::parse("example.arrow")?; + let in_memory_store: Arc = 
Arc::new(InMemory::new()); + in_memory_store.put(&location, bytes.into()).await?; + + let state = MockSession::new(); + let object_meta = ObjectMeta { + location, + last_modified: DateTime::default(), + size: u64::MAX, + e_tag: None, + version: None, + }; + + let arrow_format = ArrowFormat {}; + + let store = Arc::new(ChunkedStore::new(in_memory_store.clone(), 7)); + let err = arrow_format + .infer_schema( + &state, + &(store.clone() as Arc), + std::slice::from_ref(&object_meta), + ) + .await; + + assert!(err.is_err()); + assert_eq!( + "Arrow error: Parser error: Unexpected end of byte stream for Arrow IPC file", + err.unwrap_err().to_string().lines().next().unwrap() + ); + + Ok(()) + } +} diff --git a/datafusion/datasource-arrow/src/mod.rs b/datafusion/datasource-arrow/src/mod.rs new file mode 100644 index 000000000000..18bb8792c3ff --- /dev/null +++ b/datafusion/datasource-arrow/src/mod.rs @@ -0,0 +1,25 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// Make sure fast / cheap clones on Arc are explicit: +// https://github.com/apache/datafusion/issues/11143 +#![cfg_attr(not(test), deny(clippy::clone_on_ref_ptr))] + +pub mod file_format; +pub mod source; + +pub use file_format::*; diff --git a/datafusion/core/src/datasource/physical_plan/arrow_file.rs b/datafusion/datasource-arrow/src/source.rs similarity index 98% rename from datafusion/core/src/datasource/physical_plan/arrow_file.rs rename to datafusion/datasource-arrow/src/source.rs index b37dc499d403..f43f11880182 100644 --- a/datafusion/core/src/datasource/physical_plan/arrow_file.rs +++ b/datafusion/datasource-arrow/src/source.rs @@ -18,20 +18,21 @@ use std::any::Any; use std::sync::Arc; -use crate::datasource::physical_plan::{FileOpenFuture, FileOpener}; -use crate::error::Result; use datafusion_datasource::as_file_source; use datafusion_datasource::schema_adapter::SchemaAdapterFactory; use arrow::buffer::Buffer; use arrow::datatypes::SchemaRef; use arrow_ipc::reader::FileDecoder; +use datafusion_common::error::Result; use datafusion_common::{exec_datafusion_err, Statistics}; use datafusion_datasource::file::FileSource; use datafusion_datasource::file_scan_config::FileScanConfig; use datafusion_datasource::PartitionedFile; use datafusion_physical_plan::metrics::ExecutionPlanMetricsSet; +use datafusion_datasource::file_stream::FileOpenFuture; +use datafusion_datasource::file_stream::FileOpener; use futures::StreamExt; use itertools::Itertools; use object_store::{GetOptions, GetRange, GetResultPayload, ObjectStore}; diff --git a/datafusion/core/tests/data/example.arrow b/datafusion/datasource-arrow/tests/data/example.arrow similarity index 100% rename from datafusion/core/tests/data/example.arrow rename to datafusion/datasource-arrow/tests/data/example.arrow diff --git 
a/datafusion/datasource-parquet/src/file_format.rs b/datafusion/datasource-parquet/src/file_format.rs index 963c1d77950c..f27bda387fda 100644 --- a/datafusion/datasource-parquet/src/file_format.rs +++ b/datafusion/datasource-parquet/src/file_format.rs @@ -38,8 +38,6 @@ use datafusion_datasource::write::demux::DemuxedStreamReceiver; use arrow::datatypes::{DataType, Field, FieldRef}; use datafusion_common::config::{ConfigField, ConfigFileType, TableParquetOptions}; -#[cfg(feature = "parquet_encryption")] -use datafusion_common::encryption::map_config_decryption_to_decryption; use datafusion_common::encryption::FileDecryptionProperties; use datafusion_common::parsers::CompressionTypeVariant; use datafusion_common::{ @@ -59,11 +57,13 @@ use datafusion_physical_expr_common::sort_expr::LexRequirement; use datafusion_physical_plan::{DisplayAs, DisplayFormatType, ExecutionPlan}; use datafusion_session::Session; +use crate::metadata::DFParquetMetadata; use crate::reader::CachedParquetFileReaderFactory; use crate::source::{parse_coerce_int96_string, ParquetSource}; use async_trait::async_trait; use bytes::Bytes; use datafusion_datasource::source::DataSourceExec; +use datafusion_execution::cache::cache_manager::FileMetadataCache; use datafusion_execution::runtime_env::RuntimeEnv; use futures::future::BoxFuture; use futures::{FutureExt, StreamExt, TryStreamExt}; @@ -77,14 +77,12 @@ use parquet::arrow::arrow_writer::{ use parquet::arrow::async_reader::MetadataFetch; use parquet::arrow::{ArrowWriter, AsyncArrowWriter}; use parquet::basic::Type; - -use crate::metadata::DFParquetMetadata; -use datafusion_execution::cache::cache_manager::FileMetadataCache; +#[cfg(feature = "parquet_encryption")] +use parquet::encryption::encrypt::FileEncryptionProperties; use parquet::errors::ParquetError; use parquet::file::metadata::ParquetMetaData; use parquet::file::properties::{WriterProperties, WriterPropertiesBuilder}; use parquet::file::writer::SerializedFileWriter; -use parquet::format::FileMetaData; use parquet::schema::types::SchemaDescriptor; use tokio::io::{AsyncWrite, AsyncWriteExt}; use tokio::sync::mpsc::{self, Receiver, Sender}; @@ -306,25 +304,23 @@ async fn get_file_decryption_properties( state: &dyn Session, options: &TableParquetOptions, file_path: &Path, -) -> Result> { - let file_decryption_properties: Option = - match &options.crypto.file_decryption { - Some(cfd) => Some(map_config_decryption_to_decryption(cfd)), - None => match &options.crypto.factory_id { - Some(factory_id) => { - let factory = - state.runtime_env().parquet_encryption_factory(factory_id)?; - factory - .get_file_decryption_properties( - &options.crypto.factory_options, - file_path, - ) - .await? - } - None => None, - }, - }; - Ok(file_decryption_properties) +) -> Result>> { + Ok(match &options.crypto.file_decryption { + Some(cfd) => Some(Arc::new(FileDecryptionProperties::from(cfd.clone()))), + None => match &options.crypto.factory_id { + Some(factory_id) => { + let factory = + state.runtime_env().parquet_encryption_factory(factory_id)?; + factory + .get_file_decryption_properties( + &options.crypto.factory_options, + file_path, + ) + .await? 
+ } + None => None, + }, + }) } #[cfg(not(feature = "parquet_encryption"))] @@ -332,7 +328,7 @@ async fn get_file_decryption_properties( _state: &dyn Session, _options: &TableParquetOptions, _file_path: &Path, -) -> Result> { +) -> Result>> { Ok(None) } @@ -385,7 +381,7 @@ impl FileFormat for ParquetFormat { .await?; let result = DFParquetMetadata::new(store.as_ref(), object) .with_metadata_size_hint(self.metadata_size_hint()) - .with_decryption_properties(file_decryption_properties.as_ref()) + .with_decryption_properties(file_decryption_properties) .with_file_metadata_cache(Some(Arc::clone(&file_metadata_cache))) .with_coerce_int96(coerce_int96) .fetch_schema_with_location() @@ -446,7 +442,7 @@ impl FileFormat for ParquetFormat { state.runtime_env().cache_manager.get_file_metadata_cache(); DFParquetMetadata::new(store, object) .with_metadata_size_hint(self.metadata_size_hint()) - .with_decryption_properties(file_decryption_properties.as_ref()) + .with_decryption_properties(file_decryption_properties) .with_file_metadata_cache(Some(file_metadata_cache)) .fetch_statistics(&table_schema) .await @@ -1027,9 +1023,10 @@ pub async fn fetch_parquet_metadata( store: &dyn ObjectStore, object_meta: &ObjectMeta, size_hint: Option, - #[allow(unused)] decryption_properties: Option<&FileDecryptionProperties>, + decryption_properties: Option<&FileDecryptionProperties>, file_metadata_cache: Option>, ) -> Result> { + let decryption_properties = decryption_properties.cloned().map(Arc::new); DFParquetMetadata::new(store, object_meta) .with_metadata_size_hint(size_hint) .with_decryption_properties(decryption_properties) @@ -1053,6 +1050,7 @@ pub async fn fetch_statistics( decryption_properties: Option<&FileDecryptionProperties>, file_metadata_cache: Option>, ) -> Result { + let decryption_properties = decryption_properties.cloned().map(Arc::new); DFParquetMetadata::new(store, file) .with_metadata_size_hint(metadata_size_hint) .with_decryption_properties(decryption_properties) @@ -1080,7 +1078,7 @@ pub struct ParquetSink { parquet_options: TableParquetOptions, /// File metadata from successfully produced parquet files. The Mutex is only used /// to allow inserting to HashMap from behind borrowed reference in DataSink::write_all. - written: Arc>>, + written: Arc>>, } impl Debug for ParquetSink { @@ -1117,7 +1115,7 @@ impl ParquetSink { /// Retrieve the file metadata for the written files, keyed to the path /// which may be partitioned (in the case of hive style partitioning). 
- pub fn written(&self) -> HashMap { + pub fn written(&self) -> HashMap { self.written.lock().clone() } @@ -1141,7 +1139,7 @@ impl ParquetSink { builder = set_writer_encryption_properties( builder, runtime, - &parquet_opts, + parquet_opts, schema, path, ) @@ -1189,14 +1187,15 @@ impl ParquetSink { async fn set_writer_encryption_properties( builder: WriterPropertiesBuilder, runtime: &Arc, - parquet_opts: &TableParquetOptions, + parquet_opts: TableParquetOptions, schema: &Arc, path: &Path, ) -> Result { - if let Some(file_encryption_properties) = &parquet_opts.crypto.file_encryption { + if let Some(file_encryption_properties) = parquet_opts.crypto.file_encryption { // Encryption properties have been specified directly - return Ok(builder - .with_file_encryption_properties(file_encryption_properties.clone().into())); + return Ok(builder.with_file_encryption_properties(Arc::new( + FileEncryptionProperties::from(file_encryption_properties), + ))); } else if let Some(encryption_factory_id) = &parquet_opts.crypto.factory_id.as_ref() { // Encryption properties will be generated by an encryption factory let encryption_factory = @@ -1221,7 +1220,7 @@ async fn set_writer_encryption_properties( async fn set_writer_encryption_properties( builder: WriterPropertiesBuilder, _runtime: &Arc, - _parquet_opts: &TableParquetOptions, + _parquet_opts: TableParquetOptions, _schema: &Arc, _path: &Path, ) -> Result { @@ -1244,7 +1243,7 @@ impl FileSink for ParquetSink { let parquet_opts = &self.parquet_options; let mut file_write_tasks: JoinSet< - std::result::Result<(Path, FileMetaData), DataFusionError>, + std::result::Result<(Path, ParquetMetaData), DataFusionError>, > = JoinSet::new(); let runtime = context.runtime_env(); @@ -1275,11 +1274,11 @@ impl FileSink for ParquetSink { writer.write(&batch).await?; reservation.try_resize(writer.memory_size())?; } - let file_metadata = writer + let parquet_meta_data = writer .close() .await .map_err(|e| DataFusionError::ParquetError(Box::new(e)))?; - Ok((path, file_metadata)) + Ok((path, parquet_meta_data)) }); } else { let writer = ObjectWriterBuilder::new( @@ -1303,7 +1302,7 @@ impl FileSink for ParquetSink { let parallel_options_clone = parallel_options.clone(); let pool = Arc::clone(context.memory_pool()); file_write_tasks.spawn(async move { - let file_metadata = output_single_parquet_file_parallelized( + let parquet_meta_data = output_single_parquet_file_parallelized( writer, rx, schema, @@ -1313,7 +1312,7 @@ impl FileSink for ParquetSink { pool, ) .await?; - Ok((path, file_metadata)) + Ok((path, parquet_meta_data)) }); } } @@ -1322,11 +1321,11 @@ impl FileSink for ParquetSink { while let Some(result) = file_write_tasks.join_next().await { match result { Ok(r) => { - let (path, file_metadata) = r?; - row_count += file_metadata.num_rows; + let (path, parquet_meta_data) = r?; + row_count += parquet_meta_data.file_metadata().num_rows(); let mut written_files = self.written.lock(); written_files - .try_insert(path.clone(), file_metadata) + .try_insert(path.clone(), parquet_meta_data) .map_err(|e| internal_datafusion_err!("duplicate entry detected for partitioned file {path}: {e}"))?; drop(written_files); } @@ -1589,7 +1588,7 @@ async fn concatenate_parallel_row_groups( mut serialize_rx: Receiver>, mut object_store_writer: Box, pool: Arc, -) -> Result { +) -> Result { let mut file_reservation = MemoryConsumer::new("ParquetSink(SerializedFileWriter)").register(&pool); @@ -1617,14 +1616,14 @@ async fn concatenate_parallel_row_groups( rg_out.close()?; } - let file_metadata = 
parquet_writer.close()?; + let parquet_meta_data = parquet_writer.close()?; let final_buff = merged_buff.buffer.try_lock().unwrap(); object_store_writer.write_all(final_buff.as_slice()).await?; object_store_writer.shutdown().await?; file_reservation.free(); - Ok(file_metadata) + Ok(parquet_meta_data) } /// Parallelizes the serialization of a single parquet file, by first serializing N @@ -1639,7 +1638,7 @@ async fn output_single_parquet_file_parallelized( skip_arrow_metadata: bool, parallel_options: ParallelParquetWriterOptions, pool: Arc, -) -> Result { +) -> Result { let max_rowgroups = parallel_options.max_parallel_row_groups; // Buffer size of this channel limits maximum number of RowGroups being worked on in parallel let (serialize_tx, serialize_rx) = @@ -1666,7 +1665,7 @@ async fn output_single_parquet_file_parallelized( parallel_options, Arc::clone(&pool), ); - let file_metadata = concatenate_parallel_row_groups( + let parquet_meta_data = concatenate_parallel_row_groups( writer, merged_buff, serialize_rx, @@ -1679,7 +1678,7 @@ async fn output_single_parquet_file_parallelized( .join_unwind() .await .map_err(|e| DataFusionError::ExecutionJoin(Box::new(e)))??; - Ok(file_metadata) + Ok(parquet_meta_data) } #[cfg(test)] diff --git a/datafusion/datasource-parquet/src/metadata.rs b/datafusion/datasource-parquet/src/metadata.rs index 4de68793ce02..6505a447d7ce 100644 --- a/datafusion/datasource-parquet/src/metadata.rs +++ b/datafusion/datasource-parquet/src/metadata.rs @@ -58,7 +58,7 @@ pub struct DFParquetMetadata<'a> { store: &'a dyn ObjectStore, object_meta: &'a ObjectMeta, metadata_size_hint: Option, - decryption_properties: Option<&'a FileDecryptionProperties>, + decryption_properties: Option>, file_metadata_cache: Option>, /// timeunit to coerce INT96 timestamps to pub coerce_int96: Option, @@ -85,7 +85,7 @@ impl<'a> DFParquetMetadata<'a> { /// set decryption properties pub fn with_decryption_properties( mut self, - decryption_properties: Option<&'a FileDecryptionProperties>, + decryption_properties: Option>, ) -> Self { self.decryption_properties = decryption_properties; self @@ -145,7 +145,8 @@ impl<'a> DFParquetMetadata<'a> { #[cfg(feature = "parquet_encryption")] if let Some(decryption_properties) = decryption_properties { - reader = reader.with_decryption_properties(Some(decryption_properties)); + reader = reader + .with_decryption_properties(Some(Arc::clone(decryption_properties))); } if cache_metadata && file_metadata_cache.is_some() { @@ -299,7 +300,6 @@ impl<'a> DFParquetMetadata<'a> { summarize_min_max_null_counts( &mut accumulators, idx, - num_rows, &stats_converter, row_groups_metadata, ) @@ -417,7 +417,6 @@ struct StatisticsAccumulators<'a> { fn summarize_min_max_null_counts( accumulators: &mut StatisticsAccumulators, arrow_schema_index: usize, - num_rows: usize, stats_converter: &StatisticsConverter, row_groups_metadata: &[RowGroupMetaData], ) -> Result<()> { @@ -449,11 +448,14 @@ fn summarize_min_max_null_counts( ); } - accumulators.null_counts_array[arrow_schema_index] = - Precision::Exact(match sum(&null_counts) { - Some(null_count) => null_count as usize, - None => num_rows, - }); + accumulators.null_counts_array[arrow_schema_index] = match sum(&null_counts) { + Some(null_count) => Precision::Exact(null_count as usize), + None => match null_counts.len() { + // If sum() returned None we either have no rows or all values are null + 0 => Precision::Exact(0), + _ => Precision::Absent, + }, + }; Ok(()) } diff --git a/datafusion/datasource-parquet/src/metrics.rs 
b/datafusion/datasource-parquet/src/metrics.rs index d75a979d4cad..5f17fbb4b9ee 100644 --- a/datafusion/datasource-parquet/src/metrics.rs +++ b/datafusion/datasource-parquet/src/metrics.rs @@ -16,7 +16,7 @@ // under the License. use datafusion_physical_plan::metrics::{ - Count, ExecutionPlanMetricsSet, MetricBuilder, Time, + Count, ExecutionPlanMetricsSet, MetricBuilder, MetricType, Time, }; /// Stores metrics about the parquet execution for a particular parquet file. @@ -88,30 +88,59 @@ impl ParquetFileMetrics { filename: &str, metrics: &ExecutionPlanMetricsSet, ) -> Self { - let predicate_evaluation_errors = MetricBuilder::new(metrics) - .with_new_label("filename", filename.to_string()) - .counter("predicate_evaluation_errors", partition); - + // ----------------------- + // 'summary' level metrics + // ----------------------- let row_groups_matched_bloom_filter = MetricBuilder::new(metrics) .with_new_label("filename", filename.to_string()) + .with_type(MetricType::SUMMARY) .counter("row_groups_matched_bloom_filter", partition); let row_groups_pruned_bloom_filter = MetricBuilder::new(metrics) .with_new_label("filename", filename.to_string()) + .with_type(MetricType::SUMMARY) .counter("row_groups_pruned_bloom_filter", partition); let row_groups_matched_statistics = MetricBuilder::new(metrics) .with_new_label("filename", filename.to_string()) + .with_type(MetricType::SUMMARY) .counter("row_groups_matched_statistics", partition); let row_groups_pruned_statistics = MetricBuilder::new(metrics) .with_new_label("filename", filename.to_string()) + .with_type(MetricType::SUMMARY) .counter("row_groups_pruned_statistics", partition); + let page_index_rows_pruned = MetricBuilder::new(metrics) + .with_new_label("filename", filename.to_string()) + .with_type(MetricType::SUMMARY) + .counter("page_index_rows_pruned", partition); + let page_index_rows_matched = MetricBuilder::new(metrics) + .with_new_label("filename", filename.to_string()) + .with_type(MetricType::SUMMARY) + .counter("page_index_rows_matched", partition); + let bytes_scanned = MetricBuilder::new(metrics) .with_new_label("filename", filename.to_string()) + .with_type(MetricType::SUMMARY) .counter("bytes_scanned", partition); + let metadata_load_time = MetricBuilder::new(metrics) + .with_new_label("filename", filename.to_string()) + .with_type(MetricType::SUMMARY) + .subset_time("metadata_load_time", partition); + + let files_ranges_pruned_statistics = MetricBuilder::new(metrics) + .with_type(MetricType::SUMMARY) + .counter("files_ranges_pruned_statistics", partition); + + // ----------------------- + // 'dev' level metrics + // ----------------------- + let predicate_evaluation_errors = MetricBuilder::new(metrics) + .with_new_label("filename", filename.to_string()) + .counter("predicate_evaluation_errors", partition); + let pushdown_rows_pruned = MetricBuilder::new(metrics) .with_new_label("filename", filename.to_string()) .counter("pushdown_rows_pruned", partition); @@ -129,24 +158,10 @@ impl ParquetFileMetrics { .with_new_label("filename", filename.to_string()) .subset_time("bloom_filter_eval_time", partition); - let page_index_rows_pruned = MetricBuilder::new(metrics) - .with_new_label("filename", filename.to_string()) - .counter("page_index_rows_pruned", partition); - let page_index_rows_matched = MetricBuilder::new(metrics) - .with_new_label("filename", filename.to_string()) - .counter("page_index_rows_matched", partition); - let page_index_eval_time = MetricBuilder::new(metrics) .with_new_label("filename", filename.to_string()) 
.subset_time("page_index_eval_time", partition); - let metadata_load_time = MetricBuilder::new(metrics) - .with_new_label("filename", filename.to_string()) - .subset_time("metadata_load_time", partition); - - let files_ranges_pruned_statistics = MetricBuilder::new(metrics) - .counter("files_ranges_pruned_statistics", partition); - let predicate_cache_inner_records = MetricBuilder::new(metrics) .with_new_label("filename", filename.to_string()) .counter("predicate_cache_inner_records", partition); diff --git a/datafusion/datasource-parquet/src/opener.rs b/datafusion/datasource-parquet/src/opener.rs index 167fc3c5147e..af7a537ca6f4 100644 --- a/datafusion/datasource-parquet/src/opener.rs +++ b/datafusion/datasource-parquet/src/opener.rs @@ -208,7 +208,7 @@ impl FileOpener for ParquetOpener { let mut options = ArrowReaderOptions::new().with_page_index(false); #[cfg(feature = "parquet_encryption")] if let Some(fd_val) = file_decryption_properties { - options = options.with_file_decryption_properties((*fd_val).clone()); + options = options.with_file_decryption_properties(Arc::clone(&fd_val)); } let mut metadata_timer = file_metrics.metadata_load_time.timer(); @@ -581,8 +581,7 @@ impl EncryptionContext { None => match &self.encryption_factory { Some((encryption_factory, encryption_config)) => Ok(encryption_factory .get_file_decryption_properties(encryption_config, file_location) - .await? - .map(Arc::new)), + .await?), None => Ok(None), }, } diff --git a/datafusion/datasource-parquet/src/page_filter.rs b/datafusion/datasource-parquet/src/page_filter.rs index 5f3e05747d40..65d1affb44a9 100644 --- a/datafusion/datasource-parquet/src/page_filter.rs +++ b/datafusion/datasource-parquet/src/page_filter.rs @@ -36,7 +36,7 @@ use datafusion_pruning::PruningPredicate; use log::{debug, trace}; use parquet::arrow::arrow_reader::statistics::StatisticsConverter; use parquet::file::metadata::{ParquetColumnIndex, ParquetOffsetIndex}; -use parquet::format::PageLocation; +use parquet::file::page_index::offset_index::PageLocation; use parquet::schema::types::SchemaDescriptor; use parquet::{ arrow::arrow_reader::{RowSelection, RowSelector}, diff --git a/datafusion/datasource-parquet/src/reader.rs b/datafusion/datasource-parquet/src/reader.rs index 687a7f15fccc..88a3cea5623b 100644 --- a/datafusion/datasource-parquet/src/reader.rs +++ b/datafusion/datasource-parquet/src/reader.rs @@ -262,8 +262,9 @@ impl AsyncFileReader for CachedParquetFileReader { async move { #[cfg(feature = "parquet_encryption")] - let file_decryption_properties = - options.and_then(|o| o.file_decryption_properties()); + let file_decryption_properties = options + .and_then(|o| o.file_decryption_properties()) + .map(Arc::clone); #[cfg(not(feature = "parquet_encryption"))] let file_decryption_properties = None; diff --git a/datafusion/datasource-parquet/src/source.rs b/datafusion/datasource-parquet/src/source.rs index dd10363079f9..186d922fc373 100644 --- a/datafusion/datasource-parquet/src/source.rs +++ b/datafusion/datasource-parquet/src/source.rs @@ -52,12 +52,12 @@ use datafusion_physical_plan::metrics::Count; use datafusion_physical_plan::metrics::ExecutionPlanMetricsSet; use datafusion_physical_plan::DisplayFormatType; -#[cfg(feature = "parquet_encryption")] -use datafusion_common::encryption::map_config_decryption_to_decryption; #[cfg(feature = "parquet_encryption")] use datafusion_execution::parquet_encryption::EncryptionFactory; use itertools::Itertools; use object_store::ObjectStore; +#[cfg(feature = "parquet_encryption")] +use 
parquet::encryption::decrypt::FileDecryptionProperties; /// Execution plan for reading one or more Parquet files. /// @@ -497,7 +497,7 @@ impl FileSource for ParquetSource { ) -> Arc { let projection = base_config .file_column_projection_indices() - .unwrap_or_else(|| (0..base_config.file_schema.fields().len()).collect()); + .unwrap_or_else(|| (0..base_config.file_schema().fields().len()).collect()); let (expr_adapter_factory, schema_adapter_factory) = match ( base_config.expr_adapter_factory.as_ref(), @@ -547,8 +547,8 @@ impl FileSource for ParquetSource { .table_parquet_options() .crypto .file_decryption - .as_ref() - .map(map_config_decryption_to_decryption) + .clone() + .map(FileDecryptionProperties::from) .map(Arc::new); let coerce_int96 = self @@ -566,8 +566,8 @@ impl FileSource for ParquetSource { .expect("Batch size must set before creating ParquetOpener"), limit: base_config.limit, predicate: self.predicate.clone(), - logical_file_schema: Arc::clone(&base_config.file_schema), - partition_fields: base_config.table_partition_cols.clone(), + logical_file_schema: Arc::clone(base_config.file_schema()), + partition_fields: base_config.table_partition_cols().clone(), metadata_size_hint: self.metadata_size_hint, metrics: self.metrics().clone(), parquet_file_reader_factory, diff --git a/datafusion/datasource/Cargo.toml b/datafusion/datasource/Cargo.toml index afd0256ba972..8e0738448a75 100644 --- a/datafusion/datasource/Cargo.toml +++ b/datafusion/datasource/Cargo.toml @@ -45,7 +45,7 @@ async-compression = { version = "0.4.19", features = [ ], optional = true } async-trait = { workspace = true } bytes = { workspace = true } -bzip2 = { version = "0.6.0", optional = true } +bzip2 = { version = "0.6.1", optional = true } chrono = { workspace = true } datafusion-common = { workspace = true, features = ["object_store"] } datafusion-common-runtime = { workspace = true } diff --git a/datafusion/datasource/src/file_scan_config.rs b/datafusion/datasource/src/file_scan_config.rs index e67e1f827372..695252803bae 100644 --- a/datafusion/datasource/src/file_scan_config.rs +++ b/datafusion/datasource/src/file_scan_config.rs @@ -24,7 +24,7 @@ use crate::schema_adapter::SchemaAdapterFactory; use crate::{ display::FileGroupsDisplay, file::FileSource, file_compression_type::FileCompressionType, file_stream::FileStream, - source::DataSource, statistics::MinMaxStatistics, PartitionedFile, + source::DataSource, statistics::MinMaxStatistics, PartitionedFile, TableSchema, }; use arrow::datatypes::FieldRef; use arrow::{ @@ -153,15 +153,16 @@ pub struct FileScanConfig { /// [`RuntimeEnv::register_object_store`]: datafusion_execution::runtime_env::RuntimeEnv::register_object_store /// [`RuntimeEnv::object_store`]: datafusion_execution::runtime_env::RuntimeEnv::object_store pub object_store_url: ObjectStoreUrl, - /// Schema before `projection` is applied. It contains the all columns that may - /// appear in the files. It does not include table partition columns - /// that may be added. - /// Note that this is **not** the schema of the physical files. - /// This is the schema that the physical file schema will be - /// mapped onto, and the schema that the [`DataSourceExec`] will return. + /// Schema information including the file schema, table partition columns, + /// and the combined table schema. + /// + /// The table schema (file schema + partition columns) is the schema exposed + /// upstream of [`FileScanConfig`] (e.g. in [`DataSourceExec`]). + /// + /// See [`TableSchema`] for more information. 
/// /// [`DataSourceExec`]: crate::source::DataSourceExec - pub file_schema: SchemaRef, + pub table_schema: TableSchema, /// List of files to be processed, grouped into partitions /// /// Each file must have a schema of `file_schema` or a subset. If @@ -180,8 +181,6 @@ pub struct FileScanConfig { /// The maximum number of records to read from this plan. If `None`, /// all records after filtering are returned. pub limit: Option, - /// The partitioning columns - pub table_partition_cols: Vec, /// All equivalent lexicographical orderings that describe the schema. pub output_ordering: Vec, /// File compression type @@ -250,23 +249,19 @@ pub struct FileScanConfig { #[derive(Clone)] pub struct FileScanConfigBuilder { object_store_url: ObjectStoreUrl, - /// Table schema before any projections or partition columns are applied. + /// Schema information including the file schema, table partition columns, + /// and the combined table schema. /// - /// This schema is used to read the files, but is **not** necessarily the - /// schema of the physical files. Rather this is the schema that the + /// This schema is used to read the files, but the file schema is **not** necessarily + /// the schema of the physical files. Rather this is the schema that the /// physical file schema will be mapped onto, and the schema that the /// [`DataSourceExec`] will return. /// - /// This is usually the same as the table schema as specified by the `TableProvider` minus any partition columns. - /// - /// This probably would be better named `table_schema` - /// /// [`DataSourceExec`]: crate::source::DataSourceExec - file_schema: SchemaRef, + table_schema: TableSchema, file_source: Arc, limit: Option, projection: Option>, - table_partition_cols: Vec, constraints: Option, file_groups: Vec, statistics: Option, @@ -291,7 +286,7 @@ impl FileScanConfigBuilder { ) -> Self { Self { object_store_url, - file_schema, + table_schema: TableSchema::from_file_schema(file_schema), file_source, file_groups: vec![], statistics: None, @@ -300,7 +295,6 @@ impl FileScanConfigBuilder { new_lines_in_values: None, limit: None, projection: None, - table_partition_cols: vec![], constraints: None, batch_size: None, expr_adapter_factory: None, @@ -332,10 +326,13 @@ impl FileScanConfigBuilder { /// Set the partitioning columns pub fn with_table_partition_cols(mut self, table_partition_cols: Vec) -> Self { - self.table_partition_cols = table_partition_cols + let table_partition_cols: Vec = table_partition_cols .into_iter() .map(|f| Arc::new(f) as FieldRef) .collect(); + self.table_schema = self + .table_schema + .with_table_partition_cols(table_partition_cols); self } @@ -433,11 +430,10 @@ impl FileScanConfigBuilder { pub fn build(self) -> FileScanConfig { let Self { object_store_url, - file_schema, + table_schema, file_source, limit, projection, - table_partition_cols, constraints, file_groups, statistics, @@ -449,23 +445,22 @@ impl FileScanConfigBuilder { } = self; let constraints = constraints.unwrap_or_default(); - let statistics = - statistics.unwrap_or_else(|| Statistics::new_unknown(&file_schema)); + let statistics = statistics + .unwrap_or_else(|| Statistics::new_unknown(table_schema.file_schema())); let file_source = file_source .with_statistics(statistics.clone()) - .with_schema(Arc::clone(&file_schema)); + .with_schema(Arc::clone(table_schema.file_schema())); let file_compression_type = file_compression_type.unwrap_or(FileCompressionType::UNCOMPRESSED); let new_lines_in_values = new_lines_in_values.unwrap_or(false); FileScanConfig { 
object_store_url, - file_schema, + table_schema, file_source, limit, projection, - table_partition_cols, constraints, file_groups, output_ordering, @@ -481,7 +476,7 @@ impl From for FileScanConfigBuilder { fn from(config: FileScanConfig) -> Self { Self { object_store_url: config.object_store_url, - file_schema: config.file_schema, + table_schema: config.table_schema, file_source: Arc::::clone(&config.file_source), file_groups: config.file_groups, statistics: config.file_source.statistics().ok(), @@ -490,7 +485,6 @@ impl From for FileScanConfigBuilder { new_lines_in_values: Some(config.new_lines_in_values), limit: config.limit, projection: config.projection, - table_partition_cols: config.table_partition_cols, constraints: Some(config.constraints), batch_size: config.batch_size, expr_adapter_factory: config.expr_adapter_factory, @@ -604,8 +598,39 @@ impl DataSource for FileScanConfig { SchedulingType::Cooperative } - fn statistics(&self) -> Result { - Ok(self.projected_stats()) + fn partition_statistics(&self, partition: Option) -> Result { + if let Some(partition) = partition { + // Get statistics for a specific partition + if let Some(file_group) = self.file_groups.get(partition) { + if let Some(stat) = file_group.file_statistics(None) { + // Project the statistics based on the projection + let table_cols_stats = self + .projection_indices() + .into_iter() + .map(|idx| { + if idx < self.file_schema().fields().len() { + stat.column_statistics[idx].clone() + } else { + // TODO provide accurate stat for partition column + // See https://github.com/apache/datafusion/issues/1186 + ColumnStatistics::new_unknown() + } + }) + .collect(); + + return Ok(Statistics { + num_rows: stat.num_rows, + total_byte_size: stat.total_byte_size, + column_statistics: table_cols_stats, + }); + } + } + // If no statistics available for this partition, return unknown + Ok(Statistics::new_unknown(&self.projected_schema())) + } else { + // Return aggregate statistics across all partitions + Ok(self.projected_stats()) + } } fn with_fetch(&self, limit: Option) -> Option> { @@ -635,7 +660,7 @@ impl DataSource for FileScanConfig { .expr .as_any() .downcast_ref::() - .map(|expr| expr.index() >= self.file_schema.fields().len()) + .map(|expr| expr.index() >= self.file_schema().fields().len()) .unwrap_or(false) }); @@ -650,7 +675,7 @@ impl DataSource for FileScanConfig { &file_scan .projection .clone() - .unwrap_or_else(|| (0..self.file_schema.fields().len()).collect()), + .unwrap_or_else(|| (0..self.file_schema().fields().len()).collect()), ); Arc::new( @@ -691,11 +716,21 @@ impl DataSource for FileScanConfig { } impl FileScanConfig { + /// Get the file schema (schema of the files without partition columns) + pub fn file_schema(&self) -> &SchemaRef { + self.table_schema.file_schema() + } + + /// Get the table partition columns + pub fn table_partition_cols(&self) -> &Vec { + self.table_schema.table_partition_cols() + } + fn projection_indices(&self) -> Vec { match &self.projection { Some(proj) => proj.clone(), - None => (0..self.file_schema.fields().len() - + self.table_partition_cols.len()) + None => (0..self.file_schema().fields().len() + + self.table_partition_cols().len()) .collect(), } } @@ -707,7 +742,7 @@ impl FileScanConfig { .projection_indices() .into_iter() .map(|idx| { - if idx < self.file_schema.fields().len() { + if idx < self.file_schema().fields().len() { statistics.column_statistics[idx].clone() } else { // TODO provide accurate stat for partition column (#1186) @@ -729,12 +764,12 @@ impl FileScanConfig { 
.projection_indices() .into_iter() .map(|idx| { - if idx < self.file_schema.fields().len() { - self.file_schema.field(idx).clone() + if idx < self.file_schema().fields().len() { + self.file_schema().field(idx).clone() } else { - let partition_idx = idx - self.file_schema.fields().len(); + let partition_idx = idx - self.file_schema().fields().len(); Arc::unwrap_or_clone(Arc::clone( - &self.table_partition_cols[partition_idx], + &self.table_partition_cols()[partition_idx], )) } }) @@ -742,7 +777,7 @@ impl FileScanConfig { Arc::new(Schema::new_with_metadata( table_fields, - self.file_schema.metadata().clone(), + self.file_schema().metadata().clone(), )) } @@ -790,9 +825,9 @@ impl FileScanConfig { /// Project the schema, constraints, and the statistics on the given column indices pub fn project(&self) -> (SchemaRef, Constraints, Statistics, Vec) { - if self.projection.is_none() && self.table_partition_cols.is_empty() { + if self.projection.is_none() && self.table_partition_cols().is_empty() { return ( - Arc::clone(&self.file_schema), + Arc::clone(self.file_schema()), self.constraints.clone(), self.file_source.statistics().unwrap().clone(), self.output_ordering.clone(), @@ -811,8 +846,8 @@ impl FileScanConfig { pub fn projected_file_column_names(&self) -> Option> { self.projection.as_ref().map(|p| { p.iter() - .filter(|col_idx| **col_idx < self.file_schema.fields().len()) - .map(|col_idx| self.file_schema.field(*col_idx).name()) + .filter(|col_idx| **col_idx < self.file_schema().fields().len()) + .map(|col_idx| self.file_schema().field(*col_idx).name()) .cloned() .collect() }) @@ -823,17 +858,17 @@ impl FileScanConfig { let fields = self.file_column_projection_indices().map(|indices| { indices .iter() - .map(|col_idx| self.file_schema.field(*col_idx)) + .map(|col_idx| self.file_schema().field(*col_idx)) .cloned() .collect::>() }); fields.map_or_else( - || Arc::clone(&self.file_schema), + || Arc::clone(self.file_schema()), |f| { Arc::new(Schema::new_with_metadata( f, - self.file_schema.metadata.clone(), + self.file_schema().metadata.clone(), )) }, ) @@ -842,7 +877,7 @@ impl FileScanConfig { pub fn file_column_projection_indices(&self) -> Option> { self.projection.as_ref().map(|p| { p.iter() - .filter(|col_idx| **col_idx < self.file_schema.fields().len()) + .filter(|col_idx| **col_idx < self.file_schema().fields().len()) .copied() .collect() }) @@ -1599,7 +1634,7 @@ mod tests { ); let source_statistics = conf.file_source.statistics().unwrap(); - let conf_stats = conf.statistics().unwrap(); + let conf_stats = conf.partition_statistics(None).unwrap(); // projection should be reflected in the file source statistics assert_eq!(conf_stats.num_rows, Precision::Inexact(3)); @@ -2182,11 +2217,11 @@ mod tests { // Verify the built config has all the expected values assert_eq!(config.object_store_url, object_store_url); - assert_eq!(config.file_schema, file_schema); + assert_eq!(*config.file_schema(), file_schema); assert_eq!(config.limit, Some(1000)); assert_eq!(config.projection, Some(vec![0, 1])); - assert_eq!(config.table_partition_cols.len(), 1); - assert_eq!(config.table_partition_cols[0].name(), "date"); + assert_eq!(config.table_partition_cols().len(), 1); + assert_eq!(config.table_partition_cols()[0].name(), "date"); assert_eq!(config.file_groups.len(), 1); assert_eq!(config.file_groups[0].len(), 1); assert_eq!( @@ -2265,10 +2300,10 @@ mod tests { // Verify default values assert_eq!(config.object_store_url, object_store_url); - assert_eq!(config.file_schema, file_schema); + 
assert_eq!(*config.file_schema(), file_schema); assert_eq!(config.limit, None); assert_eq!(config.projection, None); - assert!(config.table_partition_cols.is_empty()); + assert!(config.table_partition_cols().is_empty()); assert!(config.file_groups.is_empty()); assert_eq!( config.file_compression_type, @@ -2339,10 +2374,10 @@ mod tests { // Verify properties match let partition_cols = partition_cols.into_iter().map(Arc::new).collect::>(); assert_eq!(new_config.object_store_url, object_store_url); - assert_eq!(new_config.file_schema, schema); + assert_eq!(*new_config.file_schema(), schema); assert_eq!(new_config.projection, Some(vec![0, 2])); assert_eq!(new_config.limit, Some(10)); - assert_eq!(new_config.table_partition_cols, partition_cols); + assert_eq!(*new_config.table_partition_cols(), partition_cols); assert_eq!(new_config.file_groups.len(), 1); assert_eq!(new_config.file_groups[0].len(), 1); assert_eq!( @@ -2506,4 +2541,91 @@ mod tests { Ok(()) } + + #[test] + fn test_partition_statistics_projection() { + // This test verifies that partition_statistics applies projection correctly. + // The old implementation had a bug where it returned file group statistics + // without applying the projection, returning all column statistics instead + // of just the projected ones. + + use crate::source::DataSourceExec; + use datafusion_physical_plan::ExecutionPlan; + + // Create a schema with 4 columns + let schema = Arc::new(Schema::new(vec![ + Field::new("col0", DataType::Int32, false), + Field::new("col1", DataType::Int32, false), + Field::new("col2", DataType::Int32, false), + Field::new("col3", DataType::Int32, false), + ])); + + // Create statistics for all 4 columns + let file_group_stats = Statistics { + num_rows: Precision::Exact(100), + total_byte_size: Precision::Exact(1024), + column_statistics: vec![ + ColumnStatistics { + null_count: Precision::Exact(0), + ..ColumnStatistics::new_unknown() + }, + ColumnStatistics { + null_count: Precision::Exact(5), + ..ColumnStatistics::new_unknown() + }, + ColumnStatistics { + null_count: Precision::Exact(10), + ..ColumnStatistics::new_unknown() + }, + ColumnStatistics { + null_count: Precision::Exact(15), + ..ColumnStatistics::new_unknown() + }, + ], + }; + + // Create a file group with statistics + let file_group = FileGroup::new(vec![PartitionedFile::new("test.parquet", 1024)]) + .with_statistics(Arc::new(file_group_stats)); + + // Create a FileScanConfig with projection: only keep columns 0 and 2 + let config = FileScanConfigBuilder::new( + ObjectStoreUrl::parse("test:///").unwrap(), + Arc::clone(&schema), + Arc::new(MockSource::default()), + ) + .with_projection(Some(vec![0, 2])) // Only project columns 0 and 2 + .with_file_groups(vec![file_group]) + .build(); + + // Create a DataSourceExec from the config + let exec = DataSourceExec::from_data_source(config); + + // Get statistics for partition 0 + let partition_stats = exec.partition_statistics(Some(0)).unwrap(); + + // Verify that only 2 columns are in the statistics (the projected ones) + assert_eq!( + partition_stats.column_statistics.len(), + 2, + "Expected 2 column statistics (projected), but got {}", + partition_stats.column_statistics.len() + ); + + // Verify the column statistics are for columns 0 and 2 + assert_eq!( + partition_stats.column_statistics[0].null_count, + Precision::Exact(0), + "First projected column should be col0 with 0 nulls" + ); + assert_eq!( + partition_stats.column_statistics[1].null_count, + Precision::Exact(10), + "Second projected column should be col2 with 
10 nulls" + ); + + // Verify row count and byte size are preserved + assert_eq!(partition_stats.num_rows, Precision::Exact(100)); + assert_eq!(partition_stats.total_byte_size, Precision::Exact(1024)); + } } diff --git a/datafusion/datasource/src/file_stream.rs b/datafusion/datasource/src/file_stream.rs index e0b6c25a1916..9fee5691beea 100644 --- a/datafusion/datasource/src/file_stream.rs +++ b/datafusion/datasource/src/file_stream.rs @@ -80,7 +80,7 @@ impl FileStream { let pc_projector = PartitionColumnProjector::new( Arc::clone(&projected_schema), &config - .table_partition_cols + .table_partition_cols() .iter() .map(|x| x.name().clone()) .collect::>(), diff --git a/datafusion/datasource/src/memory.rs b/datafusion/datasource/src/memory.rs index eb55aa9b0b0d..7d5c8c4834ea 100644 --- a/datafusion/datasource/src/memory.rs +++ b/datafusion/datasource/src/memory.rs @@ -21,6 +21,7 @@ use std::collections::BinaryHeap; use std::fmt; use std::fmt::Debug; use std::ops::Deref; +use std::slice::from_ref; use std::sync::Arc; use crate::sink::DataSink; @@ -192,12 +193,27 @@ impl DataSource for MemorySourceConfig { SchedulingType::Cooperative } - fn statistics(&self) -> Result { - Ok(common::compute_record_batch_statistics( - &self.partitions, - &self.schema, - self.projection.clone(), - )) + fn partition_statistics(&self, partition: Option) -> Result { + if let Some(partition) = partition { + // Compute statistics for a specific partition + if let Some(batches) = self.partitions.get(partition) { + Ok(common::compute_record_batch_statistics( + from_ref(batches), + &self.schema, + self.projection.clone(), + )) + } else { + // Invalid partition index + Ok(Statistics::new_unknown(&self.projected_schema)) + } + } else { + // Compute statistics across all partitions + Ok(common::compute_record_batch_statistics( + &self.partitions, + &self.schema, + self.projection.clone(), + )) + } } fn with_fetch(&self, limit: Option) -> Option> { diff --git a/datafusion/datasource/src/mod.rs b/datafusion/datasource/src/mod.rs index 1f47c0983ea1..80b44ad5949a 100644 --- a/datafusion/datasource/src/mod.rs +++ b/datafusion/datasource/src/mod.rs @@ -41,6 +41,7 @@ pub mod schema_adapter; pub mod sink; pub mod source; mod statistics; +pub mod table_schema; #[cfg(test)] pub mod test_util; @@ -57,6 +58,7 @@ use datafusion_common::{ScalarValue, Statistics}; use futures::{Stream, StreamExt}; use object_store::{path::Path, ObjectMeta}; use object_store::{GetOptions, GetRange, ObjectStore}; +pub use table_schema::TableSchema; // Remove when add_row_stats is remove #[allow(deprecated)] pub use statistics::add_row_stats; diff --git a/datafusion/datasource/src/source.rs b/datafusion/datasource/src/source.rs index 20d9a1d6e53f..11a8a3867b80 100644 --- a/datafusion/datasource/src/source.rs +++ b/datafusion/datasource/src/source.rs @@ -151,7 +151,21 @@ pub trait DataSource: Send + Sync + Debug { fn scheduling_type(&self) -> SchedulingType { SchedulingType::NonCooperative } - fn statistics(&self) -> Result; + + /// Returns statistics for a specific partition, or aggregate statistics + /// across all partitions if `partition` is `None`. + fn partition_statistics(&self, partition: Option) -> Result; + + /// Returns aggregate statistics across all partitions. + /// + /// # Deprecated + /// Use [`Self::partition_statistics`] instead, which provides more fine-grained + /// control over statistics retrieval (per-partition or aggregate). 
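A minimal caller-side sketch of the new API (the helper name `stats_for` and the fallback policy are illustrative only; the trait and method signatures are as defined in this patch):

use datafusion_common::{Result, Statistics};
use datafusion_datasource::source::DataSource;

/// Sketch: prefer per-partition statistics, falling back to the aggregate.
/// Passing `None` aggregates statistics across all partitions.
fn stats_for(source: &dyn DataSource, partition: usize) -> Result<Statistics> {
    match source.partition_statistics(Some(partition)) {
        Ok(stats) => Ok(stats),
        Err(_) => source.partition_statistics(None),
    }
}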
+ #[deprecated(since = "51.0.0", note = "Use partition_statistics instead")] + fn statistics(&self) -> Result { + self.partition_statistics(None) + } + /// Return a copy of this DataSource with a new fetch limit fn with_fetch(&self, _limit: Option) -> Option>; fn fetch(&self) -> Option; @@ -285,21 +299,7 @@ impl ExecutionPlan for DataSourceExec { } fn partition_statistics(&self, partition: Option) -> Result { - if let Some(partition) = partition { - let mut statistics = Statistics::new_unknown(&self.schema()); - if let Some(file_config) = - self.data_source.as_any().downcast_ref::() - { - if let Some(file_group) = file_config.file_groups.get(partition) { - if let Some(stat) = file_group.file_statistics(None) { - statistics = stat.clone(); - } - } - } - Ok(statistics) - } else { - Ok(self.data_source.statistics()?) - } + self.data_source.partition_statistics(partition) } fn with_fetch(&self, limit: Option) -> Option> { diff --git a/datafusion/datasource/src/table_schema.rs b/datafusion/datasource/src/table_schema.rs new file mode 100644 index 000000000000..8e95585ce873 --- /dev/null +++ b/datafusion/datasource/src/table_schema.rs @@ -0,0 +1,160 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Helper struct to manage table schemas with partition columns + +use arrow::datatypes::{FieldRef, SchemaBuilder, SchemaRef}; +use std::sync::Arc; + +/// Helper to hold table schema information for partitioned data sources. +/// +/// When reading partitioned data (such as Hive-style partitioning), a table's schema +/// consists of two parts: +/// 1. **File schema**: The schema of the actual data files on disk +/// 2. **Partition columns**: Columns that are encoded in the directory structure, +/// not stored in the files themselves +/// +/// # Example: Partitioned Table +/// +/// Consider a table with the following directory structure: +/// ```text +/// /data/date=2025-10-10/region=us-west/data.parquet +/// /data/date=2025-10-11/region=us-east/data.parquet +/// ``` +/// +/// In this case: +/// - **File schema**: The schema of `data.parquet` files (e.g., `[user_id, amount]`) +/// - **Partition columns**: `[date, region]` extracted from the directory path +/// - **Table schema**: The full schema combining both (e.g., `[user_id, amount, date, region]`) +/// +/// # When to Use +/// +/// Use `TableSchema` when: +/// - Reading partitioned data sources (Parquet, CSV, etc. with Hive-style partitioning) +/// - You need to efficiently access different schema representations without reconstructing them +/// - You want to avoid repeatedly concatenating file and partition schemas +/// +/// For non-partitioned data or when working with a single schema representation, +/// working directly with Arrow's `Schema` or `SchemaRef` is simpler. 
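For illustration, a small sketch exercising the constructor and accessors defined below (wrapped in `fn main` only so it compiles standalone):

use std::sync::Arc;
use arrow::datatypes::{DataType, Field, Schema};
use datafusion_datasource::TableSchema;

fn main() {
    // Columns stored in the data files themselves
    let file_schema = Arc::new(Schema::new(vec![Field::new(
        "user_id",
        DataType::Int64,
        false,
    )]));
    // Columns encoded in the directory structure
    let partition_cols = vec![Arc::new(Field::new("date", DataType::Utf8, false))];

    let table_schema = TableSchema::new(Arc::clone(&file_schema), partition_cols);
    assert_eq!(table_schema.file_schema().fields().len(), 1); // file columns only
    assert_eq!(table_schema.table_partition_cols().len(), 1); // partition columns only
    assert_eq!(table_schema.table_schema().fields().len(), 2); // what queries see
}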
+/// +/// # Performance +/// +/// This struct pre-computes and caches the full table schema, allowing cheap references +/// to any representation without repeated allocations or reconstructions. +#[derive(Debug, Clone)] +pub struct TableSchema { + /// The schema of the data files themselves, without partition columns. + /// + /// For example, if your Parquet files contain `[user_id, amount]`, + /// this field holds that schema. + file_schema: SchemaRef, + + /// Columns that are derived from the directory structure (partitioning scheme). + /// + /// For Hive-style partitioning like `/date=2025-10-10/region=us-west/`, + /// this contains the `date` and `region` fields. + /// + /// These columns are NOT present in the data files but are appended to each + /// row during query execution based on the file's location. + table_partition_cols: Vec, + + /// The complete table schema: file_schema columns followed by partition columns. + /// + /// This is pre-computed during construction by concatenating `file_schema` + /// and `table_partition_cols`, so it can be returned as a cheap reference. + table_schema: SchemaRef, +} + +impl TableSchema { + /// Create a new TableSchema from a file schema and partition columns. + /// + /// The table schema is automatically computed by appending the partition columns + /// to the file schema. + /// + /// # Arguments + /// + /// * `file_schema` - Schema of the data files (without partition columns) + /// * `table_partition_cols` - Partition columns to append to each row + /// + /// # Example + /// + /// ``` + /// # use std::sync::Arc; + /// # use arrow::datatypes::{Schema, Field, DataType}; + /// # use datafusion_datasource::TableSchema; + /// let file_schema = Arc::new(Schema::new(vec![ + /// Field::new("user_id", DataType::Int64, false), + /// Field::new("amount", DataType::Float64, false), + /// ])); + /// + /// let partition_cols = vec![ + /// Arc::new(Field::new("date", DataType::Utf8, false)), + /// Arc::new(Field::new("region", DataType::Utf8, false)), + /// ]; + /// + /// let table_schema = TableSchema::new(file_schema, partition_cols); + /// + /// // Table schema will have 4 columns: user_id, amount, date, region + /// assert_eq!(table_schema.table_schema().fields().len(), 4); + /// ``` + pub fn new(file_schema: SchemaRef, table_partition_cols: Vec) -> Self { + let mut builder = SchemaBuilder::from(file_schema.as_ref()); + builder.extend(table_partition_cols.iter().cloned()); + Self { + file_schema, + table_partition_cols, + table_schema: Arc::new(builder.finish()), + } + } + + /// Create a new TableSchema from a file schema with no partition columns. + pub fn from_file_schema(file_schema: SchemaRef) -> Self { + Self::new(file_schema, vec![]) + } + + /// Set the table partition columns and rebuild the table schema. + pub fn with_table_partition_cols( + mut self, + table_partition_cols: Vec, + ) -> TableSchema { + self.table_partition_cols = table_partition_cols; + self + } + + /// Get the file schema (without partition columns). + /// + /// This is the schema of the actual data files on disk. + pub fn file_schema(&self) -> &SchemaRef { + &self.file_schema + } + + /// Get the table partition columns. + /// + /// These are the columns derived from the directory structure that + /// will be appended to each row during query execution. + pub fn table_partition_cols(&self) -> &Vec { + &self.table_partition_cols + } + + /// Get the full table schema (file schema + partition columns). 
+ /// + /// This is the complete schema that will be seen by queries, combining + /// both the columns from the files and the partition columns. + pub fn table_schema(&self) -> &SchemaRef { + &self.table_schema + } +} diff --git a/datafusion/datasource/src/url.rs b/datafusion/datasource/src/url.rs index c87b307c5fb8..0f31eb7caf41 100644 --- a/datafusion/datasource/src/url.rs +++ b/datafusion/datasource/src/url.rs @@ -252,7 +252,10 @@ impl ListingTableUrl { .boxed(), // If the head command fails, it is likely that object doesn't exist. // Retry as though it were a prefix (aka a collection) - Err(_) => list_with_cache(ctx, store, &self.prefix).await?, + Err(object_store::Error::NotFound { .. }) => { + list_with_cache(ctx, store, &self.prefix).await? + } + Err(e) => return Err(e.into()), } }; @@ -405,6 +408,8 @@ fn split_glob_expression(path: &str) -> Option<(&str, &str)> { #[cfg(test)] mod tests { use super::*; + use async_trait::async_trait; + use bytes::Bytes; use datafusion_common::config::TableOptions; use datafusion_common::DFSchema; use datafusion_execution::config::SessionConfig; @@ -414,9 +419,13 @@ mod tests { use datafusion_expr::{AggregateUDF, Expr, LogicalPlan, ScalarUDF, WindowUDF}; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use datafusion_physical_plan::ExecutionPlan; - use object_store::PutPayload; + use object_store::{ + GetOptions, GetResult, ListResult, MultipartUpload, PutMultipartOptions, + PutPayload, + }; use std::any::Any; use std::collections::HashMap; + use std::ops::Range; use tempfile::tempdir; #[test] @@ -632,48 +641,68 @@ mod tests { } #[tokio::test] - async fn test_list_files() { - let store = object_store::memory::InMemory::new(); + async fn test_list_files() -> Result<()> { + let store = MockObjectStore { + in_mem: object_store::memory::InMemory::new(), + forbidden_paths: vec!["forbidden/e.parquet".into()], + }; + // Create some files: create_file(&store, "a.parquet").await; create_file(&store, "/t/b.parquet").await; create_file(&store, "/t/c.csv").await; create_file(&store, "/t/d.csv").await; + // This file returns a permission error. + create_file(&store, "/forbidden/e.parquet").await; + assert_eq!( - list_all_files("/", &store, "parquet").await, + list_all_files("/", &store, "parquet").await?, vec!["a.parquet"], ); // test with and without trailing slash assert_eq!( - list_all_files("/t/", &store, "parquet").await, + list_all_files("/t/", &store, "parquet").await?, vec!["t/b.parquet"], ); assert_eq!( - list_all_files("/t", &store, "parquet").await, + list_all_files("/t", &store, "parquet").await?, vec!["t/b.parquet"], ); // test with and without trailing slash assert_eq!( - list_all_files("/t", &store, "csv").await, + list_all_files("/t", &store, "csv").await?, vec!["t/c.csv", "t/d.csv"], ); assert_eq!( - list_all_files("/t/", &store, "csv").await, + list_all_files("/t/", &store, "csv").await?, vec!["t/c.csv", "t/d.csv"], ); // Test a non existing prefix assert_eq!( - list_all_files("/NonExisting", &store, "csv").await, + list_all_files("/NonExisting", &store, "csv").await?, vec![] as Vec ); assert_eq!( - list_all_files("/NonExisting/", &store, "csv").await, + list_all_files("/NonExisting/", &store, "csv").await?, vec![] as Vec ); + + // Including forbidden.parquet generates an error. + let Err(DataFusionError::ObjectStore(err)) = + list_all_files("/forbidden/e.parquet", &store, "parquet").await + else { + panic!("Expected ObjectStore error"); + }; + + let object_store::Error::PermissionDenied { .. 
} = &*err else { + panic!("Expected PermissionDenied error"); + }; + + Ok(()) } /// Creates a file with "hello world" content at the specified path @@ -691,10 +720,8 @@ mod tests { url: &str, store: &dyn ObjectStore, file_extension: &str, - ) -> Vec { - try_list_all_files(url, store, file_extension) - .await - .unwrap() + ) -> Result> { + try_list_all_files(url, store, file_extension).await } /// Runs "list_all_files" and returns their paths @@ -716,6 +743,95 @@ mod tests { Ok(files) } + #[derive(Debug)] + struct MockObjectStore { + in_mem: object_store::memory::InMemory, + forbidden_paths: Vec, + } + + impl std::fmt::Display for MockObjectStore { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.in_mem.fmt(f) + } + } + + #[async_trait] + impl ObjectStore for MockObjectStore { + async fn put_opts( + &self, + location: &Path, + payload: PutPayload, + opts: object_store::PutOptions, + ) -> object_store::Result { + self.in_mem.put_opts(location, payload, opts).await + } + + async fn put_multipart_opts( + &self, + location: &Path, + opts: PutMultipartOptions, + ) -> object_store::Result> { + self.in_mem.put_multipart_opts(location, opts).await + } + + async fn get_opts( + &self, + location: &Path, + options: GetOptions, + ) -> object_store::Result { + self.in_mem.get_opts(location, options).await + } + + async fn get_ranges( + &self, + location: &Path, + ranges: &[Range], + ) -> object_store::Result> { + self.in_mem.get_ranges(location, ranges).await + } + + async fn head(&self, location: &Path) -> object_store::Result { + if self.forbidden_paths.contains(location) { + Err(object_store::Error::PermissionDenied { + path: location.to_string(), + source: "forbidden".into(), + }) + } else { + self.in_mem.head(location).await + } + } + + async fn delete(&self, location: &Path) -> object_store::Result<()> { + self.in_mem.delete(location).await + } + + fn list( + &self, + prefix: Option<&Path>, + ) -> BoxStream<'static, object_store::Result> { + self.in_mem.list(prefix) + } + + async fn list_with_delimiter( + &self, + prefix: Option<&Path>, + ) -> object_store::Result { + self.in_mem.list_with_delimiter(prefix).await + } + + async fn copy(&self, from: &Path, to: &Path) -> object_store::Result<()> { + self.in_mem.copy(from, to).await + } + + async fn copy_if_not_exists( + &self, + from: &Path, + to: &Path, + ) -> object_store::Result<()> { + self.in_mem.copy_if_not_exists(from, to).await + } + } + struct MockSession { config: SessionConfig, runtime_env: Arc, diff --git a/datafusion/datasource/src/write/demux.rs b/datafusion/datasource/src/write/demux.rs index e80099823054..52cb17c10453 100644 --- a/datafusion/datasource/src/write/demux.rs +++ b/datafusion/datasource/src/write/demux.rs @@ -40,9 +40,9 @@ use datafusion_common::cast::{ }; use datafusion_common::{exec_datafusion_err, internal_datafusion_err, not_impl_err}; use datafusion_common_runtime::SpawnedTask; -use datafusion_execution::TaskContext; use chrono::NaiveDate; +use datafusion_execution::TaskContext; use futures::StreamExt; use object_store::path::Path; use rand::distr::SampleString; @@ -68,6 +68,11 @@ pub type DemuxedStreamReceiver = UnboundedReceiver<(Path, RecordBatchReceiver)>; /// be written with the extension from the path. Otherwise the default extension /// will be used and the output will be split into multiple files. /// +/// Output file guarantees: +/// - Partitioned files: Files are created only for non-empty partitions. 
+/// - Single-file output: 1 file is always written, even when the stream is empty. +/// - Multi-file output: Depending on the number of record batches, 0 or more files are written. +/// /// Examples of `base_output_path` /// * `tmp/dataset/` -> is a folder since it ends in `/` /// * `tmp/dataset` -> is still a folder since it does not end in `/` but has no valid file extension @@ -171,6 +176,21 @@ async fn row_count_demuxer( max_rows_per_file }; + if single_file_output { + // ensure we have one file open, even when the input stream is empty + open_file_streams.push(create_new_file_stream( + &base_output_path, + &write_id, + part_idx, + &file_extension, + single_file_output, + max_buffered_batches, + &mut tx, + )?); + row_counts.push(0); + part_idx += 1; + } + while let Some(rb) = input.next().await.transpose()? { // ensure we have at least minimum_parallel_files open if open_file_streams.len() < minimum_parallel_files { diff --git a/datafusion/execution/src/object_store.rs b/datafusion/execution/src/object_store.rs index ef83128ac681..aedee7d44460 100644 --- a/datafusion/execution/src/object_store.rs +++ b/datafusion/execution/src/object_store.rs @@ -20,7 +20,9 @@ //! and query data inside these systems. use dashmap::DashMap; -use datafusion_common::{exec_err, internal_datafusion_err, DataFusionError, Result}; +use datafusion_common::{ + exec_err, internal_datafusion_err, not_impl_err, DataFusionError, Result, +}; #[cfg(not(target_arch = "wasm32"))] use object_store::local::LocalFileSystem; use object_store::ObjectStore; @@ -154,6 +156,13 @@ pub trait ObjectStoreRegistry: Send + Sync + std::fmt::Debug + 'static { store: Arc, ) -> Option>; + /// Deregister the store previously registered with the same key. Returns the + /// deregistered store if it existed. + #[allow(unused_variables)] + fn deregister_store(&self, url: &Url) -> Result> { + not_impl_err!("ObjectStoreRegistry::deregister_store is not implemented for this ObjectStoreRegistry") + } + /// Get a suitable store for the provided URL. For example: /// /// - URL with scheme `file:///` or no scheme will return the default LocalFS store @@ -230,6 +239,17 @@ impl ObjectStoreRegistry for DefaultObjectStoreRegistry { self.object_stores.insert(s, store) } + fn deregister_store(&self, url: &Url) -> Result> { + let s = get_url_key(url); + let (_, object_store) = self.object_stores + .remove(&s) + .ok_or_else(|| { + internal_datafusion_err!("Failed to deregister object store. No suitable object store found for {url}. See `RuntimeEnv::register_object_store`") + })?; + + Ok(object_store) + } + fn get_store(&self, url: &Url) -> Result> { let s = get_url_key(url); self.object_stores diff --git a/datafusion/execution/src/parquet_encryption.rs b/datafusion/execution/src/parquet_encryption.rs index 73881e11ca72..027421e08f54 100644 --- a/datafusion/execution/src/parquet_encryption.rs +++ b/datafusion/execution/src/parquet_encryption.rs @@ -41,14 +41,14 @@ pub trait EncryptionFactory: Send + Sync + std::fmt::Debug + 'static { config: &EncryptionFactoryOptions, schema: &SchemaRef, file_path: &Path, - ) -> Result>; + ) -> Result>>; /// Generate file decryption properties to use when reading a Parquet file. 
async fn get_file_decryption_properties( &self, config: &EncryptionFactoryOptions, file_path: &Path, - ) -> Result>; + ) -> Result>>; } /// Stores [`EncryptionFactory`] implementations that can be retrieved by a unique string identifier diff --git a/datafusion/execution/src/runtime_env.rs b/datafusion/execution/src/runtime_env.rs index db045a8b7e8a..b0d0a966b7a2 100644 --- a/datafusion/execution/src/runtime_env.rs +++ b/datafusion/execution/src/runtime_env.rs @@ -114,8 +114,6 @@ impl RuntimeEnv { /// ``` /// /// # Example: Register remote URL object store like [Github](https://github.com) - /// - /// /// ``` /// # use std::sync::Arc; /// # use url::Url; @@ -141,6 +139,12 @@ impl RuntimeEnv { self.object_store_registry.register_store(url, object_store) } + /// Deregisters a custom `ObjectStore` previously registered for a specific url. + /// See [`ObjectStoreRegistry::deregister_store`] for more details. + pub fn deregister_object_store(&self, url: &Url) -> Result> { + self.object_store_registry.deregister_store(url) + } + /// Retrieves a `ObjectStore` instance for a url by consulting the /// registry. See [`ObjectStoreRegistry::get_store`] for more /// details. diff --git a/datafusion/expr-common/src/type_coercion/aggregates.rs b/datafusion/expr-common/src/type_coercion/aggregates.rs index e77a072a84f3..55a8843394b5 100644 --- a/datafusion/expr-common/src/type_coercion/aggregates.rs +++ b/datafusion/expr-common/src/type_coercion/aggregates.rs @@ -16,31 +16,12 @@ // under the License. use crate::signature::TypeSignature; -use arrow::datatypes::{ - DataType, FieldRef, TimeUnit, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE, - DECIMAL256_MAX_PRECISION, DECIMAL256_MAX_SCALE, DECIMAL32_MAX_PRECISION, - DECIMAL32_MAX_SCALE, DECIMAL64_MAX_PRECISION, DECIMAL64_MAX_SCALE, -}; +use arrow::datatypes::{DataType, FieldRef}; use datafusion_common::{internal_err, plan_err, Result}; -pub static STRINGS: &[DataType] = - &[DataType::Utf8, DataType::LargeUtf8, DataType::Utf8View]; - -pub static SIGNED_INTEGERS: &[DataType] = &[ - DataType::Int8, - DataType::Int16, - DataType::Int32, - DataType::Int64, -]; - -pub static UNSIGNED_INTEGERS: &[DataType] = &[ - DataType::UInt8, - DataType::UInt16, - DataType::UInt32, - DataType::UInt64, -]; - +// TODO: remove usage of these (INTEGERS and NUMERICS) in favour of signatures +// see https://github.com/apache/datafusion/issues/18092 pub static INTEGERS: &[DataType] = &[ DataType::Int8, DataType::Int16, @@ -65,24 +46,6 @@ pub static NUMERICS: &[DataType] = &[ DataType::Float64, ]; -pub static TIMESTAMPS: &[DataType] = &[ - DataType::Timestamp(TimeUnit::Second, None), - DataType::Timestamp(TimeUnit::Millisecond, None), - DataType::Timestamp(TimeUnit::Microsecond, None), - DataType::Timestamp(TimeUnit::Nanosecond, None), -]; - -pub static DATES: &[DataType] = &[DataType::Date32, DataType::Date64]; - -pub static BINARYS: &[DataType] = &[DataType::Binary, DataType::LargeBinary]; - -pub static TIMES: &[DataType] = &[ - DataType::Time32(TimeUnit::Second), - DataType::Time32(TimeUnit::Millisecond), - DataType::Time64(TimeUnit::Microsecond), - DataType::Time64(TimeUnit::Nanosecond), -]; - /// Validate the length of `input_fields` matches the `signature` for `agg_fun`. 
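A short sketch of registering and then deregistering a store through `RuntimeEnv` (the `mem://scratch/` URL is only an illustrative key, and `RuntimeEnv::default()` is assumed to build a runtime backed by the default registry shown above):

use std::sync::Arc;
use url::Url;
use datafusion_execution::runtime_env::RuntimeEnv;
use object_store::memory::InMemory;

fn main() -> datafusion_common::Result<()> {
    let env = RuntimeEnv::default();
    let url = Url::parse("mem://scratch/").unwrap();

    // Register an in-memory store for the prefix, then remove it again.
    env.register_object_store(&url, Arc::new(InMemory::new()));
    let _removed = env.deregister_object_store(&url)?; // returns the removed store

    // Nothing is registered for the key any more, so a second call errors.
    assert!(env.deregister_object_store(&url).is_err());
    Ok(())
}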
/// /// This method DOES NOT validate the argument fields - only that (at least one, @@ -144,260 +107,3 @@ pub fn check_arg_count( } Ok(()) } - -/// Function return type of a sum -pub fn sum_return_type(arg_type: &DataType) -> Result { - match arg_type { - DataType::Int64 => Ok(DataType::Int64), - DataType::UInt64 => Ok(DataType::UInt64), - DataType::Float64 => Ok(DataType::Float64), - DataType::Decimal32(precision, scale) => { - // in the spark, the result type is DECIMAL(min(38,precision+10), s) - // ref: https://github.com/apache/spark/blob/fcf636d9eb8d645c24be3db2d599aba2d7e2955a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala#L66 - let new_precision = DECIMAL32_MAX_PRECISION.min(*precision + 10); - Ok(DataType::Decimal32(new_precision, *scale)) - } - DataType::Decimal64(precision, scale) => { - // in the spark, the result type is DECIMAL(min(38,precision+10), s) - // ref: https://github.com/apache/spark/blob/fcf636d9eb8d645c24be3db2d599aba2d7e2955a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala#L66 - let new_precision = DECIMAL64_MAX_PRECISION.min(*precision + 10); - Ok(DataType::Decimal64(new_precision, *scale)) - } - DataType::Decimal128(precision, scale) => { - // In the spark, the result type is DECIMAL(min(38,precision+10), s) - // Ref: https://github.com/apache/spark/blob/fcf636d9eb8d645c24be3db2d599aba2d7e2955a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala#L66 - let new_precision = DECIMAL128_MAX_PRECISION.min(*precision + 10); - Ok(DataType::Decimal128(new_precision, *scale)) - } - DataType::Decimal256(precision, scale) => { - // In the spark, the result type is DECIMAL(min(38,precision+10), s) - // Ref: https://github.com/apache/spark/blob/fcf636d9eb8d645c24be3db2d599aba2d7e2955a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala#L66 - let new_precision = DECIMAL256_MAX_PRECISION.min(*precision + 10); - Ok(DataType::Decimal256(new_precision, *scale)) - } - other => plan_err!("SUM does not support type \"{other:?}\""), - } -} - -/// Function return type of variance -pub fn variance_return_type(arg_type: &DataType) -> Result { - if NUMERICS.contains(arg_type) { - Ok(DataType::Float64) - } else { - plan_err!("VAR does not support {arg_type}") - } -} - -/// Function return type of covariance -pub fn covariance_return_type(arg_type: &DataType) -> Result { - if NUMERICS.contains(arg_type) { - Ok(DataType::Float64) - } else { - plan_err!("COVAR does not support {arg_type}") - } -} - -/// Function return type of correlation -pub fn correlation_return_type(arg_type: &DataType) -> Result { - if NUMERICS.contains(arg_type) { - Ok(DataType::Float64) - } else { - plan_err!("CORR does not support {arg_type}") - } -} - -/// Function return type of an average -pub fn avg_return_type(func_name: &str, arg_type: &DataType) -> Result { - match arg_type { - DataType::Decimal32(precision, scale) => { - // In the spark, the result type is DECIMAL(min(38,precision+4), min(38,scale+4)). 
- // Ref: https://github.com/apache/spark/blob/fcf636d9eb8d645c24be3db2d599aba2d7e2955a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala#L66 - let new_precision = DECIMAL32_MAX_PRECISION.min(*precision + 4); - let new_scale = DECIMAL32_MAX_SCALE.min(*scale + 4); - Ok(DataType::Decimal32(new_precision, new_scale)) - } - DataType::Decimal64(precision, scale) => { - // In the spark, the result type is DECIMAL(min(38,precision+4), min(38,scale+4)). - // Ref: https://github.com/apache/spark/blob/fcf636d9eb8d645c24be3db2d599aba2d7e2955a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala#L66 - let new_precision = DECIMAL64_MAX_PRECISION.min(*precision + 4); - let new_scale = DECIMAL64_MAX_SCALE.min(*scale + 4); - Ok(DataType::Decimal64(new_precision, new_scale)) - } - DataType::Decimal128(precision, scale) => { - // In the spark, the result type is DECIMAL(min(38,precision+4), min(38,scale+4)). - // Ref: https://github.com/apache/spark/blob/fcf636d9eb8d645c24be3db2d599aba2d7e2955a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala#L66 - let new_precision = DECIMAL128_MAX_PRECISION.min(*precision + 4); - let new_scale = DECIMAL128_MAX_SCALE.min(*scale + 4); - Ok(DataType::Decimal128(new_precision, new_scale)) - } - DataType::Decimal256(precision, scale) => { - // In the spark, the result type is DECIMAL(min(38,precision+4), min(38,scale+4)). - // Ref: https://github.com/apache/spark/blob/fcf636d9eb8d645c24be3db2d599aba2d7e2955a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala#L66 - let new_precision = DECIMAL256_MAX_PRECISION.min(*precision + 4); - let new_scale = DECIMAL256_MAX_SCALE.min(*scale + 4); - Ok(DataType::Decimal256(new_precision, new_scale)) - } - DataType::Duration(time_unit) => Ok(DataType::Duration(*time_unit)), - arg_type if NUMERICS.contains(arg_type) => Ok(DataType::Float64), - DataType::Dictionary(_, dict_value_type) => { - avg_return_type(func_name, dict_value_type.as_ref()) - } - other => plan_err!("{func_name} does not support {other:?}"), - } -} - -/// Internal sum type of an average -pub fn avg_sum_type(arg_type: &DataType) -> Result { - match arg_type { - DataType::Decimal32(precision, scale) => { - // In the spark, the sum type of avg is DECIMAL(min(38,precision+10), s) - let new_precision = DECIMAL32_MAX_PRECISION.min(*precision + 10); - Ok(DataType::Decimal32(new_precision, *scale)) - } - DataType::Decimal64(precision, scale) => { - // In the spark, the sum type of avg is DECIMAL(min(38,precision+10), s) - let new_precision = DECIMAL64_MAX_PRECISION.min(*precision + 10); - Ok(DataType::Decimal64(new_precision, *scale)) - } - DataType::Decimal128(precision, scale) => { - // In the spark, the sum type of avg is DECIMAL(min(38,precision+10), s) - let new_precision = DECIMAL128_MAX_PRECISION.min(*precision + 10); - Ok(DataType::Decimal128(new_precision, *scale)) - } - DataType::Decimal256(precision, scale) => { - // In Spark the sum type of avg is DECIMAL(min(38,precision+10), s) - let new_precision = DECIMAL256_MAX_PRECISION.min(*precision + 10); - Ok(DataType::Decimal256(new_precision, *scale)) - } - DataType::Duration(time_unit) => Ok(DataType::Duration(*time_unit)), - arg_type if NUMERICS.contains(arg_type) => Ok(DataType::Float64), - DataType::Dictionary(_, dict_value_type) => { - avg_sum_type(dict_value_type.as_ref()) - } - other => plan_err!("AVG does not support {other:?}"), - } -} - -pub fn 
is_sum_support_arg_type(arg_type: &DataType) -> bool { - match arg_type { - DataType::Dictionary(_, dict_value_type) => { - is_sum_support_arg_type(dict_value_type.as_ref()) - } - _ => matches!( - arg_type, - arg_type if NUMERICS.contains(arg_type) - || matches!(arg_type, DataType::Decimal32(_, _) | DataType::Decimal64(_, _) |DataType::Decimal128(_, _) | DataType::Decimal256(_, _)) - ), - } -} - -pub fn is_avg_support_arg_type(arg_type: &DataType) -> bool { - match arg_type { - DataType::Dictionary(_, dict_value_type) => { - is_avg_support_arg_type(dict_value_type.as_ref()) - } - _ => matches!( - arg_type, - arg_type if NUMERICS.contains(arg_type) - || matches!(arg_type, DataType::Decimal32(_, _) | DataType::Decimal64(_, _) |DataType::Decimal128(_, _) | DataType::Decimal256(_, _)) - ), - } -} - -pub fn is_variance_support_arg_type(arg_type: &DataType) -> bool { - matches!( - arg_type, - arg_type if NUMERICS.contains(arg_type) - ) -} - -pub fn is_covariance_support_arg_type(arg_type: &DataType) -> bool { - matches!( - arg_type, - arg_type if NUMERICS.contains(arg_type) - ) -} - -pub fn is_correlation_support_arg_type(arg_type: &DataType) -> bool { - matches!( - arg_type, - arg_type if NUMERICS.contains(arg_type) - ) -} - -pub fn is_integer_arg_type(arg_type: &DataType) -> bool { - arg_type.is_integer() -} - -pub fn coerce_avg_type(func_name: &str, arg_types: &[DataType]) -> Result> { - // Supported types smallint, int, bigint, real, double precision, decimal, or interval - // Refer to https://www.postgresql.org/docs/8.2/functions-aggregate.html doc - fn coerced_type(func_name: &str, data_type: &DataType) -> Result { - match &data_type { - DataType::Decimal32(p, s) => Ok(DataType::Decimal32(*p, *s)), - DataType::Decimal64(p, s) => Ok(DataType::Decimal64(*p, *s)), - DataType::Decimal128(p, s) => Ok(DataType::Decimal128(*p, *s)), - DataType::Decimal256(p, s) => Ok(DataType::Decimal256(*p, *s)), - d if d.is_numeric() => Ok(DataType::Float64), - DataType::Duration(time_unit) => Ok(DataType::Duration(*time_unit)), - DataType::Dictionary(_, v) => coerced_type(func_name, v.as_ref()), - _ => { - plan_err!( - "The function {:?} does not support inputs of type {}.", - func_name, - data_type - ) - } - } - } - Ok(vec![coerced_type(func_name, &arg_types[0])?]) -} -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_variance_return_data_type() -> Result<()> { - let data_type = DataType::Float64; - let result_type = variance_return_type(&data_type)?; - assert_eq!(DataType::Float64, result_type); - - let data_type = DataType::Decimal128(36, 10); - assert!(variance_return_type(&data_type).is_err()); - Ok(()) - } - - #[test] - fn test_sum_return_data_type() -> Result<()> { - let data_type = DataType::Decimal128(10, 5); - let result_type = sum_return_type(&data_type)?; - assert_eq!(DataType::Decimal128(20, 5), result_type); - - let data_type = DataType::Decimal128(36, 10); - let result_type = sum_return_type(&data_type)?; - assert_eq!(DataType::Decimal128(38, 10), result_type); - Ok(()) - } - - #[test] - fn test_covariance_return_data_type() -> Result<()> { - let data_type = DataType::Float64; - let result_type = covariance_return_type(&data_type)?; - assert_eq!(DataType::Float64, result_type); - - let data_type = DataType::Decimal128(36, 10); - assert!(covariance_return_type(&data_type).is_err()); - Ok(()) - } - - #[test] - fn test_correlation_return_data_type() -> Result<()> { - let data_type = DataType::Float64; - let result_type = correlation_return_type(&data_type)?; - 
assert_eq!(DataType::Float64, result_type); - - let data_type = DataType::Decimal128(36, 10); - assert!(correlation_return_type(&data_type).is_err()); - Ok(()) - } -} diff --git a/datafusion/expr-common/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs index 52bb211d9b99..122e0f987b6f 100644 --- a/datafusion/expr-common/src/type_coercion/binary.rs +++ b/datafusion/expr-common/src/type_coercion/binary.rs @@ -866,6 +866,7 @@ pub fn comparison_coercion_numeric( return Some(lhs_type.clone()); } binary_numeric_coercion(lhs_type, rhs_type) + .or_else(|| dictionary_comparison_coercion_numeric(lhs_type, rhs_type, true)) .or_else(|| string_coercion(lhs_type, rhs_type)) .or_else(|| null_coercion(lhs_type, rhs_type)) .or_else(|| string_numeric_coercion_as_numeric(lhs_type, rhs_type)) @@ -1353,38 +1354,75 @@ fn both_numeric_or_null_and_numeric(lhs_type: &DataType, rhs_type: &DataType) -> } } -/// Coercion rules for Dictionaries: the type that both lhs and rhs +/// Generic coercion rules for Dictionaries: the type that both lhs and rhs /// can be casted to for the purpose of a computation. /// /// Not all operators support dictionaries, if `preserve_dictionaries` is true -/// dictionaries will be preserved if possible -fn dictionary_comparison_coercion( +/// dictionaries will be preserved if possible. +/// +/// The `coerce_fn` parameter determines which comparison coercion function to use +/// for comparing the dictionary value types. +fn dictionary_comparison_coercion_generic( lhs_type: &DataType, rhs_type: &DataType, preserve_dictionaries: bool, + coerce_fn: fn(&DataType, &DataType) -> Option, ) -> Option { use arrow::datatypes::DataType::*; match (lhs_type, rhs_type) { ( Dictionary(_lhs_index_type, lhs_value_type), Dictionary(_rhs_index_type, rhs_value_type), - ) => comparison_coercion(lhs_value_type, rhs_value_type), + ) => coerce_fn(lhs_value_type, rhs_value_type), (d @ Dictionary(_, value_type), other_type) | (other_type, d @ Dictionary(_, value_type)) if preserve_dictionaries && value_type.as_ref() == other_type => { Some(d.clone()) } - (Dictionary(_index_type, value_type), _) => { - comparison_coercion(value_type, rhs_type) - } - (_, Dictionary(_index_type, value_type)) => { - comparison_coercion(lhs_type, value_type) - } + (Dictionary(_index_type, value_type), _) => coerce_fn(value_type, rhs_type), + (_, Dictionary(_index_type, value_type)) => coerce_fn(lhs_type, value_type), _ => None, } } +/// Coercion rules for Dictionaries: the type that both lhs and rhs +/// can be casted to for the purpose of a computation. +/// +/// Not all operators support dictionaries, if `preserve_dictionaries` is true +/// dictionaries will be preserved if possible +fn dictionary_comparison_coercion( + lhs_type: &DataType, + rhs_type: &DataType, + preserve_dictionaries: bool, +) -> Option { + dictionary_comparison_coercion_generic( + lhs_type, + rhs_type, + preserve_dictionaries, + comparison_coercion, + ) +} + +/// Coercion rules for Dictionaries with numeric preference: similar to +/// [`dictionary_comparison_coercion`] but uses [`comparison_coercion_numeric`] +/// which prefers numeric types over strings when both are present. +/// +/// This is used by [`comparison_coercion_numeric`] to maintain consistent +/// numeric-preferring semantics when dealing with dictionary types. 
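To make the effect concrete, a hedged sketch (assuming the function is reachable as `datafusion_expr_common::type_coercion::binary::comparison_coercion_numeric`; the exact coerced type follows the preserve-dictionary rules above, so only `is_some()` is asserted):

use arrow::datatypes::DataType;
use datafusion_expr_common::type_coercion::binary::comparison_coercion_numeric;

fn main() {
    // A dictionary-encoded numeric column compared against a plain numeric type
    let dict = DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Int64));

    // With the dictionary branch added to comparison_coercion_numeric,
    // a common comparison type is now found for this pair.
    assert!(comparison_coercion_numeric(&dict, &DataType::Int64).is_some());
}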
+fn dictionary_comparison_coercion_numeric( + lhs_type: &DataType, + rhs_type: &DataType, + preserve_dictionaries: bool, +) -> Option { + dictionary_comparison_coercion_generic( + lhs_type, + rhs_type, + preserve_dictionaries, + comparison_coercion_numeric, + ) +} + /// Coercion rules for string concat. /// This is a union of string coercion rules and specified rules: /// 1. At least one side of lhs and rhs should be string type (Utf8 / LargeUtf8) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 282b3f6a0f55..6077b3c1e5bb 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -18,7 +18,7 @@ //! Logical Expressions: [`Expr`] use std::cmp::Ordering; -use std::collections::{BTreeMap, HashSet}; +use std::collections::HashSet; use std::fmt::{self, Display, Formatter, Write}; use std::hash::{Hash, Hasher}; use std::mem; @@ -45,6 +45,10 @@ use sqlparser::ast::{ RenameSelectItem, ReplaceSelectElement, }; +// Moved in 51.0.0 to datafusion_common +pub use datafusion_common::metadata::FieldMetadata; +use datafusion_common::metadata::ScalarAndMetadata; + // This mirrors sqlparser::ast::NullTreatment but we need our own variant // for when the sql feature is disabled. #[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Ord, PartialOrd)] @@ -421,6 +425,14 @@ impl From for Expr { } } +/// Create an [`Expr`] from an [`ScalarAndMetadata`] +impl From for Expr { + fn from(value: ScalarAndMetadata) -> Self { + let (value, metadata) = value.into_inner(); + Expr::Literal(value, metadata) + } +} + /// Create an [`Expr`] from an optional qualifier and a [`FieldRef`]. This is /// useful for creating [`Expr`] from a [`DFSchema`]. /// @@ -447,235 +459,6 @@ impl<'a> TreeNodeContainer<'a, Self> for Expr { } } -/// Literal metadata -/// -/// Stores metadata associated with a literal expressions -/// and is designed to be fast to `clone`. -/// -/// This structure is used to store metadata associated with a literal expression, and it -/// corresponds to the `metadata` field on [`Field`]. -/// -/// # Example: Create [`FieldMetadata`] from a [`Field`] -/// ``` -/// # use std::collections::HashMap; -/// # use datafusion_expr::expr::FieldMetadata; -/// # use arrow::datatypes::{Field, DataType}; -/// # let field = Field::new("c1", DataType::Int32, true) -/// # .with_metadata(HashMap::from([("foo".to_string(), "bar".to_string())])); -/// // Create a new `FieldMetadata` instance from a `Field` -/// let metadata = FieldMetadata::new_from_field(&field); -/// // There is also a `From` impl: -/// let metadata = FieldMetadata::from(&field); -/// ``` -/// -/// # Example: Update a [`Field`] with [`FieldMetadata`] -/// ``` -/// # use datafusion_expr::expr::FieldMetadata; -/// # use arrow::datatypes::{Field, DataType}; -/// # let field = Field::new("c1", DataType::Int32, true); -/// # let metadata = FieldMetadata::new_from_field(&field); -/// // Add any metadata from `FieldMetadata` to `Field` -/// let updated_field = metadata.add_to_field(field); -/// ``` -/// -#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] -pub struct FieldMetadata { - /// The inner metadata of a literal expression, which is a map of string - /// keys to string values. - /// - /// Note this is not a `HashMap` because `HashMap` does not provide - /// implementations for traits like `Debug` and `Hash`. - inner: Arc>, -} - -impl Default for FieldMetadata { - fn default() -> Self { - Self::new_empty() - } -} - -impl FieldMetadata { - /// Create a new empty metadata instance. 
- pub fn new_empty() -> Self { - Self { - inner: Arc::new(BTreeMap::new()), - } - } - - /// Merges two optional `FieldMetadata` instances, overwriting any existing - /// keys in `m` with keys from `n` if present. - /// - /// This function is commonly used in alias operations, particularly for literals - /// with metadata. When creating an alias expression, the metadata from the original - /// expression (such as a literal) is combined with any metadata specified on the alias. - /// - /// # Arguments - /// - /// * `m` - The first metadata (typically from the original expression like a literal) - /// * `n` - The second metadata (typically from the alias definition) - /// - /// # Merge Strategy - /// - /// - If both metadata instances exist, they are merged with `n` taking precedence - /// - Keys from `n` will overwrite keys from `m` if they have the same name - /// - If only one metadata instance exists, it is returned unchanged - /// - If neither exists, `None` is returned - /// - /// # Example usage - /// ```rust - /// use datafusion_expr::expr::FieldMetadata; - /// use std::collections::BTreeMap; - /// - /// // Create metadata for a literal expression - /// let literal_metadata = Some(FieldMetadata::from(BTreeMap::from([ - /// ("source".to_string(), "constant".to_string()), - /// ("type".to_string(), "int".to_string()), - /// ]))); - /// - /// // Create metadata for an alias - /// let alias_metadata = Some(FieldMetadata::from(BTreeMap::from([ - /// ("description".to_string(), "answer".to_string()), - /// ("source".to_string(), "user".to_string()), // This will override literal's "source" - /// ]))); - /// - /// // Merge the metadata - /// let merged = FieldMetadata::merge_options( - /// literal_metadata.as_ref(), - /// alias_metadata.as_ref(), - /// ); - /// - /// // Result contains: {"source": "user", "type": "int", "description": "answer"} - /// assert!(merged.is_some()); - /// ``` - pub fn merge_options( - m: Option<&FieldMetadata>, - n: Option<&FieldMetadata>, - ) -> Option { - match (m, n) { - (Some(m), Some(n)) => { - let mut merged = m.clone(); - merged.extend(n.clone()); - Some(merged) - } - (Some(m), None) => Some(m.clone()), - (None, Some(n)) => Some(n.clone()), - (None, None) => None, - } - } - - /// Create a new metadata instance from a `Field`'s metadata. - pub fn new_from_field(field: &Field) -> Self { - let inner = field - .metadata() - .iter() - .map(|(k, v)| (k.to_string(), v.to_string())) - .collect(); - Self { - inner: Arc::new(inner), - } - } - - /// Create a new metadata instance from a map of string keys to string values. - pub fn new(inner: BTreeMap) -> Self { - Self { - inner: Arc::new(inner), - } - } - - /// Get the inner metadata as a reference to a `BTreeMap`. - pub fn inner(&self) -> &BTreeMap { - &self.inner - } - - /// Return the inner metadata - pub fn into_inner(self) -> Arc> { - self.inner - } - - /// Adds metadata from `other` into `self`, overwriting any existing keys. - pub fn extend(&mut self, other: Self) { - if other.is_empty() { - return; - } - let other = Arc::unwrap_or_clone(other.into_inner()); - Arc::make_mut(&mut self.inner).extend(other); - } - - /// Returns true if the metadata is empty. - pub fn is_empty(&self) -> bool { - self.inner.is_empty() - } - - /// Returns the number of key-value pairs in the metadata. 
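The block being removed here has moved rather than disappeared; a minimal sketch of the updated import path (assuming the moved type keeps the same constructors, as the updated doc examples elsewhere in this patch suggest):

use std::collections::BTreeMap;
use datafusion_common::metadata::FieldMetadata;

fn main() {
    // Same API as before, now imported from datafusion_common
    let metadata = FieldMetadata::from(BTreeMap::from([(
        "source".to_string(),
        "constant".to_string(),
    )]));
    assert_eq!(metadata.len(), 1);
    assert!(!metadata.is_empty());
}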
- pub fn len(&self) -> usize { - self.inner.len() - } - - /// Convert this `FieldMetadata` into a `HashMap` - pub fn to_hashmap(&self) -> std::collections::HashMap { - self.inner - .iter() - .map(|(k, v)| (k.to_string(), v.to_string())) - .collect() - } - - /// Updates the metadata on the Field with this metadata, if it is not empty. - pub fn add_to_field(&self, field: Field) -> Field { - if self.inner.is_empty() { - return field; - } - - field.with_metadata(self.to_hashmap()) - } -} - -impl From<&Field> for FieldMetadata { - fn from(field: &Field) -> Self { - Self::new_from_field(field) - } -} - -impl From> for FieldMetadata { - fn from(inner: BTreeMap) -> Self { - Self::new(inner) - } -} - -impl From> for FieldMetadata { - fn from(map: std::collections::HashMap) -> Self { - Self::new(map.into_iter().collect()) - } -} - -/// From reference -impl From<&std::collections::HashMap> for FieldMetadata { - fn from(map: &std::collections::HashMap) -> Self { - let inner = map - .iter() - .map(|(k, v)| (k.to_string(), v.to_string())) - .collect(); - Self::new(inner) - } -} - -/// From hashbrown map -impl From> for FieldMetadata { - fn from(map: HashMap) -> Self { - let inner = map.into_iter().collect(); - Self::new(inner) - } -} - -impl From<&HashMap> for FieldMetadata { - fn from(map: &HashMap) -> Self { - let inner = map - .into_iter() - .map(|(k, v)| (k.to_string(), v.to_string())) - .collect(); - Self::new(inner) - } -} - /// The metadata used in [`Field::metadata`]. /// /// This represents the metadata associated with an Arrow [`Field`]. The metadata consists of key-value pairs. @@ -1370,13 +1153,22 @@ pub struct Placeholder { /// The identifier of the parameter, including the leading `$` (e.g, `"$1"` or `"$foo"`) pub id: String, /// The type the parameter will be filled in with - pub data_type: Option, + pub field: Option, } impl Placeholder { /// Create a new Placeholder expression + #[deprecated(since = "51.0.0", note = "Use new_with_field instead")] pub fn new(id: String, data_type: Option) -> Self { - Self { id, data_type } + Self { + id, + field: data_type.map(|dt| Arc::new(Field::new("", dt, true))), + } + } + + /// Create a new Placeholder expression from a Field + pub fn new_with_field(id: String, field: Option) -> Self { + Self { id, field } } } @@ -1843,7 +1635,7 @@ impl Expr { /// ``` /// # use datafusion_expr::col; /// # use std::collections::HashMap; - /// # use datafusion_expr::expr::FieldMetadata; + /// # use datafusion_common::metadata::FieldMetadata; /// let metadata = HashMap::from([("key".to_string(), "value".to_string())]); /// let metadata = FieldMetadata::from(metadata); /// let expr = col("foo").alias_with_metadata("bar", Some(metadata)); @@ -1875,7 +1667,7 @@ impl Expr { /// ``` /// # use datafusion_expr::col; /// # use std::collections::HashMap; - /// # use datafusion_expr::expr::FieldMetadata; + /// # use datafusion_common::metadata::FieldMetadata; /// let metadata = HashMap::from([("key".to_string(), "value".to_string())]); /// let metadata = FieldMetadata::from(metadata); /// let expr = col("foo").alias_qualified_with_metadata(Some("tbl"), "bar", Some(metadata)); @@ -2886,19 +2678,23 @@ impl HashNode for Expr { } } -// Modifies expr if it is a placeholder with datatype of right +// Modifies expr to match the DataType, metadata, and nullability of other if it is +// a placeholder with previously unspecified type information (i.e., most placeholders) fn rewrite_placeholder(expr: &mut Expr, other: &Expr, schema: &DFSchema) -> Result<()> { - if let 
Expr::Placeholder(Placeholder { id: _, data_type }) = expr { - if data_type.is_none() { - let other_dt = other.get_type(schema); - match other_dt { + if let Expr::Placeholder(Placeholder { id: _, field }) = expr { + if field.is_none() { + let other_field = other.to_field(schema); + match other_field { Err(e) => { Err(e.context(format!( "Can not find type of {other} needed to infer type of {expr}" )))?; } - Ok(dt) => { - *data_type = Some(dt); + Ok((_, other_field)) => { + // We can't infer the nullability of the future parameter that might + // be bound, so ensure this is set to true + *field = + Some(other_field.as_ref().clone().with_nullable(true).into()); } } }; @@ -3715,8 +3511,8 @@ pub fn physical_name(expr: &Expr) -> Result { mod test { use crate::expr_fn::col; use crate::{ - case, lit, qualified_wildcard, wildcard, wildcard_with_options, ColumnarValue, - ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Volatility, + case, lit, placeholder, qualified_wildcard, wildcard, wildcard_with_options, + ColumnarValue, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Volatility, }; use arrow::datatypes::{Field, Schema}; use sqlparser::ast; @@ -3730,15 +3526,15 @@ mod test { let param_placeholders = vec![ Expr::Placeholder(Placeholder { id: "$1".to_string(), - data_type: None, + field: None, }), Expr::Placeholder(Placeholder { id: "$2".to_string(), - data_type: None, + field: None, }), Expr::Placeholder(Placeholder { id: "$3".to_string(), - data_type: None, + field: None, }), ]; let in_list = Expr::InList(InList { @@ -3764,8 +3560,8 @@ mod test { match expr { Expr::Placeholder(placeholder) => { assert_eq!( - placeholder.data_type, - Some(DataType::Int32), + placeholder.field.unwrap().data_type(), + &DataType::Int32, "Placeholder {} should infer Int32", placeholder.id ); @@ -3789,7 +3585,7 @@ mod test { expr: Box::new(col("name")), pattern: Box::new(Expr::Placeholder(Placeholder { id: "$1".to_string(), - data_type: None, + field: None, })), negated: false, case_insensitive: false, @@ -3802,7 +3598,7 @@ mod test { match inferred_expr { Expr::Like(like) => match *like.pattern { Expr::Placeholder(placeholder) => { - assert_eq!(placeholder.data_type, Some(DataType::Utf8)); + assert_eq!(placeholder.field.unwrap().data_type(), &DataType::Utf8); } _ => panic!("Expected Placeholder"), }, @@ -3817,8 +3613,8 @@ mod test { Expr::SimilarTo(like) => match *like.pattern { Expr::Placeholder(placeholder) => { assert_eq!( - placeholder.data_type, - Some(DataType::Utf8), + placeholder.field.unwrap().data_type(), + &DataType::Utf8, "Placeholder {} should infer Utf8", placeholder.id ); @@ -3829,6 +3625,39 @@ mod test { } } + #[test] + fn infer_placeholder_with_metadata() { + // name == $1, where name is a non-nullable string + let schema = + Arc::new(Schema::new(vec![Field::new("name", DataType::Utf8, false) + .with_metadata( + [("some_key".to_string(), "some_value".to_string())].into(), + )])); + let df_schema = DFSchema::try_from(schema).unwrap(); + + let expr = binary_expr(col("name"), Operator::Eq, placeholder("$1")); + + let (inferred_expr, _) = expr.infer_placeholder_types(&df_schema).unwrap(); + match inferred_expr { + Expr::BinaryExpr(BinaryExpr { right, .. 
}) => match *right { + Expr::Placeholder(placeholder) => { + assert_eq!( + placeholder.field.as_ref().unwrap().data_type(), + &DataType::Utf8 + ); + assert_eq!( + placeholder.field.as_ref().unwrap().metadata(), + df_schema.field(0).metadata() + ); + // Inferred placeholder should still be nullable + assert!(placeholder.field.as_ref().unwrap().is_nullable()); + } + _ => panic!("Expected Placeholder"), + }, + _ => panic!("Expected BinaryExpr"), + } + } + #[test] fn format_case_when() -> Result<()> { let expr = case(col("a")) diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 4666411dd540..c777c4978f99 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -119,13 +119,13 @@ pub fn ident(name: impl Into) -> Expr { /// /// ```rust /// # use datafusion_expr::{placeholder}; -/// let p = placeholder("$0"); // $0, refers to parameter 1 -/// assert_eq!(p.to_string(), "$0") +/// let p = placeholder("$1"); // $1, refers to parameter 1 +/// assert_eq!(p.to_string(), "$1") /// ``` pub fn placeholder(id: impl Into) -> Expr { Expr::Placeholder(Placeholder { id: id.into(), - data_type: None, + field: None, }) } diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index e803e3534130..8c557a5630f0 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -17,8 +17,8 @@ use super::{Between, Expr, Like}; use crate::expr::{ - AggregateFunction, AggregateFunctionParams, Alias, BinaryExpr, Cast, FieldMetadata, - InList, InSubquery, Placeholder, ScalarFunction, TryCast, Unnest, WindowFunction, + AggregateFunction, AggregateFunctionParams, Alias, BinaryExpr, Cast, InList, + InSubquery, Placeholder, ScalarFunction, TryCast, Unnest, WindowFunction, WindowFunctionParams, }; use crate::type_coercion::functions::{ @@ -28,6 +28,7 @@ use crate::udf::ReturnFieldArgs; use crate::{utils, LogicalPlan, Projection, Subquery, WindowFunctionDefinition}; use arrow::compute::can_cast_types; use arrow::datatypes::{DataType, Field, FieldRef}; +use datafusion_common::metadata::FieldMetadata; use datafusion_common::{ not_impl_err, plan_datafusion_err, plan_err, Column, DataFusionError, ExprSchema, Result, Spans, TableReference, @@ -104,9 +105,9 @@ impl ExprSchemable for Expr { fn get_type(&self, schema: &dyn ExprSchema) -> Result { match self { Expr::Alias(Alias { expr, name, .. }) => match &**expr { - Expr::Placeholder(Placeholder { data_type, .. }) => match &data_type { + Expr::Placeholder(Placeholder { field, .. }) => match &field { None => schema.data_type(&Column::from_name(name)).cloned(), - Some(dt) => Ok(dt.clone()), + Some(field) => Ok(field.data_type().clone()), }, _ => expr.get_type(schema), }, @@ -211,9 +212,9 @@ impl ExprSchemable for Expr { ) .get_result_type(), Expr::Like { .. } | Expr::SimilarTo { .. } => Ok(DataType::Boolean), - Expr::Placeholder(Placeholder { data_type, .. }) => { - if let Some(dtype) = data_type { - Ok(dtype.clone()) + Expr::Placeholder(Placeholder { field, .. }) => { + if let Some(field) = field { + Ok(field.data_type().clone()) } else { // If the placeholder's type hasn't been specified, treat it as // null (unspecified placeholders generate an error during planning) @@ -309,10 +310,12 @@ impl ExprSchemable for Expr { window_function, ) .map(|(_, nullable)| nullable), - Expr::ScalarVariable(_, _) - | Expr::TryCast { .. 
} - | Expr::Unnest(_) - | Expr::Placeholder(_) => Ok(true), + Expr::Placeholder(Placeholder { id: _, field }) => { + Ok(field.as_ref().map(|f| f.is_nullable()).unwrap_or(true)) + } + Expr::ScalarVariable(_, _) | Expr::TryCast { .. } | Expr::Unnest(_) => { + Ok(true) + } Expr::IsNull(_) | Expr::IsNotNull(_) | Expr::IsTrue(_) @@ -428,25 +431,11 @@ impl ExprSchemable for Expr { let field = match self { Expr::Alias(Alias { expr, - name, + name: _, metadata, .. }) => { - let field = match &**expr { - Expr::Placeholder(Placeholder { data_type, .. }) => { - match &data_type { - None => schema - .data_type_and_nullable(&Column::from_name(name)) - .map(|(d, n)| Field::new(&schema_name, d.clone(), n)), - Some(dt) => Ok(Field::new( - &schema_name, - dt.clone(), - expr.nullable(schema)?, - )), - } - } - _ => expr.to_field(schema).map(|(_, f)| f.as_ref().clone()), - }?; + let field = expr.to_field(schema).map(|(_, f)| f.as_ref().clone())?; let mut combined_metadata = expr.metadata(schema)?; if let Some(metadata) = metadata { @@ -594,6 +583,10 @@ impl ExprSchemable for Expr { .to_field(schema) .map(|(_, f)| f.as_ref().clone().with_data_type(data_type.clone())) .map(Arc::new), + Expr::Placeholder(Placeholder { + id: _, + field: Some(field), + }) => Ok(field.as_ref().clone().with_name(&schema_name).into()), Expr::Like(_) | Expr::SimilarTo(_) | Expr::Not(_) @@ -776,10 +769,12 @@ pub fn cast_subquery(subquery: Subquery, cast_to_type: &DataType) -> Result {{ @@ -905,7 +900,7 @@ mod tests { let schema = DFSchema::from_unqualified_fields( vec![meta.add_to_field(Field::new("foo", DataType::Int32, true))].into(), - std::collections::HashMap::new(), + HashMap::new(), ) .unwrap(); @@ -921,6 +916,52 @@ mod tests { assert_eq!(meta, outer_ref.metadata(&schema).unwrap()); } + #[test] + fn test_expr_placeholder() { + let schema = MockExprSchema::new(); + + let mut placeholder_meta = HashMap::new(); + placeholder_meta.insert("bar".to_string(), "buzz".to_string()); + let placeholder_meta = FieldMetadata::from(placeholder_meta); + + let expr = Expr::Placeholder(Placeholder::new_with_field( + "".to_string(), + Some( + Field::new("", DataType::Utf8, true) + .with_metadata(placeholder_meta.to_hashmap()) + .into(), + ), + )); + + assert_eq!( + expr.data_type_and_nullable(&schema).unwrap(), + (DataType::Utf8, true) + ); + assert_eq!(placeholder_meta, expr.metadata(&schema).unwrap()); + + let expr_alias = expr.alias("a placeholder by any other name"); + assert_eq!( + expr_alias.data_type_and_nullable(&schema).unwrap(), + (DataType::Utf8, true) + ); + assert_eq!(placeholder_meta, expr_alias.metadata(&schema).unwrap()); + + // Non-nullable placeholder field should remain non-nullable + let expr = Expr::Placeholder(Placeholder::new_with_field( + "".to_string(), + Some(Field::new("", DataType::Utf8, false).into()), + )); + assert_eq!( + expr.data_type_and_nullable(&schema).unwrap(), + (DataType::Utf8, false) + ); + let expr_alias = expr.alias("a placeholder by any other name"); + assert_eq!( + expr_alias.data_type_and_nullable(&schema).unwrap(), + (DataType::Utf8, false) + ); + } + #[derive(Debug)] struct MockExprSchema { field: Field, diff --git a/datafusion/expr/src/literal.rs b/datafusion/expr/src/literal.rs index c4bd43bc0a62..335d7b471f5f 100644 --- a/datafusion/expr/src/literal.rs +++ b/datafusion/expr/src/literal.rs @@ -17,9 +17,8 @@ //! Literal module contains foundational types that are used to represent literals in DataFusion. 
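A minimal usage sketch of the field-based placeholder exercised in the tests above, assuming only the APIs visible in this patch (`Placeholder::new_with_field`, the public `field` member, and arrow's `Field` builder methods); the key/value pair is illustrative:

```rust
use std::sync::Arc;

use arrow::datatypes::{DataType, Field};
use datafusion_expr::expr::Placeholder;
use datafusion_expr::Expr;

fn main() {
    // A field carrying the parameter's type, nullability, and extension metadata
    let field = Field::new("", DataType::Utf8, true).with_metadata(
        [("some_key".to_string(), "some_value".to_string())].into(),
    );

    // New-style construction: the placeholder keeps the whole Field,
    // not just a DataType
    let expr = Expr::Placeholder(Placeholder::new_with_field(
        "$1".to_string(),
        Some(Arc::new(field)),
    ));

    if let Expr::Placeholder(p) = &expr {
        assert_eq!(p.field.as_ref().unwrap().data_type(), &DataType::Utf8);
        assert!(p.field.as_ref().unwrap().is_nullable());
    }
}
```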
-use crate::expr::FieldMetadata; use crate::Expr; -use datafusion_common::ScalarValue; +use datafusion_common::{metadata::FieldMetadata, ScalarValue}; /// Create a literal expression pub fn lit(n: T) -> Expr { diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index 7a283b0420d3..a430add3f786 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -25,7 +25,7 @@ use std::iter::once; use std::sync::Arc; use crate::dml::CopyTo; -use crate::expr::{Alias, FieldMetadata, PlannedReplaceSelectItem, Sort as SortExpr}; +use crate::expr::{Alias, PlannedReplaceSelectItem, Sort as SortExpr}; use crate::expr_rewriter::{ coerce_plan_expr_for_schema, normalize_col, normalize_col_with_schemas_and_ambiguity_check, normalize_cols, normalize_sorts, @@ -50,9 +50,10 @@ use crate::{ use super::dml::InsertOp; use arrow::compute::can_cast_types; -use arrow::datatypes::{DataType, Field, Fields, Schema, SchemaRef}; +use arrow::datatypes::{DataType, Field, FieldRef, Fields, Schema, SchemaRef}; use datafusion_common::display::ToStringifiedPlan; use datafusion_common::file_options::file_type::FileType; +use datafusion_common::metadata::FieldMetadata; use datafusion_common::{ exec_err, get_target_functional_dependencies, internal_datafusion_err, not_impl_err, plan_datafusion_err, plan_err, Column, Constraints, DFSchema, DFSchemaRef, @@ -622,11 +623,11 @@ impl LogicalPlanBuilder { } /// Make a builder for a prepare logical plan from the builder's plan - pub fn prepare(self, name: String, data_types: Vec) -> Result { + pub fn prepare(self, name: String, fields: Vec) -> Result { Ok(Self::new(LogicalPlan::Statement(Statement::Prepare( Prepare { name, - data_types, + fields, input: self.plan, }, )))) diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index b8200ab8a48c..9541f35e3062 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -51,9 +51,10 @@ use crate::{ WindowFunctionDefinition, }; -use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; +use arrow::datatypes::{DataType, Field, FieldRef, Schema, SchemaRef}; use datafusion_common::cse::{NormalizeEq, Normalizeable}; use datafusion_common::format::ExplainFormat; +use datafusion_common::metadata::check_metadata_with_storage_equal; use datafusion_common::tree_node::{ Transformed, TreeNode, TreeNodeContainer, TreeNodeRecursion, }; @@ -1098,15 +1099,13 @@ impl LogicalPlan { })) } LogicalPlan::Statement(Statement::Prepare(Prepare { - name, - data_types, - .. + name, fields, .. })) => { self.assert_no_expressions(expr)?; let input = self.only_input(inputs)?; Ok(LogicalPlan::Statement(Statement::Prepare(Prepare { name: name.clone(), - data_types: data_types.clone(), + fields: fields.clone(), input: Arc::new(input), }))) } @@ -1282,7 +1281,7 @@ impl LogicalPlan { if let LogicalPlan::Statement(Statement::Prepare(prepare_lp)) = plan_with_values { - param_values.verify(&prepare_lp.data_types)?; + param_values.verify_fields(&prepare_lp.fields)?; // try and take ownership of the input if is not shared, clone otherwise Arc::unwrap_or_clone(prepare_lp.input) } else { @@ -1463,8 +1462,10 @@ impl LogicalPlan { let original_name = name_preserver.save(&e); let transformed_expr = e.transform_up(|e| { if let Expr::Placeholder(Placeholder { id, .. 
}) = e { - let value = param_values.get_placeholders_with_values(&id)?; - Ok(Transformed::yes(Expr::Literal(value, None))) + let (value, metadata) = param_values + .get_placeholders_with_values(&id)? + .into_inner(); + Ok(Transformed::yes(Expr::Literal(value, metadata))) } else { Ok(Transformed::no(e)) } @@ -1494,24 +1495,43 @@ impl LogicalPlan { } /// Walk the logical plan, find any `Placeholder` tokens, and return a map of their IDs and DataTypes + /// + /// Note that this will drop any extension or field metadata attached to parameters. Use + /// [`LogicalPlan::get_parameter_fields`] to keep extension metadata. pub fn get_parameter_types( &self, ) -> Result<HashMap<String, Option<DataType>>, DataFusionError> { - let mut param_types: HashMap<String, Option<DataType>> = HashMap::new(); + let mut parameter_fields = self.get_parameter_fields()?; + Ok(parameter_fields + .drain() + .map(|(name, maybe_field)| { + (name, maybe_field.map(|field| field.data_type().clone())) + }) + .collect()) + } + + /// Walk the logical plan, find any `Placeholder` tokens, and return a map of their IDs and FieldRefs + pub fn get_parameter_fields( + &self, + ) -> Result<HashMap<String, Option<FieldRef>>, DataFusionError> { + let mut param_types: HashMap<String, Option<FieldRef>> = HashMap::new(); self.apply_with_subqueries(|plan| { plan.apply_expressions(|expr| { expr.apply(|expr| { - if let Expr::Placeholder(Placeholder { id, data_type }) = expr { + if let Expr::Placeholder(Placeholder { id, field }) = expr { let prev = param_types.get(id); - match (prev, data_type) { - (Some(Some(prev)), Some(dt)) => { - if prev != dt { - plan_err!("Conflicting types for {id}")?; - } + match (prev, field) { + (Some(Some(prev)), Some(field)) => { + check_metadata_with_storage_equal( + (field.data_type(), Some(field.metadata())), + (prev.data_type(), Some(prev.metadata())), + "parameter", + &format!(": Conflicting types for id {id}"), + )?; } - (_, Some(dt)) => { - param_types.insert(id.clone(), Some(dt.clone())); + (_, Some(field)) => { + param_types.insert(id.clone(), Some(Arc::clone(field))); } _ => { param_types.insert(id.clone(), None); @@ -2753,7 +2773,8 @@ pub struct Union { impl Union { /// Constructs new Union instance deriving schema from inputs. - fn try_new(inputs: Vec<Arc<LogicalPlan>>) -> Result<Self> { + /// Schema data types must match exactly. 
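A sketch of how a caller might walk the new `get_parameter_fields` map; the plan construction reuses the `table_scan`/`placeholder` helpers that this patch's own tests rely on, and the `parameter_report` helper and its output format are illustrative only:

```rust
use arrow::datatypes::{DataType, Field, Schema};
use datafusion_common::Result;
use datafusion_expr::logical_plan::{table_scan, LogicalPlan};
use datafusion_expr::{col, placeholder};

// Hypothetical helper: print each parameter with whatever type information the
// plan has, keeping field metadata visible instead of collapsing to a DataType.
fn parameter_report(plan: &LogicalPlan) -> Result<()> {
    for (id, maybe_field) in plan.get_parameter_fields()? {
        match maybe_field {
            Some(field) => println!(
                "{id}: {} (nullable: {}, metadata: {:?})",
                field.data_type(),
                field.is_nullable(),
                field.metadata()
            ),
            None => println!("{id}: type not yet inferred"),
        }
    }
    Ok(())
}

fn main() -> Result<()> {
    let schema = Schema::new(vec![Field::new("id", DataType::Int32, false)]);
    let plan = table_scan(Some("t"), &schema, None)?
        .filter(col("id").eq(placeholder("$1")))?
        .build()?;
    parameter_report(&plan)
}
```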
+ pub fn try_new(inputs: Vec>) -> Result { let schema = Self::derive_schema_from_inputs(&inputs, false, false)?; Ok(Union { inputs, schema }) } @@ -4230,6 +4251,7 @@ mod tests { binary_expr, col, exists, in_subquery, lit, placeholder, scalar_subquery, GroupingSet, }; + use datafusion_common::metadata::ScalarAndMetadata; use datafusion_common::tree_node::{ TransformedResult, TreeNodeRewriter, TreeNodeVisitor, }; @@ -4770,6 +4792,38 @@ mod tests { .expect_err("unexpectedly succeeded to replace an invalid placeholder"); } + #[test] + fn test_replace_placeholder_mismatched_metadata() { + let schema = Schema::new(vec![Field::new("id", DataType::Int32, false)]); + + // Create a prepared statement with explicit fields that do not have metadata + let plan = table_scan(TableReference::none(), &schema, None) + .unwrap() + .filter(col("id").eq(placeholder("$1"))) + .unwrap() + .build() + .unwrap(); + let prepared_builder = LogicalPlanBuilder::new(plan) + .prepare( + "".to_string(), + vec![Field::new("", DataType::Int32, true).into()], + ) + .unwrap(); + + // Attempt to bind a parameter with metadata + let mut scalar_meta = HashMap::new(); + scalar_meta.insert("some_key".to_string(), "some_value".to_string()); + let param_values = ParamValues::List(vec![ScalarAndMetadata::new( + ScalarValue::Int32(Some(42)), + Some(scalar_meta.into()), + )]); + prepared_builder + .plan() + .clone() + .with_param_values(param_values) + .expect_err("prepared field metadata mismatch unexpectedly succeeded"); + } + #[test] fn test_nullable_schema_after_grouping_set() { let schema = Schema::new(vec![ @@ -5142,7 +5196,7 @@ mod tests { .unwrap(); // Check that the placeholder parameters have not received a DataType. - let params = plan.get_parameter_types().unwrap(); + let params = plan.get_parameter_fields().unwrap(); assert_eq!(params.len(), 1); let parameter_type = params.clone().get(placeholder_value).unwrap().clone(); diff --git a/datafusion/expr/src/logical_plan/statement.rs b/datafusion/expr/src/logical_plan/statement.rs index 6d3fe9fa75ac..bfc6b53d1136 100644 --- a/datafusion/expr/src/logical_plan/statement.rs +++ b/datafusion/expr/src/logical_plan/statement.rs @@ -15,7 +15,8 @@ // specific language governing permissions and limitations // under the License. -use arrow::datatypes::DataType; +use arrow::datatypes::FieldRef; +use datafusion_common::metadata::format_type_and_metadata; use datafusion_common::{DFSchema, DFSchemaRef}; use itertools::Itertools as _; use std::fmt::{self, Display}; @@ -108,10 +109,18 @@ impl Statement { }) => { write!(f, "SetVariable: set {variable:?} to {value:?}") } - Statement::Prepare(Prepare { - name, data_types, .. - }) => { - write!(f, "Prepare: {name:?} [{}]", data_types.iter().join(", ")) + Statement::Prepare(Prepare { name, fields, .. }) => { + write!( + f, + "Prepare: {name:?} [{}]", + fields + .iter() + .map(|f| format_type_and_metadata( + f.data_type(), + Some(f.metadata()) + )) + .join(", ") + ) } Statement::Execute(Execute { name, parameters, .. 
@@ -192,7 +201,7 @@ pub struct Prepare { /// The name of the statement pub name: String, /// Data types of the parameters ([`Expr::Placeholder`]) - pub data_types: Vec, + pub fields: Vec, /// The logical plan of the statements pub input: Arc, } diff --git a/datafusion/expr/src/test/function_stub.rs b/datafusion/expr/src/test/function_stub.rs index 41bc64505807..8609afeae601 100644 --- a/datafusion/expr/src/test/function_stub.rs +++ b/datafusion/expr/src/test/function_stub.rs @@ -22,13 +22,15 @@ use std::any::Any; use arrow::datatypes::{ - DataType, FieldRef, DECIMAL128_MAX_PRECISION, DECIMAL256_MAX_PRECISION, - DECIMAL32_MAX_PRECISION, DECIMAL64_MAX_PRECISION, + DataType, FieldRef, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE, + DECIMAL256_MAX_PRECISION, DECIMAL256_MAX_SCALE, DECIMAL32_MAX_PRECISION, + DECIMAL32_MAX_SCALE, DECIMAL64_MAX_PRECISION, DECIMAL64_MAX_SCALE, }; +use datafusion_common::plan_err; use datafusion_common::{exec_err, not_impl_err, utils::take_function_args, Result}; -use crate::type_coercion::aggregates::{avg_return_type, coerce_avg_type, NUMERICS}; +use crate::type_coercion::aggregates::NUMERICS; use crate::Volatility::Immutable; use crate::{ expr::AggregateFunction, @@ -488,8 +490,61 @@ impl AggregateUDFImpl for Avg { &self.signature } + fn coerce_types(&self, arg_types: &[DataType]) -> Result> { + let [args] = take_function_args(self.name(), arg_types)?; + + // Supported types smallint, int, bigint, real, double precision, decimal, or interval + // Refer to https://www.postgresql.org/docs/8.2/functions-aggregate.html doc + fn coerced_type(data_type: &DataType) -> Result { + match &data_type { + DataType::Decimal32(p, s) => Ok(DataType::Decimal32(*p, *s)), + DataType::Decimal64(p, s) => Ok(DataType::Decimal64(*p, *s)), + DataType::Decimal128(p, s) => Ok(DataType::Decimal128(*p, *s)), + DataType::Decimal256(p, s) => Ok(DataType::Decimal256(*p, *s)), + d if d.is_numeric() => Ok(DataType::Float64), + DataType::Duration(time_unit) => Ok(DataType::Duration(*time_unit)), + DataType::Dictionary(_, v) => coerced_type(v.as_ref()), + _ => { + plan_err!("Avg does not support inputs of type {data_type}.") + } + } + } + Ok(vec![coerced_type(args)?]) + } + fn return_type(&self, arg_types: &[DataType]) -> Result { - avg_return_type(self.name(), &arg_types[0]) + match &arg_types[0] { + DataType::Decimal32(precision, scale) => { + // In the spark, the result type is DECIMAL(min(38,precision+4), min(38,scale+4)). + // Ref: https://github.com/apache/spark/blob/fcf636d9eb8d645c24be3db2d599aba2d7e2955a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala#L66 + let new_precision = DECIMAL32_MAX_PRECISION.min(*precision + 4); + let new_scale = DECIMAL32_MAX_SCALE.min(*scale + 4); + Ok(DataType::Decimal32(new_precision, new_scale)) + } + DataType::Decimal64(precision, scale) => { + // In the spark, the result type is DECIMAL(min(38,precision+4), min(38,scale+4)). + // Ref: https://github.com/apache/spark/blob/fcf636d9eb8d645c24be3db2d599aba2d7e2955a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala#L66 + let new_precision = DECIMAL64_MAX_PRECISION.min(*precision + 4); + let new_scale = DECIMAL64_MAX_SCALE.min(*scale + 4); + Ok(DataType::Decimal64(new_precision, new_scale)) + } + DataType::Decimal128(precision, scale) => { + // In the spark, the result type is DECIMAL(min(38,precision+4), min(38,scale+4)). 
+ // Ref: https://github.com/apache/spark/blob/fcf636d9eb8d645c24be3db2d599aba2d7e2955a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala#L66 + let new_precision = DECIMAL128_MAX_PRECISION.min(*precision + 4); + let new_scale = DECIMAL128_MAX_SCALE.min(*scale + 4); + Ok(DataType::Decimal128(new_precision, new_scale)) + } + DataType::Decimal256(precision, scale) => { + // In the spark, the result type is DECIMAL(min(38,precision+4), min(38,scale+4)). + // Ref: https://github.com/apache/spark/blob/fcf636d9eb8d645c24be3db2d599aba2d7e2955a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala#L66 + let new_precision = DECIMAL256_MAX_PRECISION.min(*precision + 4); + let new_scale = DECIMAL256_MAX_SCALE.min(*scale + 4); + Ok(DataType::Decimal256(new_precision, new_scale)) + } + DataType::Duration(time_unit) => Ok(DataType::Duration(*time_unit)), + _ => Ok(DataType::Float64), + } } fn accumulator(&self, _acc_args: AccumulatorArgs) -> Result> { @@ -503,8 +558,4 @@ impl AggregateUDFImpl for Avg { fn aliases(&self) -> &[String] { &self.aliases } - - fn coerce_types(&self, arg_types: &[DataType]) -> Result> { - coerce_avg_type(self.name(), arg_types) - } } diff --git a/datafusion/expr/src/udaf.rs b/datafusion/expr/src/udaf.rs index bfd699d81485..b593f8411d24 100644 --- a/datafusion/expr/src/udaf.rs +++ b/datafusion/expr/src/udaf.rs @@ -746,21 +746,52 @@ pub trait AggregateUDFImpl: Debug + DynEq + DynHash + Send + Sync { true } - /// If this function is ordered-set aggregate function, return true - /// otherwise, return false + /// If this function is an ordered-set aggregate function, return `true`. + /// Otherwise, return `false` (default). /// - /// Ordered-set aggregate functions require an explicit `ORDER BY` clause - /// because the calculation performed by these functions is dependent on the - /// specific sequence of the input rows, unlike other aggregate functions - /// like `SUM`, `AVG`, or `COUNT`. + /// Ordered-set aggregate functions allow specifying a sort order that affects + /// how the function calculates its result, unlike other aggregate functions + /// like `SUM` or `COUNT`. For example, `percentile_cont` is an ordered-set + /// aggregate function that calculates the exact percentile value from a list + /// of values; the output of calculating the `0.75` percentile depends on if + /// you're calculating on an ascending or descending list of values. /// - /// An example of an ordered-set aggregate function is `percentile_cont` - /// which computes a specific percentile value from a sorted list of values, and - /// is only meaningful when the input data is ordered. + /// Setting this to return `true` affects only SQL parsing & planning; it allows + /// use of the `WITHIN GROUP` clause to specify this order, for example: /// - /// In SQL syntax, ordered-set aggregate functions are used with the - /// `WITHIN GROUP (ORDER BY ...)` clause to specify the ordering of the input - /// data. + /// ```sql + /// -- Ascending + /// SELECT percentile_cont(0.75) WITHIN GROUP (ORDER BY c1 ASC) FROM table; + /// -- Default ordering is ascending if not explicitly specified + /// SELECT percentile_cont(0.75) WITHIN GROUP (ORDER BY c1) FROM table; + /// -- Descending + /// SELECT percentile_cont(0.75) WITHIN GROUP (ORDER BY c1 DESC) FROM table; + /// ``` + /// + /// This calculates the `0.75` percentile of the column `c1` from `table`, + /// according to the specific ordering. 
The column specified in the `WITHIN GROUP` + /// ordering clause is taken as the column to calculate values on; specifying + /// the `WITHIN GROUP` clause is optional so these queries are equivalent: + /// + /// ```sql + /// -- If no WITHIN GROUP is specified then default ordering is implementation + /// -- dependent; in this case ascending for percentile_cont + /// SELECT percentile_cont(c1, 0.75) FROM table; + /// SELECT percentile_cont(0.75) WITHIN GROUP (ORDER BY c1 ASC) FROM table; + /// ``` + /// + /// Aggregate UDFs can define their default ordering if the function is called + /// without the `WITHIN GROUP` clause, though a default of ascending is the + /// standard practice. + /// + /// Note that setting this to `true` does not guarantee input sort order to + /// the aggregate function; it expects the function to handle ordering the + /// input values themselves (e.g. `percentile_cont` must buffer and sort + /// the values internally). That is, DataFusion does not introduce any kind + /// of sort into the plan for these functions. + /// + /// Setting this to `false` disallows calling this function with the `WITHIN GROUP` + /// clause. fn is_ordered_set_aggregate(&self) -> bool { false } diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index d522158f7b6b..c4cd8c006d1f 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -252,7 +252,21 @@ impl ScalarUDF { Ok(result) } - /// Get the circuits of inner implementation + /// Determines which of the arguments passed to this function are evaluated eagerly + /// and which may be evaluated lazily. + /// + /// See [ScalarUDFImpl::conditional_arguments] for more information. + pub fn conditional_arguments<'a>( + &self, + args: &'a [Expr], + ) -> Option<(Vec<&'a Expr>, Vec<&'a Expr>)> { + self.inner.conditional_arguments(args) + } + + /// Returns true if some of this `exprs` subexpressions may not be evaluated + /// and thus any side effects (like divide by zero) may not be encountered. + /// + /// See [ScalarUDFImpl::short_circuits] for more information. pub fn short_circuits(&self) -> bool { self.inner.short_circuits() } @@ -532,6 +546,33 @@ pub trait ScalarUDFImpl: Debug + DynEq + DynHash + Send + Sync { /// [`DataFusionError::Internal`]: datafusion_common::DataFusionError::Internal fn return_type(&self, arg_types: &[DataType]) -> Result; + /// Create a new instance of this function with updated configuration. + /// + /// This method is called when configuration options change at runtime + /// (e.g., via `SET` statements) to allow functions that depend on + /// configuration to update themselves accordingly. + /// + /// Note the current [`ConfigOptions`] are also passed to [`Self::invoke_with_args`] so + /// this API is not needed for functions where the values may + /// depend on the current options. + /// + /// This API is useful for functions where the return + /// **type** depends on the configuration options, such as the `now()` function + /// which depends on the current timezone. + /// + /// # Arguments + /// + /// * `config` - The updated configuration options + /// + /// # Returns + /// + /// * `Some(ScalarUDF)` - A new instance of this function configured with the new settings + /// * `None` - If this function does not change with new configuration settings (the default) + /// + fn with_updated_config(&self, _config: &ConfigOptions) -> Option { + None + } + /// What type will be returned by this function, given the arguments? 
/// /// By default, this function calls [`Self::return_type`] with the @@ -656,10 +697,42 @@ pub trait ScalarUDFImpl: Debug + DynEq + DynHash + Send + Sync { /// /// Setting this to true prevents certain optimizations such as common /// subexpression elimination + /// + /// When overriding this function to return `true`, [ScalarUDFImpl::conditional_arguments] can also be + /// overridden to report more accurately which arguments are eagerly evaluated and which ones + /// lazily. fn short_circuits(&self) -> bool { false } + /// Determines which of the arguments passed to this function are evaluated eagerly + /// and which may be evaluated lazily. + /// + /// If this function returns `None`, all arguments are eagerly evaluated. + /// Returning `None` is a micro optimization that saves a needless `Vec` + /// allocation. + /// + /// If the function returns `Some`, returns (`eager`, `lazy`) where `eager` + /// are the arguments that are always evaluated, and `lazy` are the + /// arguments that may be evaluated lazily (i.e. may not be evaluated at all + /// in some cases). + /// + /// Implementations must ensure that the two returned `Vec`s are disjunct, + /// and that each argument from `args` is present in one the two `Vec`s. + /// + /// When overriding this function, [ScalarUDFImpl::short_circuits] must + /// be overridden to return `true`. + fn conditional_arguments<'a>( + &self, + args: &'a [Expr], + ) -> Option<(Vec<&'a Expr>, Vec<&'a Expr>)> { + if self.short_circuits() { + Some((vec![], args.iter().collect())) + } else { + None + } + } + /// Computes the output [`Interval`] for a [`ScalarUDFImpl`], given the input /// intervals. /// @@ -833,6 +906,10 @@ impl ScalarUDFImpl for AliasedScalarUDFImpl { self.inner.invoke_with_args(args) } + fn with_updated_config(&self, _config: &ConfigOptions) -> Option { + None + } + fn aliases(&self) -> &[String] { &self.aliases } @@ -845,6 +922,13 @@ impl ScalarUDFImpl for AliasedScalarUDFImpl { self.inner.simplify(args, info) } + fn conditional_arguments<'a>( + &self, + args: &'a [Expr], + ) -> Option<(Vec<&'a Expr>, Vec<&'a Expr>)> { + self.inner.conditional_arguments(args) + } + fn short_circuits(&self) -> bool { self.inner.short_circuits() } diff --git a/datafusion/functions-aggregate-common/src/utils.rs b/datafusion/functions-aggregate-common/src/utils.rs index b01f2c8629c9..7ce5f09373f5 100644 --- a/datafusion/functions-aggregate-common/src/utils.rs +++ b/datafusion/functions-aggregate-common/src/utils.rs @@ -95,6 +95,8 @@ pub struct DecimalAverager { target_mul: T::Native, /// the output precision target_precision: u8, + /// the output scale + target_scale: i8, } impl DecimalAverager { @@ -129,6 +131,7 @@ impl DecimalAverager { sum_mul, target_mul, target_precision, + target_scale, }) } else { // can't convert the lit decimal to the returned data type @@ -147,8 +150,11 @@ impl DecimalAverager { if let Ok(value) = sum.mul_checked(self.target_mul.div_wrapping(self.sum_mul)) { let new_value = value.div_wrapping(count); - let validate = - T::validate_decimal_precision(new_value, self.target_precision); + let validate = T::validate_decimal_precision( + new_value, + self.target_precision, + self.target_scale, + ); if validate.is_ok() { Ok(new_value) diff --git a/datafusion/functions-aggregate/src/approx_percentile_cont.rs b/datafusion/functions-aggregate/src/approx_percentile_cont.rs index 0deb09184b3f..668280314e8d 100644 --- a/datafusion/functions-aggregate/src/approx_percentile_cont.rs +++ b/datafusion/functions-aggregate/src/approx_percentile_cont.rs 
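The `DecimalAverager` change above threads the output scale through so the final value can be validated against both precision and scale. A simplified, standalone illustration of the underlying scaled-integer arithmetic (plain `i128`, hypothetical `Decimal128(10, 2)` inputs averaged into `Decimal128(14, 6)`; not the averager's actual code):

```rust
fn main() {
    // Three Decimal128(10, 2) values: 1.25, 2.50, 3.75, stored as scaled integers
    let values: [i128; 3] = [125, 250, 375];
    let count = values.len() as i128;
    let sum: i128 = values.iter().sum(); // 750 == 7.50 at scale 2

    // Output type Decimal128(14, 6): bring the sum up to the output scale before
    // dividing, the same rescale-then-divide order the averager uses
    let sum_scale = 2u32;
    let target_scale = 6u32;
    let rescale = 10_i128.pow(target_scale - sum_scale); // target_mul / sum_mul

    let avg = (sum * rescale) / count; // 2_500_000 == 2.500000 at scale 6
    assert_eq!(avg, 2_500_000);

    // The result must still fit the output precision (14 digits here), which is
    // the check validate_decimal_precision now performs with the scale as well
    let target_precision = 14u32;
    assert!(avg.abs() < 10_i128.pow(target_precision));
}
```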
@@ -20,7 +20,7 @@ use std::fmt::{Debug, Formatter}; use std::mem::size_of_val; use std::sync::Arc; -use arrow::array::{Array, RecordBatch}; +use arrow::array::Array; use arrow::compute::{filter, is_not_null}; use arrow::datatypes::FieldRef; use arrow::{ @@ -28,19 +28,19 @@ use arrow::{ ArrayRef, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, UInt16Array, UInt32Array, UInt64Array, UInt8Array, }, - datatypes::{DataType, Field, Schema}, + datatypes::{DataType, Field}, }; use datafusion_common::{ - downcast_value, internal_err, not_impl_datafusion_err, not_impl_err, plan_err, - Result, ScalarValue, + downcast_value, internal_err, not_impl_err, plan_err, DataFusionError, Result, + ScalarValue, }; use datafusion_expr::expr::{AggregateFunction, Sort}; use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs}; use datafusion_expr::type_coercion::aggregates::{INTEGERS, NUMERICS}; use datafusion_expr::utils::format_state_name; use datafusion_expr::{ - Accumulator, AggregateUDFImpl, ColumnarValue, Documentation, Expr, Signature, - TypeSignature, Volatility, + Accumulator, AggregateUDFImpl, Documentation, Expr, Signature, TypeSignature, + Volatility, }; use datafusion_functions_aggregate_common::tdigest::{ TDigest, TryIntoF64, DEFAULT_MAX_SIZE, @@ -48,6 +48,8 @@ use datafusion_functions_aggregate_common::tdigest::{ use datafusion_macros::user_doc; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; +use crate::utils::{get_scalar_value, validate_percentile_expr}; + create_func!(ApproxPercentileCont, approx_percentile_cont_udaf); /// Computes the approximate percentile continuous of a set of numbers @@ -164,7 +166,8 @@ impl ApproxPercentileCont { &self, args: AccumulatorArgs, ) -> Result { - let percentile = validate_input_percentile_expr(&args.exprs[1])?; + let percentile = + validate_percentile_expr(&args.exprs[1], "APPROX_PERCENTILE_CONT")?; let is_descending = args .order_bys @@ -214,45 +217,15 @@ impl ApproxPercentileCont { } } -fn get_scalar_value(expr: &Arc) -> Result { - let empty_schema = Arc::new(Schema::empty()); - let batch = RecordBatch::new_empty(Arc::clone(&empty_schema)); - if let ColumnarValue::Scalar(s) = expr.evaluate(&batch)? { - Ok(s) - } else { - internal_err!("Didn't expect ColumnarValue::Array") - } -} - -fn validate_input_percentile_expr(expr: &Arc) -> Result { - let percentile = match get_scalar_value(expr) - .map_err(|_| not_impl_datafusion_err!("Percentile value for 'APPROX_PERCENTILE_CONT' must be a literal, got: {expr}"))? { - ScalarValue::Float32(Some(value)) => { - value as f64 - } - ScalarValue::Float64(Some(value)) => { - value - } - sv => { - return not_impl_err!( - "Percentile value for 'APPROX_PERCENTILE_CONT' must be Float32 or Float64 literal (got data type {})", - sv.data_type() - ) - } - }; - - // Ensure the percentile is between 0 and 1. - if !(0.0..=1.0).contains(&percentile) { - return plan_err!( - "Percentile value must be between 0.0 and 1.0 inclusive, {percentile} is invalid" - ); - } - Ok(percentile) -} - fn validate_input_max_size_expr(expr: &Arc) -> Result { - let max_size = match get_scalar_value(expr) - .map_err(|_| not_impl_datafusion_err!("Tdigest max_size value for 'APPROX_PERCENTILE_CONT' must be a literal, got: {expr}"))? 
{ + let scalar_value = get_scalar_value(expr).map_err(|_e| { + DataFusionError::Plan( + "Tdigest max_size value for 'APPROX_PERCENTILE_CONT' must be a literal" + .to_string(), + ) + })?; + + let max_size = match scalar_value { ScalarValue::UInt8(Some(q)) => q as usize, ScalarValue::UInt16(Some(q)) => q as usize, ScalarValue::UInt32(Some(q)) => q as usize, @@ -262,7 +235,7 @@ fn validate_input_max_size_expr(expr: &Arc) -> Result { ScalarValue::Int16(Some(q)) if q > 0 => q as usize, ScalarValue::Int8(Some(q)) if q > 0 => q as usize, sv => { - return not_impl_err!( + return plan_err!( "Tdigest max_size value for 'APPROX_PERCENTILE_CONT' must be UInt > 0 literal (got data type {}).", sv.data_type() ) diff --git a/datafusion/functions-aggregate/src/average.rs b/datafusion/functions-aggregate/src/average.rs index d007163e7c08..11960779ed18 100644 --- a/datafusion/functions-aggregate/src/average.rs +++ b/datafusion/functions-aggregate/src/average.rs @@ -27,14 +27,15 @@ use arrow::datatypes::{ i256, ArrowNativeType, DataType, Decimal128Type, Decimal256Type, Decimal32Type, Decimal64Type, DecimalType, DurationMicrosecondType, DurationMillisecondType, DurationNanosecondType, DurationSecondType, Field, FieldRef, Float64Type, TimeUnit, - UInt64Type, DECIMAL128_MAX_PRECISION, DECIMAL256_MAX_PRECISION, - DECIMAL32_MAX_PRECISION, DECIMAL64_MAX_PRECISION, + UInt64Type, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE, DECIMAL256_MAX_PRECISION, + DECIMAL256_MAX_SCALE, DECIMAL32_MAX_PRECISION, DECIMAL32_MAX_SCALE, + DECIMAL64_MAX_PRECISION, DECIMAL64_MAX_SCALE, }; +use datafusion_common::plan_err; use datafusion_common::{ exec_err, not_impl_err, utils::take_function_args, Result, ScalarValue, }; use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs}; -use datafusion_expr::type_coercion::aggregates::{avg_return_type, coerce_avg_type}; use datafusion_expr::utils::format_state_name; use datafusion_expr::Volatility::Immutable; use datafusion_expr::{ @@ -125,8 +126,61 @@ impl AggregateUDFImpl for Avg { &self.signature } + fn coerce_types(&self, arg_types: &[DataType]) -> Result> { + let [args] = take_function_args(self.name(), arg_types)?; + + // Supported types smallint, int, bigint, real, double precision, decimal, or interval + // Refer to https://www.postgresql.org/docs/8.2/functions-aggregate.html doc + fn coerced_type(data_type: &DataType) -> Result { + match &data_type { + DataType::Decimal32(p, s) => Ok(DataType::Decimal32(*p, *s)), + DataType::Decimal64(p, s) => Ok(DataType::Decimal64(*p, *s)), + DataType::Decimal128(p, s) => Ok(DataType::Decimal128(*p, *s)), + DataType::Decimal256(p, s) => Ok(DataType::Decimal256(*p, *s)), + d if d.is_numeric() => Ok(DataType::Float64), + DataType::Duration(time_unit) => Ok(DataType::Duration(*time_unit)), + DataType::Dictionary(_, v) => coerced_type(v.as_ref()), + _ => { + plan_err!("Avg does not support inputs of type {data_type}.") + } + } + } + Ok(vec![coerced_type(args)?]) + } + fn return_type(&self, arg_types: &[DataType]) -> Result { - avg_return_type(self.name(), &arg_types[0]) + match &arg_types[0] { + DataType::Decimal32(precision, scale) => { + // In the spark, the result type is DECIMAL(min(38,precision+4), min(38,scale+4)). 
+ // Ref: https://github.com/apache/spark/blob/fcf636d9eb8d645c24be3db2d599aba2d7e2955a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala#L66 + let new_precision = DECIMAL32_MAX_PRECISION.min(*precision + 4); + let new_scale = DECIMAL32_MAX_SCALE.min(*scale + 4); + Ok(DataType::Decimal32(new_precision, new_scale)) + } + DataType::Decimal64(precision, scale) => { + // In the spark, the result type is DECIMAL(min(38,precision+4), min(38,scale+4)). + // Ref: https://github.com/apache/spark/blob/fcf636d9eb8d645c24be3db2d599aba2d7e2955a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala#L66 + let new_precision = DECIMAL64_MAX_PRECISION.min(*precision + 4); + let new_scale = DECIMAL64_MAX_SCALE.min(*scale + 4); + Ok(DataType::Decimal64(new_precision, new_scale)) + } + DataType::Decimal128(precision, scale) => { + // In the spark, the result type is DECIMAL(min(38,precision+4), min(38,scale+4)). + // Ref: https://github.com/apache/spark/blob/fcf636d9eb8d645c24be3db2d599aba2d7e2955a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala#L66 + let new_precision = DECIMAL128_MAX_PRECISION.min(*precision + 4); + let new_scale = DECIMAL128_MAX_SCALE.min(*scale + 4); + Ok(DataType::Decimal128(new_precision, new_scale)) + } + DataType::Decimal256(precision, scale) => { + // In the spark, the result type is DECIMAL(min(38,precision+4), min(38,scale+4)). + // Ref: https://github.com/apache/spark/blob/fcf636d9eb8d645c24be3db2d599aba2d7e2955a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala#L66 + let new_precision = DECIMAL256_MAX_PRECISION.min(*precision + 4); + let new_scale = DECIMAL256_MAX_SCALE.min(*scale + 4); + Ok(DataType::Decimal256(new_precision, new_scale)) + } + DataType::Duration(time_unit) => Ok(DataType::Duration(*time_unit)), + _ => Ok(DataType::Float64), + } } fn accumulator(&self, acc_args: AccumulatorArgs) -> Result> { @@ -452,11 +506,6 @@ impl AggregateUDFImpl for Avg { ReversedUDAF::Identical } - fn coerce_types(&self, arg_types: &[DataType]) -> Result> { - let [args] = take_function_args(self.name(), arg_types)?; - coerce_avg_type(self.name(), std::slice::from_ref(args)) - } - fn documentation(&self) -> Option<&Documentation> { self.doc() } diff --git a/datafusion/functions-aggregate/src/lib.rs b/datafusion/functions-aggregate/src/lib.rs index 4f282301ce5b..056cd45fa2c3 100644 --- a/datafusion/functions-aggregate/src/lib.rs +++ b/datafusion/functions-aggregate/src/lib.rs @@ -81,6 +81,7 @@ pub mod hyperloglog; pub mod median; pub mod min_max; pub mod nth_value; +pub mod percentile_cont; pub mod regr; pub mod stddev; pub mod string_agg; @@ -88,6 +89,7 @@ pub mod sum; pub mod variance; pub mod planner; +mod utils; use crate::approx_percentile_cont::approx_percentile_cont_udaf; use crate::approx_percentile_cont_with_weight::approx_percentile_cont_with_weight_udaf; @@ -123,6 +125,7 @@ pub mod expr_fn { pub use super::min_max::max; pub use super::min_max::min; pub use super::nth_value::nth_value; + pub use super::percentile_cont::percentile_cont; pub use super::regr::regr_avgx; pub use super::regr::regr_avgy; pub use super::regr::regr_count; @@ -171,6 +174,7 @@ pub fn all_default_aggregate_functions() -> Vec> { approx_distinct::approx_distinct_udaf(), approx_percentile_cont_udaf(), approx_percentile_cont_with_weight_udaf(), + percentile_cont::percentile_cont_udaf(), string_agg::string_agg_udaf(), 
bit_and_or_xor::bit_and_udaf(), bit_and_or_xor::bit_or_udaf(), @@ -207,13 +211,7 @@ mod tests { #[test] fn test_no_duplicate_name() -> Result<()> { let mut names = HashSet::new(); - let migrated_functions = ["array_agg", "count", "max", "min"]; for func in all_default_aggregate_functions() { - // TODO: remove this - // These functions are in intermediate migration state, skip them - if migrated_functions.contains(&func.name().to_lowercase().as_str()) { - continue; - } assert!( names.insert(func.name().to_string().to_lowercase()), "duplicate function name: {}", diff --git a/datafusion/functions-aggregate/src/min_max/min_max_bytes.rs b/datafusion/functions-aggregate/src/min_max/min_max_bytes.rs index 05321c2ff52d..30b2739c08ed 100644 --- a/datafusion/functions-aggregate/src/min_max/min_max_bytes.rs +++ b/datafusion/functions-aggregate/src/min_max/min_max_bytes.rs @@ -20,7 +20,8 @@ use arrow::array::{ LargeBinaryBuilder, LargeStringBuilder, StringBuilder, StringViewBuilder, }; use arrow::datatypes::DataType; -use datafusion_common::{internal_err, Result}; +use datafusion_common::hash_map::Entry; +use datafusion_common::{internal_err, HashMap, Result}; use datafusion_expr::{EmitTo, GroupsAccumulator}; use datafusion_functions_aggregate_common::aggregate::groups_accumulator::nulls::apply_filter_as_nulls; use std::mem::size_of; @@ -391,14 +392,6 @@ struct MinMaxBytesState { total_data_bytes: usize, } -#[derive(Debug, Clone, Copy)] -enum MinMaxLocation<'a> { - /// the min/max value is stored in the existing `min_max` array - ExistingMinMax, - /// the min/max value is stored in the input array at the given index - Input(&'a [u8]), -} - /// Implement the MinMaxBytesAccumulator with a comparison function /// for comparing strings impl MinMaxBytesState { @@ -450,7 +443,7 @@ impl MinMaxBytesState { // Minimize value copies by calculating the new min/maxes for each group // in this batch (either the existing min/max or the new input value) // and updating the owned values in `self.min_maxes` at most once - let mut locations = vec![MinMaxLocation::ExistingMinMax; total_num_groups]; + let mut locations = HashMap::::with_capacity(group_indices.len()); // Figure out the new min value for each group for (new_val, group_index) in iter.into_iter().zip(group_indices.iter()) { @@ -459,32 +452,29 @@ impl MinMaxBytesState { continue; // skip nulls }; - let existing_val = match locations[group_index] { - // previous input value was the min/max, so compare it - MinMaxLocation::Input(existing_val) => existing_val, - MinMaxLocation::ExistingMinMax => { - let Some(existing_val) = self.min_max[group_index].as_ref() else { - // no existing min/max, so this is the new min/max - locations[group_index] = MinMaxLocation::Input(new_val); - continue; - }; - existing_val.as_ref() + match locations.entry(group_index) { + Entry::Occupied(mut occupied_entry) => { + if cmp(new_val, occupied_entry.get()) { + occupied_entry.insert(new_val); + } + } + Entry::Vacant(vacant_entry) => { + if let Some(old_val) = self.min_max[group_index].as_ref() { + if cmp(new_val, old_val) { + vacant_entry.insert(new_val); + } + } else { + vacant_entry.insert(new_val); + } } }; - - // Compare the new value to the existing value, replacing if necessary - if cmp(new_val, existing_val) { - locations[group_index] = MinMaxLocation::Input(new_val); - } } // Update self.min_max with any new min/max values we found in the input - for (group_index, location) in locations.iter().enumerate() { - match location { - MinMaxLocation::ExistingMinMax => {} - 
MinMaxLocation::Input(new_val) => self.set_value(group_index, new_val), - } + for (group_index, location) in locations.iter() { + self.set_value(*group_index, location); } + Ok(()) } diff --git a/datafusion/functions-aggregate/src/percentile_cont.rs b/datafusion/functions-aggregate/src/percentile_cont.rs new file mode 100644 index 000000000000..8e9e9a3144d4 --- /dev/null +++ b/datafusion/functions-aggregate/src/percentile_cont.rs @@ -0,0 +1,814 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::fmt::{Debug, Formatter}; +use std::mem::{size_of, size_of_val}; +use std::sync::Arc; + +use arrow::array::{ + ArrowNumericType, BooleanArray, ListArray, PrimitiveArray, PrimitiveBuilder, +}; +use arrow::buffer::{OffsetBuffer, ScalarBuffer}; +use arrow::{ + array::{Array, ArrayRef, AsArray}, + datatypes::{ + ArrowNativeType, DataType, Decimal128Type, Decimal256Type, Decimal32Type, + Decimal64Type, Field, FieldRef, Float16Type, Float32Type, Float64Type, + }, +}; + +use arrow::array::ArrowNativeTypeOp; + +use datafusion_common::{ + internal_datafusion_err, internal_err, plan_err, DataFusionError, HashSet, Result, + ScalarValue, +}; +use datafusion_expr::expr::{AggregateFunction, Sort}; +use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs}; +use datafusion_expr::type_coercion::aggregates::NUMERICS; +use datafusion_expr::utils::format_state_name; +use datafusion_expr::{ + Accumulator, AggregateUDFImpl, Documentation, Expr, Signature, TypeSignature, + Volatility, +}; +use datafusion_expr::{EmitTo, GroupsAccumulator}; +use datafusion_functions_aggregate_common::aggregate::groups_accumulator::accumulate::accumulate; +use datafusion_functions_aggregate_common::aggregate::groups_accumulator::nulls::filtered_null_mask; +use datafusion_functions_aggregate_common::utils::Hashable; +use datafusion_macros::user_doc; + +use crate::utils::validate_percentile_expr; + +/// Precision multiplier for linear interpolation calculations. +/// +/// This value of 1,000,000 was chosen to balance precision with overflow safety: +/// - Provides 6 decimal places of precision for the fractional component +/// - Small enough to avoid overflow when multiplied with typical numeric values +/// - Sufficient precision for most statistical applications +/// +/// The interpolation formula: `lower + (upper - lower) * fraction` +/// is computed as: `lower + ((upper - lower) * (fraction * PRECISION)) / PRECISION` +/// to avoid floating-point operations on integer types while maintaining precision. 
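A quick numeric check of the interpolation formula described in the comment above, using hypothetical `Decimal128(10, 2)` neighbour values and a fraction of 0.75:

```rust
fn main() {
    // Two adjacent sorted values of a hypothetical Decimal128(10, 2) column:
    // 10.00 and 11.00, stored as scaled integers
    let lower: i128 = 1_000;
    let upper: i128 = 1_100;

    // Fractional part of the percentile rank (0.75), scaled by the
    // 1_000_000 precision factor described above
    let precision: i128 = 1_000_000;
    let scaled_fraction = (0.75_f64 * precision as f64) as i128; // 750_000

    // lower + ((upper - lower) * (fraction * PRECISION)) / PRECISION, integers only
    let interpolated = lower + ((upper - lower) * scaled_fraction) / precision;

    assert_eq!(interpolated, 1_075); // 10.75 at scale 2
}
```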
+const INTERPOLATION_PRECISION: usize = 1_000_000; + +create_func!(PercentileCont, percentile_cont_udaf); + +/// Computes the exact percentile continuous of a set of numbers +pub fn percentile_cont(order_by: Sort, percentile: Expr) -> Expr { + let expr = order_by.expr.clone(); + let args = vec![expr, percentile]; + + Expr::AggregateFunction(AggregateFunction::new_udf( + percentile_cont_udaf(), + args, + false, + None, + vec![order_by], + None, + )) +} + +#[user_doc( + doc_section(label = "General Functions"), + description = "Returns the exact percentile of input values, interpolating between values if needed.", + syntax_example = "percentile_cont(percentile) WITHIN GROUP (ORDER BY expression)", + sql_example = r#"```sql +> SELECT percentile_cont(0.75) WITHIN GROUP (ORDER BY column_name) FROM table_name; ++----------------------------------------------------------+ +| percentile_cont(0.75) WITHIN GROUP (ORDER BY column_name) | ++----------------------------------------------------------+ +| 45.5 | ++----------------------------------------------------------+ +``` + +An alternate syntax is also supported: +```sql +> SELECT percentile_cont(column_name, 0.75) FROM table_name; ++---------------------------------------+ +| percentile_cont(column_name, 0.75) | ++---------------------------------------+ +| 45.5 | ++---------------------------------------+ +```"#, + standard_argument(name = "expression", prefix = "The"), + argument( + name = "percentile", + description = "Percentile to compute. Must be a float value between 0 and 1 (inclusive)." + ) +)] +/// PERCENTILE_CONT aggregate expression. This uses an exact calculation and stores all values +/// in memory before computing the result. If an approximation is sufficient then +/// APPROX_PERCENTILE_CONT provides a much more efficient solution. +/// +/// If using the distinct variation, the memory usage will be similarly high if the +/// cardinality is high as it stores all distinct values in memory before computing the +/// result, but if cardinality is low then memory usage will also be lower. +#[derive(PartialEq, Eq, Hash)] +pub struct PercentileCont { + signature: Signature, + aliases: Vec, +} + +impl Debug for PercentileCont { + fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { + f.debug_struct("PercentileCont") + .field("name", &self.name()) + .field("signature", &self.signature) + .finish() + } +} + +impl Default for PercentileCont { + fn default() -> Self { + Self::new() + } +} + +impl PercentileCont { + pub fn new() -> Self { + let mut variants = Vec::with_capacity(NUMERICS.len()); + // Accept any numeric value paired with a float64 percentile + for num in NUMERICS { + variants.push(TypeSignature::Exact(vec![num.clone(), DataType::Float64])); + } + Self { + signature: Signature::one_of(variants, Volatility::Immutable), + aliases: vec![String::from("quantile_cont")], + } + } + + fn create_accumulator(&self, args: AccumulatorArgs) -> Result> { + let percentile = validate_percentile_expr(&args.exprs[1], "PERCENTILE_CONT")?; + + let is_descending = args + .order_bys + .first() + .map(|sort_expr| sort_expr.options.descending) + .unwrap_or(false); + + let percentile = if is_descending { + 1.0 - percentile + } else { + percentile + }; + + macro_rules! 
helper { + ($t:ty, $dt:expr) => { + if args.is_distinct { + Ok(Box::new(DistinctPercentileContAccumulator::<$t> { + data_type: $dt.clone(), + distinct_values: HashSet::new(), + percentile, + })) + } else { + Ok(Box::new(PercentileContAccumulator::<$t> { + data_type: $dt.clone(), + all_values: vec![], + percentile, + })) + } + }; + } + + let input_dt = args.exprs[0].data_type(args.schema)?; + match input_dt { + // For integer types, use Float64 internally since percentile_cont returns Float64 + DataType::Int8 + | DataType::Int16 + | DataType::Int32 + | DataType::Int64 + | DataType::UInt8 + | DataType::UInt16 + | DataType::UInt32 + | DataType::UInt64 => helper!(Float64Type, DataType::Float64), + DataType::Float16 => helper!(Float16Type, input_dt), + DataType::Float32 => helper!(Float32Type, input_dt), + DataType::Float64 => helper!(Float64Type, input_dt), + DataType::Decimal32(_, _) => helper!(Decimal32Type, input_dt), + DataType::Decimal64(_, _) => helper!(Decimal64Type, input_dt), + DataType::Decimal128(_, _) => helper!(Decimal128Type, input_dt), + DataType::Decimal256(_, _) => helper!(Decimal256Type, input_dt), + _ => Err(DataFusionError::NotImplemented(format!( + "PercentileContAccumulator not supported for {} with {}", + args.name, input_dt, + ))), + } + } +} + +impl AggregateUDFImpl for PercentileCont { + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn name(&self) -> &str { + "percentile_cont" + } + + fn aliases(&self) -> &[String] { + &self.aliases + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> Result { + if !arg_types[0].is_numeric() { + return plan_err!("percentile_cont requires numeric input types"); + } + // PERCENTILE_CONT performs linear interpolation and should return a float type + // For integer inputs, return Float64 (matching PostgreSQL/DuckDB behavior) + // For float inputs, preserve the float type + match &arg_types[0] { + DataType::Float16 | DataType::Float32 | DataType::Float64 => { + Ok(arg_types[0].clone()) + } + DataType::Decimal32(_, _) + | DataType::Decimal64(_, _) + | DataType::Decimal128(_, _) + | DataType::Decimal256(_, _) => Ok(arg_types[0].clone()), + DataType::UInt8 + | DataType::UInt16 + | DataType::UInt32 + | DataType::UInt64 + | DataType::Int8 + | DataType::Int16 + | DataType::Int32 + | DataType::Int64 => Ok(DataType::Float64), + // Shouldn't happen due to signature check, but just in case + dt => plan_err!( + "percentile_cont does not support input type {}, must be numeric", + dt + ), + } + } + + fn state_fields(&self, args: StateFieldsArgs) -> Result> { + //Intermediate state is a list of the elements we have collected so far + let input_type = args.input_fields[0].data_type().clone(); + // For integer types, we store as Float64 internally + let storage_type = match &input_type { + DataType::Int8 + | DataType::Int16 + | DataType::Int32 + | DataType::Int64 + | DataType::UInt8 + | DataType::UInt16 + | DataType::UInt32 + | DataType::UInt64 => DataType::Float64, + _ => input_type, + }; + + let field = Field::new_list_field(storage_type, true); + let state_name = if args.is_distinct { + "distinct_percentile_cont" + } else { + "percentile_cont" + }; + + Ok(vec![Field::new( + format_state_name(args.name, state_name), + DataType::List(Arc::new(field)), + true, + ) + .into()]) + } + + fn accumulator(&self, acc_args: AccumulatorArgs) -> Result> { + self.create_accumulator(acc_args) + } + + fn groups_accumulator_supported(&self, args: AccumulatorArgs) -> bool { + !args.is_distinct + 
} + + fn create_groups_accumulator( + &self, + args: AccumulatorArgs, + ) -> Result> { + let num_args = args.exprs.len(); + if num_args != 2 { + return internal_err!( + "percentile_cont should have 2 args, but found num args:{}", + args.exprs.len() + ); + } + + let percentile = validate_percentile_expr(&args.exprs[1], "PERCENTILE_CONT")?; + + let is_descending = args + .order_bys + .first() + .map(|sort_expr| sort_expr.options.descending) + .unwrap_or(false); + + let percentile = if is_descending { + 1.0 - percentile + } else { + percentile + }; + + macro_rules! helper { + ($t:ty, $dt:expr) => { + Ok(Box::new(PercentileContGroupsAccumulator::<$t>::new( + $dt, percentile, + ))) + }; + } + + let input_dt = args.exprs[0].data_type(args.schema)?; + match input_dt { + // For integer types, use Float64 internally since percentile_cont returns Float64 + DataType::Int8 + | DataType::Int16 + | DataType::Int32 + | DataType::Int64 + | DataType::UInt8 + | DataType::UInt16 + | DataType::UInt32 + | DataType::UInt64 => helper!(Float64Type, DataType::Float64), + DataType::Float16 => helper!(Float16Type, input_dt), + DataType::Float32 => helper!(Float32Type, input_dt), + DataType::Float64 => helper!(Float64Type, input_dt), + DataType::Decimal32(_, _) => helper!(Decimal32Type, input_dt), + DataType::Decimal64(_, _) => helper!(Decimal64Type, input_dt), + DataType::Decimal128(_, _) => helper!(Decimal128Type, input_dt), + DataType::Decimal256(_, _) => helper!(Decimal256Type, input_dt), + _ => Err(DataFusionError::NotImplemented(format!( + "PercentileContGroupsAccumulator not supported for {} with {}", + args.name, input_dt, + ))), + } + } + + fn supports_null_handling_clause(&self) -> bool { + false + } + + fn is_ordered_set_aggregate(&self) -> bool { + true + } + + fn documentation(&self) -> Option<&Documentation> { + self.doc() + } +} + +/// The percentile_cont accumulator accumulates the raw input values +/// as native types. +/// +/// The intermediate state is represented as a List of scalar values updated by +/// `merge_batch` and a `Vec` of native values that are converted to scalar values +/// in the final evaluation step so that we avoid expensive conversions and +/// allocations during `update_batch`. +struct PercentileContAccumulator { + data_type: DataType, + all_values: Vec, + percentile: f64, +} + +impl Debug for PercentileContAccumulator { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!( + f, + "PercentileContAccumulator({}, percentile={})", + self.data_type, self.percentile + ) + } +} + +impl Accumulator for PercentileContAccumulator { + fn state(&mut self) -> Result> { + // Convert `all_values` to `ListArray` and return a single List ScalarValue + + // Build offsets + let offsets = + OffsetBuffer::new(ScalarBuffer::from(vec![0, self.all_values.len() as i32])); + + // Build inner array + let values_array = PrimitiveArray::::new( + ScalarBuffer::from(std::mem::take(&mut self.all_values)), + None, + ) + .with_data_type(self.data_type.clone()); + + // Build the result list array + let list_array = ListArray::new( + Arc::new(Field::new_list_field(self.data_type.clone(), true)), + offsets, + Arc::new(values_array), + None, + ); + + Ok(vec![ScalarValue::List(Arc::new(list_array))]) + } + + fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> { + // Cast to target type if needed (e.g., integer to Float64) + let values = if values[0].data_type() != &self.data_type { + arrow::compute::cast(&values[0], &self.data_type)? 
+ } else { + Arc::clone(&values[0]) + }; + + let values = values.as_primitive::(); + self.all_values.reserve(values.len() - values.null_count()); + self.all_values.extend(values.iter().flatten()); + Ok(()) + } + + fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> { + let array = states[0].as_list::(); + for v in array.iter().flatten() { + self.update_batch(&[v])? + } + Ok(()) + } + + fn evaluate(&mut self) -> Result { + let d = std::mem::take(&mut self.all_values); + let value = calculate_percentile::(d, self.percentile); + ScalarValue::new_primitive::(value, &self.data_type) + } + + fn size(&self) -> usize { + size_of_val(self) + self.all_values.capacity() * size_of::() + } +} + +/// The percentile_cont groups accumulator accumulates the raw input values +/// +/// For calculating the exact percentile of groups, we need to store all values +/// of groups before final evaluation. +/// So values in each group will be stored in a `Vec`, and the total group values +/// will be actually organized as a `Vec>`. +/// +#[derive(Debug)] +struct PercentileContGroupsAccumulator { + data_type: DataType, + group_values: Vec>, + percentile: f64, +} + +impl PercentileContGroupsAccumulator { + pub fn new(data_type: DataType, percentile: f64) -> Self { + Self { + data_type, + group_values: Vec::new(), + percentile, + } + } +} + +impl GroupsAccumulator + for PercentileContGroupsAccumulator +{ + fn update_batch( + &mut self, + values: &[ArrayRef], + group_indices: &[usize], + opt_filter: Option<&BooleanArray>, + total_num_groups: usize, + ) -> Result<()> { + // For ordered-set aggregates, we only care about the ORDER BY column (first element) + // The percentile parameter is already stored in self.percentile + + // Cast to target type if needed (e.g., integer to Float64) + let values_array = if values[0].data_type() != &self.data_type { + arrow::compute::cast(&values[0], &self.data_type)? 
+ } else { + Arc::clone(&values[0]) + }; + + let values = values_array.as_primitive::(); + + // Push the `not nulls + not filtered` row into its group + self.group_values.resize(total_num_groups, Vec::new()); + accumulate( + group_indices, + values, + opt_filter, + |group_index, new_value| { + self.group_values[group_index].push(new_value); + }, + ); + + Ok(()) + } + + fn merge_batch( + &mut self, + values: &[ArrayRef], + group_indices: &[usize], + // Since aggregate filter should be applied in partial stage, in final stage there should be no filter + _opt_filter: Option<&BooleanArray>, + total_num_groups: usize, + ) -> Result<()> { + assert_eq!(values.len(), 1, "one argument to merge_batch"); + + let input_group_values = values[0].as_list::(); + + // Ensure group values big enough + self.group_values.resize(total_num_groups, Vec::new()); + + // Extend values to related groups + group_indices + .iter() + .zip(input_group_values.iter()) + .for_each(|(&group_index, values_opt)| { + if let Some(values) = values_opt { + let values = values.as_primitive::(); + self.group_values[group_index].extend(values.values().iter()); + } + }); + + Ok(()) + } + + fn state(&mut self, emit_to: EmitTo) -> Result> { + // Emit values + let emit_group_values = emit_to.take_needed(&mut self.group_values); + + // Build offsets + let mut offsets = Vec::with_capacity(self.group_values.len() + 1); + offsets.push(0); + let mut cur_len = 0_i32; + for group_value in &emit_group_values { + cur_len += group_value.len() as i32; + offsets.push(cur_len); + } + let offsets = OffsetBuffer::new(ScalarBuffer::from(offsets)); + + // Build inner array + let flatten_group_values = + emit_group_values.into_iter().flatten().collect::>(); + let group_values_array = + PrimitiveArray::::new(ScalarBuffer::from(flatten_group_values), None) + .with_data_type(self.data_type.clone()); + + // Build the result list array + let result_list_array = ListArray::new( + Arc::new(Field::new_list_field(self.data_type.clone(), true)), + offsets, + Arc::new(group_values_array), + None, + ); + + Ok(vec![Arc::new(result_list_array)]) + } + + fn evaluate(&mut self, emit_to: EmitTo) -> Result { + // Emit values + let emit_group_values = emit_to.take_needed(&mut self.group_values); + + // Calculate percentile for each group + let mut evaluate_result_builder = + PrimitiveBuilder::::new().with_data_type(self.data_type.clone()); + for values in emit_group_values { + let value = calculate_percentile::(values, self.percentile); + evaluate_result_builder.append_option(value); + } + + Ok(Arc::new(evaluate_result_builder.finish())) + } + + fn convert_to_state( + &self, + values: &[ArrayRef], + opt_filter: Option<&BooleanArray>, + ) -> Result> { + assert_eq!(values.len(), 1, "one argument to merge_batch"); + + // Cast to target type if needed (e.g., integer to Float64) + let values_array = if values[0].data_type() != &self.data_type { + arrow::compute::cast(&values[0], &self.data_type)? + } else { + Arc::clone(&values[0]) + }; + + let input_array = values_array.as_primitive::(); + + // Directly convert the input array to states, each row will be + // seen as a respective group. + // For detail, the `input_array` will be converted to a `ListArray`. + // And if row is `not null + not filtered`, it will be converted to a list + // with only one element; otherwise, this row in `ListArray` will be set + // to null. 
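+        // For example (an illustrative sketch; the concrete values are assumed):
+        // an input of [1.0, 2.0, 3.0] with filter [true, false, true] is emitted
+        // as the ListArray [[1.0], null, [3.0]] -- one single-element list per
+        // surviving row, with filtered-out or null rows mapped to null entries.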
+ + // Reuse values buffer in `input_array` to build `values` in `ListArray` + let values = PrimitiveArray::::new(input_array.values().clone(), None) + .with_data_type(self.data_type.clone()); + + // `offsets` in `ListArray`, each row as a list element + let offset_end = i32::try_from(input_array.len()).map_err(|e| { + internal_datafusion_err!( + "cast array_len to i32 failed in convert_to_state of group percentile_cont, err:{e:?}" + ) + })?; + let offsets = (0..=offset_end).collect::>(); + // Safety: The offsets vector is constructed as a sequential range from 0 to input_array.len(), + // which guarantees all OffsetBuffer invariants: + // 1. Offsets are monotonically increasing (each element is prev + 1) + // 2. No offset exceeds the values array length (max offset = input_array.len()) + // 3. First offset is 0 and last offset equals the total length + // Therefore new_unchecked is safe to use here. + let offsets = unsafe { OffsetBuffer::new_unchecked(ScalarBuffer::from(offsets)) }; + + // `nulls` for converted `ListArray` + let nulls = filtered_null_mask(opt_filter, input_array); + + let converted_list_array = ListArray::new( + Arc::new(Field::new_list_field(self.data_type.clone(), true)), + offsets, + Arc::new(values), + nulls, + ); + + Ok(vec![Arc::new(converted_list_array)]) + } + + fn supports_convert_to_state(&self) -> bool { + true + } + + fn size(&self) -> usize { + self.group_values + .iter() + .map(|values| values.capacity() * size_of::()) + .sum::() + // account for size of self.group_values too + + self.group_values.capacity() * size_of::>() + } +} + +/// The distinct percentile_cont accumulator accumulates the raw input values +/// using a HashSet to eliminate duplicates. +/// +/// The intermediate state is represented as a List of scalar values updated by +/// `merge_batch` and a `Vec` of `ArrayRef` that are converted to scalar values +/// in the final evaluation step so that we avoid expensive conversions and +/// allocations during `update_batch`. +struct DistinctPercentileContAccumulator { + data_type: DataType, + distinct_values: HashSet>, + percentile: f64, +} + +impl Debug for DistinctPercentileContAccumulator { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!( + f, + "DistinctPercentileContAccumulator({}, percentile={})", + self.data_type, self.percentile + ) + } +} + +impl Accumulator for DistinctPercentileContAccumulator { + fn state(&mut self) -> Result> { + let all_values = self + .distinct_values + .iter() + .map(|x| ScalarValue::new_primitive::(Some(x.0), &self.data_type)) + .collect::>>()?; + + let arr = ScalarValue::new_list_nullable(&all_values, &self.data_type); + Ok(vec![ScalarValue::List(arr)]) + } + + fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> { + if values.is_empty() { + return Ok(()); + } + + // Cast to target type if needed (e.g., integer to Float64) + let values = if values[0].data_type() != &self.data_type { + arrow::compute::cast(&values[0], &self.data_type)? + } else { + Arc::clone(&values[0]) + }; + + let array = values.as_primitive::(); + match array.nulls().filter(|x| x.null_count() > 0) { + Some(n) => { + for idx in n.valid_indices() { + self.distinct_values.insert(Hashable(array.value(idx))); + } + } + None => array.values().iter().for_each(|x| { + self.distinct_values.insert(Hashable(*x)); + }), + } + Ok(()) + } + + fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> { + let array = states[0].as_list::(); + for v in array.iter().flatten() { + self.update_batch(&[v])? 
+ } + Ok(()) + } + + fn evaluate(&mut self) -> Result { + let d = std::mem::take(&mut self.distinct_values) + .into_iter() + .map(|v| v.0) + .collect::>(); + let value = calculate_percentile::(d, self.percentile); + ScalarValue::new_primitive::(value, &self.data_type) + } + + fn size(&self) -> usize { + size_of_val(self) + self.distinct_values.capacity() * size_of::() + } +} + +/// Calculate the percentile value for a given set of values. +/// This function performs an exact calculation by sorting all values. +/// +/// The percentile is calculated using linear interpolation between closest ranks. +/// For percentile p and n values: +/// - If p * (n-1) is an integer, return the value at that position +/// - Otherwise, interpolate between the two closest values +fn calculate_percentile( + mut values: Vec, + percentile: f64, +) -> Option { + let cmp = |x: &T::Native, y: &T::Native| x.compare(*y); + + let len = values.len(); + if len == 0 { + None + } else if len == 1 { + Some(values[0]) + } else if percentile == 0.0 { + // Get minimum value + Some( + *values + .iter() + .min_by(|a, b| cmp(a, b)) + .expect("we checked for len > 0 a few lines above"), + ) + } else if percentile == 1.0 { + // Get maximum value + Some( + *values + .iter() + .max_by(|a, b| cmp(a, b)) + .expect("we checked for len > 0 a few lines above"), + ) + } else { + // Calculate the index using the formula: p * (n - 1) + let index = percentile * ((len - 1) as f64); + let lower_index = index.floor() as usize; + let upper_index = index.ceil() as usize; + + if lower_index == upper_index { + // Exact index, return the value at that position + let (_, value, _) = values.select_nth_unstable_by(lower_index, cmp); + Some(*value) + } else { + // Need to interpolate between two values + // First, partition at lower_index to get the lower value + let (_, lower_value, _) = values.select_nth_unstable_by(lower_index, cmp); + let lower_value = *lower_value; + + // Then partition at upper_index to get the upper value + let (_, upper_value, _) = values.select_nth_unstable_by(upper_index, cmp); + let upper_value = *upper_value; + + // Linear interpolation using wrapping arithmetic + // We use wrapping operations here (matching the approach in median.rs) because: + // 1. Both values come from the input data, so diff is bounded by the value range + // 2. fraction is between 0 and 1, and INTERPOLATION_PRECISION is small enough + // to prevent overflow when combined with typical numeric ranges + // 3. The result is guaranteed to be between lower_value and upper_value + // 4. For floating-point types, wrapping ops behave the same as standard ops + let fraction = index - (lower_index as f64); + let diff = upper_value.sub_wrapping(lower_value); + let interpolated = lower_value.add_wrapping( + diff.mul_wrapping(T::Native::usize_as( + (fraction * INTERPOLATION_PRECISION as f64) as usize, + )) + .div_wrapping(T::Native::usize_as(INTERPOLATION_PRECISION)), + ); + Some(interpolated) + } + } +} diff --git a/datafusion/functions-aggregate/src/utils.rs b/datafusion/functions-aggregate/src/utils.rs new file mode 100644 index 000000000000..c058b64f9572 --- /dev/null +++ b/datafusion/functions-aggregate/src/utils.rs @@ -0,0 +1,72 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::sync::Arc; + +use arrow::array::RecordBatch; +use arrow::datatypes::Schema; +use datafusion_common::{internal_err, plan_err, DataFusionError, Result, ScalarValue}; +use datafusion_expr::ColumnarValue; +use datafusion_physical_expr_common::physical_expr::PhysicalExpr; + +/// Evaluates a physical expression to extract its scalar value. +/// +/// This is used to extract constant values from expressions (like percentile parameters) +/// by evaluating them against an empty record batch. +pub(crate) fn get_scalar_value(expr: &Arc) -> Result { + let empty_schema = Arc::new(Schema::empty()); + let batch = RecordBatch::new_empty(Arc::clone(&empty_schema)); + if let ColumnarValue::Scalar(s) = expr.evaluate(&batch)? { + Ok(s) + } else { + internal_err!("Didn't expect ColumnarValue::Array") + } +} + +/// Validates that a percentile expression is a literal float value between 0.0 and 1.0. +/// +/// Used by both `percentile_cont` and `approx_percentile_cont` to validate their +/// percentile parameters. +pub(crate) fn validate_percentile_expr( + expr: &Arc, + fn_name: &str, +) -> Result { + let scalar_value = get_scalar_value(expr).map_err(|_e| { + DataFusionError::Plan(format!( + "Percentile value for '{fn_name}' must be a literal" + )) + })?; + + let percentile = match scalar_value { + ScalarValue::Float32(Some(value)) => value as f64, + ScalarValue::Float64(Some(value)) => value, + sv => { + return plan_err!( + "Percentile value for '{fn_name}' must be Float32 or Float64 literal (got data type {})", + sv.data_type() + ) + } + }; + + // Ensure the percentile is between 0 and 1. + if !(0.0..=1.0).contains(&percentile) { + return plan_err!( + "Percentile value must be between 0.0 and 1.0 inclusive, {percentile} is invalid" + ); + } + Ok(percentile) +} diff --git a/datafusion/functions-nested/src/array_has.rs b/datafusion/functions-nested/src/array_has.rs index f34fea0c4ba0..080b2f16d92f 100644 --- a/datafusion/functions-nested/src/array_has.rs +++ b/datafusion/functions-nested/src/array_has.rs @@ -132,23 +132,26 @@ impl ScalarUDFImpl for ArrayHas { // if the haystack is a constant list, we can use an inlist expression which is more // efficient because the haystack is not varying per-row match haystack { + Expr::Literal(scalar, _) if scalar.is_null() => { + return Ok(ExprSimplifyResult::Simplified(Expr::Literal( + ScalarValue::Boolean(None), + None, + ))) + } Expr::Literal( // FixedSizeList gets coerced to List scalar @ ScalarValue::List(_) | scalar @ ScalarValue::LargeList(_), _, ) => { - let array = scalar.to_array().unwrap(); // guarantee of ScalarValue if let Ok(scalar_values) = - ScalarValue::convert_array_to_scalar_vec(&array) + ScalarValue::convert_array_to_scalar_vec(&scalar.to_array()?) { assert_eq!(scalar_values.len(), 1); let list = scalar_values .into_iter() - // If the vec is a singular null, `list` will be empty due to this flatten(). 
- // It would be more clear if we handled the None separately, but this is more performant. .flatten() .flatten() - .map(|v| Expr::Literal(v.clone(), None)) + .map(|v| Expr::Literal(v, None)) .collect(); return Ok(ExprSimplifyResult::Simplified(in_list( @@ -178,6 +181,12 @@ impl ScalarUDFImpl for ArrayHas { args: datafusion_expr::ScalarFunctionArgs, ) -> Result { let [first_arg, second_arg] = take_function_args(self.name(), &args.args)?; + if first_arg.data_type().is_null() { + // Always return null if the first argument is null + // i.e. array_has(null, element) -> null + return Ok(ColumnarValue::Scalar(ScalarValue::Boolean(None))); + } + match &second_arg { ColumnarValue::Array(array_needle) => { // the needle is already an array, convert the haystack to an array of the same length @@ -663,6 +672,7 @@ fn general_array_has_all_and_any_kernel( mod tests { use std::sync::Arc; + use arrow::datatypes::Int32Type; use arrow::{ array::{create_array, Array, ArrayRef, AsArray, Int32Array, ListArray}, buffer::OffsetBuffer, @@ -733,6 +743,40 @@ mod tests { ); } + #[test] + fn test_simplify_array_has_with_null_to_null() { + let haystack = Expr::Literal(ScalarValue::Null, None); + let needle = col("c"); + + let props = ExecutionProps::new(); + let context = datafusion_expr::simplify::SimplifyContext::new(&props); + let Ok(ExprSimplifyResult::Simplified(simplified)) = + ArrayHas::new().simplify(vec![haystack, needle], &context) + else { + panic!("Expected simplified expression"); + }; + + assert_eq!(simplified, Expr::Literal(ScalarValue::Boolean(None), None)); + } + + #[test] + fn test_simplify_array_has_with_null_list_to_null() { + let haystack = + ListArray::from_iter_primitive::; 0], _>([None]); + let haystack = Expr::Literal(ScalarValue::List(Arc::new(haystack)), None); + let needle = col("c"); + + let props = ExecutionProps::new(); + let context = datafusion_expr::simplify::SimplifyContext::new(&props); + let Ok(ExprSimplifyResult::Simplified(simplified)) = + ArrayHas::new().simplify(vec![haystack, needle], &context) + else { + panic!("Expected simplified expression"); + }; + + assert_eq!(simplified, Expr::Literal(ScalarValue::Boolean(None), None)); + } + #[test] fn test_array_has_complex_list_not_simplified() { let haystack = col("c1"); @@ -757,13 +801,9 @@ mod tests { Field::new_list("", Field::new("", DataType::Int32, true), true), true, )); - let needle_field = Arc::new(Field::new("needle", DataType::Int32, true)); - let return_field = Arc::new(Field::new_list( - "return", - Field::new("", DataType::Boolean, true), - true, - )); + let needle_field = Arc::new(Field::new("needle", DataType::Int32, true)); + let return_field = Arc::new(Field::new("return", DataType::Boolean, true)); let haystack = ListArray::new( Field::new_list_field(DataType::Int32, true).into(), OffsetBuffer::new(vec![0, 0].into()), @@ -773,7 +813,6 @@ mod tests { let haystack = ColumnarValue::Array(Arc::new(haystack)); let needle = ColumnarValue::Scalar(ScalarValue::Int32(Some(1))); - let result = ArrayHas::new().invoke_with_args(ScalarFunctionArgs { args: vec![haystack, needle], arg_fields: vec![haystack_field, needle_field], @@ -789,4 +828,34 @@ mod tests { Ok(()) } + + #[test] + fn test_array_has_list_null_haystack() -> Result<(), DataFusionError> { + let haystack_field = Arc::new(Field::new("haystack", DataType::Null, true)); + let needle_field = Arc::new(Field::new("needle", DataType::Int32, true)); + let return_field = Arc::new(Field::new("return", DataType::Boolean, true)); + let haystack = + 
ListArray::from_iter_primitive::; 0], _>([ + None, None, None, + ]); + + let haystack = ColumnarValue::Array(Arc::new(haystack)); + let needle = ColumnarValue::Scalar(ScalarValue::Int32(Some(1))); + let result = ArrayHas::new().invoke_with_args(ScalarFunctionArgs { + args: vec![haystack, needle], + arg_fields: vec![haystack_field, needle_field], + number_rows: 1, + return_field, + config_options: Arc::new(ConfigOptions::default()), + })?; + + let output = result.into_array(1)?; + let output = output.as_boolean(); + assert_eq!(output.len(), 3); + for i in 0..3 { + assert!(output.is_null(i)); + } + + Ok(()) + } } diff --git a/datafusion/functions-nested/src/set_ops.rs b/datafusion/functions-nested/src/set_ops.rs index 555767f8f070..53642bf1622b 100644 --- a/datafusion/functions-nested/src/set_ops.rs +++ b/datafusion/functions-nested/src/set_ops.rs @@ -29,9 +29,7 @@ use arrow::datatypes::{DataType, Field, FieldRef}; use arrow::row::{RowConverter, SortField}; use datafusion_common::cast::{as_large_list_array, as_list_array}; use datafusion_common::utils::ListCoercion; -use datafusion_common::{ - exec_err, internal_err, plan_err, utils::take_function_args, Result, -}; +use datafusion_common::{exec_err, internal_err, utils::take_function_args, Result}; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; @@ -289,13 +287,7 @@ impl ScalarUDFImpl for ArrayDistinct { } fn return_type(&self, arg_types: &[DataType]) -> Result { - match &arg_types[0] { - List(field) => Ok(DataType::new_list(field.data_type().clone(), true)), - LargeList(field) => { - Ok(DataType::new_large_list(field.data_type().clone(), true)) - } - arg_type => plan_err!("{} does not support type {arg_type}", self.name()), - } + Ok(arg_types[0].clone()) } fn invoke_with_args( @@ -563,3 +555,54 @@ fn general_array_distinct( array.nulls().cloned(), )?)) } + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use arrow::{ + array::{Int32Array, ListArray}, + buffer::OffsetBuffer, + datatypes::{DataType, Field}, + }; + use datafusion_common::{config::ConfigOptions, DataFusionError}; + use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; + + use crate::set_ops::array_distinct_udf; + + #[test] + fn test_array_distinct_inner_nullability_result_type_match_return_type( + ) -> Result<(), DataFusionError> { + let udf = array_distinct_udf(); + + for inner_nullable in [true, false] { + let inner_field = Field::new_list_field(DataType::Int32, inner_nullable); + let input_field = + Field::new_list("input", Arc::new(inner_field.clone()), true); + + // [[1, 1, 2]] + let input_array = ListArray::new( + inner_field.into(), + OffsetBuffer::new(vec![0, 3].into()), + Arc::new(Int32Array::new(vec![1, 1, 2].into(), None)), + None, + ); + + let input_array = ColumnarValue::Array(Arc::new(input_array)); + + let result = udf.invoke_with_args(ScalarFunctionArgs { + args: vec![input_array], + arg_fields: vec![input_field.clone().into()], + number_rows: 1, + return_field: input_field.clone().into(), + config_options: Arc::new(ConfigOptions::default()), + })?; + + assert_eq!( + result.data_type(), + udf.return_type(&[input_field.data_type().clone()])? + ); + } + Ok(()) + } +} diff --git a/datafusion/functions-nested/src/string.rs b/datafusion/functions-nested/src/string.rs index 3373f7a9838e..61caa3ac7076 100644 --- a/datafusion/functions-nested/src/string.rs +++ b/datafusion/functions-nested/src/string.rs @@ -369,27 +369,38 @@ pub(super) fn array_to_string_inner(args: &[ArrayRef]) -> Result { List(..) 
=> { let list_array = as_list_array(&arr)?; for i in 0..list_array.len() { - compute_array_to_string( - arg, - list_array.value(i), - delimiter.clone(), - null_string.clone(), - with_null_string, - )?; + if !list_array.is_null(i) { + compute_array_to_string( + arg, + list_array.value(i), + delimiter.clone(), + null_string.clone(), + with_null_string, + )?; + } else if with_null_string { + arg.push_str(&null_string); + arg.push_str(&delimiter); + } } Ok(arg) } FixedSizeList(..) => { let list_array = as_fixed_size_list_array(&arr)?; + for i in 0..list_array.len() { - compute_array_to_string( - arg, - list_array.value(i), - delimiter.clone(), - null_string.clone(), - with_null_string, - )?; + if !list_array.is_null(i) { + compute_array_to_string( + arg, + list_array.value(i), + delimiter.clone(), + null_string.clone(), + with_null_string, + )?; + } else if with_null_string { + arg.push_str(&null_string); + arg.push_str(&delimiter); + } } Ok(arg) @@ -397,13 +408,18 @@ pub(super) fn array_to_string_inner(args: &[ArrayRef]) -> Result { LargeList(..) => { let list_array = as_large_list_array(&arr)?; for i in 0..list_array.len() { - compute_array_to_string( - arg, - list_array.value(i), - delimiter.clone(), - null_string.clone(), - with_null_string, - )?; + if !list_array.is_null(i) { + compute_array_to_string( + arg, + list_array.value(i), + delimiter.clone(), + null_string.clone(), + with_null_string, + )?; + } else if with_null_string { + arg.push_str(&null_string); + arg.push_str(&delimiter); + } } Ok(arg) diff --git a/datafusion/functions-window-common/src/expr.rs b/datafusion/functions-window-common/src/expr.rs index 774cd5182b30..d72cd412f017 100644 --- a/datafusion/functions-window-common/src/expr.rs +++ b/datafusion/functions-window-common/src/expr.rs @@ -37,7 +37,7 @@ impl<'a> ExpressionArgs<'a> { /// /// * `input_exprs` - The expressions passed as arguments /// to the user-defined window function. - /// * `input_types` - The data types corresponding to the + /// * `input_fields` - The fields corresponding to the /// arguments to the user-defined window function. /// pub fn new( diff --git a/datafusion/functions-window-common/src/partition.rs b/datafusion/functions-window-common/src/partition.rs index 61125e596130..df0a81540117 100644 --- a/datafusion/functions-window-common/src/partition.rs +++ b/datafusion/functions-window-common/src/partition.rs @@ -42,7 +42,7 @@ impl<'a> PartitionEvaluatorArgs<'a> { /// /// * `input_exprs` - The expressions passed as arguments /// to the user-defined window function. - /// * `input_types` - The data types corresponding to the + /// * `input_fields` - The fields corresponding to the /// arguments to the user-defined window function. /// * `is_reversed` - Set to `true` if and only if the user-defined /// window function is reversible and is reversed. 
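A hedged illustration of the array_to_string change above (the literal syntax and values are assumed, not taken from the patch): a null inner element now contributes the null string instead of being silently skipped, so `array_to_string([[1, 2], NULL, [3]], ',', '*')` yields `1,2,*,3`; when no null string argument is supplied, the NULL element still contributes nothing.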
diff --git a/datafusion/functions-window/src/nth_value.rs b/datafusion/functions-window/src/nth_value.rs index 329d8aa5ab17..1ba6ad5ce0d4 100644 --- a/datafusion/functions-window/src/nth_value.rs +++ b/datafusion/functions-window/src/nth_value.rs @@ -40,39 +40,28 @@ use std::hash::Hash; use std::ops::Range; use std::sync::{Arc, LazyLock}; -get_or_init_udwf!( +define_udwf_and_expr!( First, first_value, - "returns the first value in the window frame", + [arg], + "Returns the first value in the window frame", NthValue::first ); -get_or_init_udwf!( +define_udwf_and_expr!( Last, last_value, - "returns the last value in the window frame", + [arg], + "Returns the last value in the window frame", NthValue::last ); get_or_init_udwf!( NthValue, nth_value, - "returns the nth value in the window frame", + "Returns the nth value in the window frame", NthValue::nth ); -/// Create an expression to represent the `first_value` window function -/// -pub fn first_value(arg: datafusion_expr::Expr) -> datafusion_expr::Expr { - first_value_udwf().call(vec![arg]) -} - -/// Create an expression to represent the `last_value` window function -/// -pub fn last_value(arg: datafusion_expr::Expr) -> datafusion_expr::Expr { - last_value_udwf().call(vec![arg]) -} - /// Create an expression to represent the `nth_value` window function -/// pub fn nth_value(arg: datafusion_expr::Expr, n: i64) -> datafusion_expr::Expr { nth_value_udwf().call(vec![arg, n.lit()]) } diff --git a/datafusion/functions-window/src/ntile.rs b/datafusion/functions-window/src/ntile.rs index d188db3bbf59..008caaa848aa 100644 --- a/datafusion/functions-window/src/ntile.rs +++ b/datafusion/functions-window/src/ntile.rs @@ -25,8 +25,7 @@ use datafusion_common::arrow::array::{ArrayRef, UInt64Array}; use datafusion_common::arrow::datatypes::{DataType, Field}; use datafusion_common::{exec_datafusion_err, exec_err, Result}; use datafusion_expr::{ - Documentation, Expr, LimitEffect, PartitionEvaluator, Signature, Volatility, - WindowUDFImpl, + Documentation, LimitEffect, PartitionEvaluator, Signature, Volatility, WindowUDFImpl, }; use datafusion_functions_window_common::field; use datafusion_functions_window_common::partition::PartitionEvaluatorArgs; @@ -37,16 +36,13 @@ use std::any::Any; use std::fmt::Debug; use std::sync::Arc; -get_or_init_udwf!( +define_udwf_and_expr!( Ntile, ntile, - "integer ranging from 1 to the argument value, dividing the partition as equally as possible" + [arg], + "Integer ranging from 1 to the argument value, dividing the partition as equally as possible." 
); -pub fn ntile(arg: Expr) -> Expr { - ntile_udwf().call(vec![arg]) -} - #[user_doc( doc_section(label = "Ranking Functions"), description = "Integer ranging from 1 to the argument value, dividing the partition as equally as possible", diff --git a/datafusion/functions/src/core/arrow_cast.rs b/datafusion/functions/src/core/arrow_cast.rs index 94a41ba4bb25..c4e58601cd10 100644 --- a/datafusion/functions/src/core/arrow_cast.rs +++ b/datafusion/functions/src/core/arrow_cast.rs @@ -60,16 +60,26 @@ use datafusion_macros::user_doc; description = "Casts a value to a specific Arrow data type.", syntax_example = "arrow_cast(expression, datatype)", sql_example = r#"```sql -> select arrow_cast(-5, 'Int8') as a, +> select + arrow_cast(-5, 'Int8') as a, arrow_cast('foo', 'Dictionary(Int32, Utf8)') as b, - arrow_cast('bar', 'LargeUtf8') as c, - arrow_cast('2023-01-02T12:53:02', 'Timestamp(Microsecond, Some("+08:00"))') as d - ; -+----+-----+-----+---------------------------+ -| a | b | c | d | -+----+-----+-----+---------------------------+ -| -5 | foo | bar | 2023-01-02T12:53:02+08:00 | -+----+-----+-----+---------------------------+ + arrow_cast('bar', 'LargeUtf8') as c; + ++----+-----+-----+ +| a | b | c | ++----+-----+-----+ +| -5 | foo | bar | ++----+-----+-----+ + +> select + arrow_cast('2023-01-02T12:53:02', 'Timestamp(µs, "+08:00")') as d, + arrow_cast('2023-01-02T12:53:02', 'Timestamp(µs)') as e; + ++---------------------------+---------------------+ +| d | e | ++---------------------------+---------------------+ +| 2023-01-02T12:53:02+08:00 | 2023-01-02T12:53:02 | ++---------------------------+---------------------+ ```"#, argument( name = "expression", diff --git a/datafusion/functions/src/core/coalesce.rs b/datafusion/functions/src/core/coalesce.rs index 3fba539dd04b..aab1f445d559 100644 --- a/datafusion/functions/src/core/coalesce.rs +++ b/datafusion/functions/src/core/coalesce.rs @@ -47,7 +47,7 @@ use std::any::Any; )] #[derive(Debug, PartialEq, Eq, Hash)] pub struct CoalesceFunc { - signature: Signature, + pub(super) signature: Signature, } impl Default for CoalesceFunc { @@ -126,6 +126,15 @@ impl ScalarUDFImpl for CoalesceFunc { internal_err!("coalesce should have been simplified to case") } + fn conditional_arguments<'a>( + &self, + args: &'a [Expr], + ) -> Option<(Vec<&'a Expr>, Vec<&'a Expr>)> { + let eager = vec![&args[0]]; + let lazy = args[1..].iter().collect(); + Some((eager, lazy)) + } + fn short_circuits(&self) -> bool { true } diff --git a/datafusion/functions/src/core/nvl.rs b/datafusion/functions/src/core/nvl.rs index c8b34c4b1780..0b9968a88fc9 100644 --- a/datafusion/functions/src/core/nvl.rs +++ b/datafusion/functions/src/core/nvl.rs @@ -15,21 +15,19 @@ // specific language governing permissions and limitations // under the License. 
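The `conditional_arguments` hook added to `CoalesceFunc` above tells the common-subexpression pass which arguments are always evaluated and which may be skipped. A minimal sketch of what it reports, assuming the `datafusion_functions::core::coalesce::CoalesceFunc` path and its `Default` constructor from the current crate layout:

    use datafusion_expr::{col, ScalarUDFImpl};
    use datafusion_functions::core::coalesce::CoalesceFunc;

    fn coalesce_conditional_args_sketch() {
        let args = vec![col("a"), col("b"), col("c")];
        // `coalesce` always evaluates its first argument; the rest are lazy and
        // must not be hoisted unconditionally by common-subexpression elimination.
        let (eager, lazy) = CoalesceFunc::default()
            .conditional_arguments(&args)
            .expect("coalesce reports conditional arguments");
        assert_eq!(eager, vec![&args[0]]);
        assert_eq!(lazy, vec![&args[1], &args[2]]);
    }

This is what lets `common_subexpr_eliminate` (changed further below) treat the trailing arguments of `coalesce` as conditionally executed instead of relying on `short_circuits` alone.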
-use arrow::array::Array; -use arrow::compute::is_not_null; -use arrow::compute::kernels::zip::zip; -use arrow::datatypes::DataType; -use datafusion_common::{utils::take_function_args, Result}; +use crate::core::coalesce::CoalesceFunc; +use arrow::datatypes::{DataType, FieldRef}; +use datafusion_common::Result; +use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; use datafusion_expr::{ - ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature, - Volatility, + ColumnarValue, Documentation, Expr, ReturnFieldArgs, ScalarFunctionArgs, + ScalarUDFImpl, Signature, Volatility, }; use datafusion_macros::user_doc; -use std::sync::Arc; #[user_doc( doc_section(label = "Conditional Functions"), - description = "Returns _expression2_ if _expression1_ is NULL otherwise it returns _expression1_.", + description = "Returns _expression2_ if _expression1_ is NULL otherwise it returns _expression1_ and _expression2_ is not evaluated. This function can be used to substitute a default value for NULL values.", syntax_example = "nvl(expression1, expression2)", sql_example = r#"```sql > select nvl(null, 'a'); @@ -57,7 +55,7 @@ use std::sync::Arc; )] #[derive(Debug, PartialEq, Eq, Hash)] pub struct NVLFunc { - signature: Signature, + coalesce: CoalesceFunc, aliases: Vec, } @@ -90,11 +88,13 @@ impl Default for NVLFunc { impl NVLFunc { pub fn new() -> Self { Self { - signature: Signature::uniform( - 2, - SUPPORTED_NVL_TYPES.to_vec(), - Volatility::Immutable, - ), + coalesce: CoalesceFunc { + signature: Signature::uniform( + 2, + SUPPORTED_NVL_TYPES.to_vec(), + Volatility::Immutable, + ), + }, aliases: vec![String::from("ifnull")], } } @@ -110,209 +110,45 @@ impl ScalarUDFImpl for NVLFunc { } fn signature(&self) -> &Signature { - &self.signature + &self.coalesce.signature } fn return_type(&self, arg_types: &[DataType]) -> Result { - Ok(arg_types[0].clone()) + self.coalesce.return_type(arg_types) } - fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { - nvl_func(&args.args) - } - - fn aliases(&self) -> &[String] { - &self.aliases - } - - fn documentation(&self) -> Option<&Documentation> { - self.doc() + fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result { + self.coalesce.return_field_from_args(args) } -} - -fn nvl_func(args: &[ColumnarValue]) -> Result { - let [lhs, rhs] = take_function_args("nvl/ifnull", args)?; - let (lhs_array, rhs_array) = match (lhs, rhs) { - (ColumnarValue::Array(lhs), ColumnarValue::Scalar(rhs)) => { - (Arc::clone(lhs), rhs.to_array_of_size(lhs.len())?) 
- } - (ColumnarValue::Array(lhs), ColumnarValue::Array(rhs)) => { - (Arc::clone(lhs), Arc::clone(rhs)) - } - (ColumnarValue::Scalar(lhs), ColumnarValue::Array(rhs)) => { - (lhs.to_array_of_size(rhs.len())?, Arc::clone(rhs)) - } - (ColumnarValue::Scalar(lhs), ColumnarValue::Scalar(rhs)) => { - let mut current_value = lhs; - if lhs.is_null() { - current_value = rhs; - } - return Ok(ColumnarValue::Scalar(current_value.clone())); - } - }; - let to_apply = is_not_null(&lhs_array)?; - let value = zip(&to_apply, &lhs_array, &rhs_array)?; - Ok(ColumnarValue::Array(value)) -} - -#[cfg(test)] -mod tests { - use std::sync::Arc; - - use arrow::array::*; - use super::*; - use datafusion_common::ScalarValue; - - #[test] - fn nvl_int32() -> Result<()> { - let a = Int32Array::from(vec![ - Some(1), - Some(2), - None, - None, - Some(3), - None, - None, - Some(4), - Some(5), - ]); - let a = ColumnarValue::Array(Arc::new(a)); - - let lit_array = ColumnarValue::Scalar(ScalarValue::Int32(Some(6i32))); - - let result = nvl_func(&[a, lit_array])?; - let result = result.into_array(0).expect("Failed to convert to array"); - - let expected = Arc::new(Int32Array::from(vec![ - Some(1), - Some(2), - Some(6), - Some(6), - Some(3), - Some(6), - Some(6), - Some(4), - Some(5), - ])) as ArrayRef; - assert_eq!(expected.as_ref(), result.as_ref()); - Ok(()) + fn simplify( + &self, + args: Vec, + info: &dyn SimplifyInfo, + ) -> Result { + self.coalesce.simplify(args, info) } - #[test] - // Ensure that arrays with no nulls can also invoke nvl() correctly - fn nvl_int32_non_nulls() -> Result<()> { - let a = Int32Array::from(vec![1, 3, 10, 7, 8, 1, 2, 4, 5]); - let a = ColumnarValue::Array(Arc::new(a)); - - let lit_array = ColumnarValue::Scalar(ScalarValue::Int32(Some(20i32))); - - let result = nvl_func(&[a, lit_array])?; - let result = result.into_array(0).expect("Failed to convert to array"); - - let expected = Arc::new(Int32Array::from(vec![ - Some(1), - Some(3), - Some(10), - Some(7), - Some(8), - Some(1), - Some(2), - Some(4), - Some(5), - ])) as ArrayRef; - assert_eq!(expected.as_ref(), result.as_ref()); - Ok(()) + fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { + self.coalesce.invoke_with_args(args) } - #[test] - fn nvl_boolean() -> Result<()> { - let a = BooleanArray::from(vec![Some(true), Some(false), None]); - let a = ColumnarValue::Array(Arc::new(a)); - - let lit_array = ColumnarValue::Scalar(ScalarValue::Boolean(Some(false))); - - let result = nvl_func(&[a, lit_array])?; - let result = result.into_array(0).expect("Failed to convert to array"); - - let expected = Arc::new(BooleanArray::from(vec![ - Some(true), - Some(false), - Some(false), - ])) as ArrayRef; - - assert_eq!(expected.as_ref(), result.as_ref()); - Ok(()) + fn conditional_arguments<'a>( + &self, + args: &'a [Expr], + ) -> Option<(Vec<&'a Expr>, Vec<&'a Expr>)> { + self.coalesce.conditional_arguments(args) } - #[test] - fn nvl_string() -> Result<()> { - let a = StringArray::from(vec![Some("foo"), Some("bar"), None, Some("baz")]); - let a = ColumnarValue::Array(Arc::new(a)); - - let lit_array = ColumnarValue::Scalar(ScalarValue::from("bax")); - - let result = nvl_func(&[a, lit_array])?; - let result = result.into_array(0).expect("Failed to convert to array"); - - let expected = Arc::new(StringArray::from(vec![ - Some("foo"), - Some("bar"), - Some("bax"), - Some("baz"), - ])) as ArrayRef; - - assert_eq!(expected.as_ref(), result.as_ref()); - Ok(()) + fn short_circuits(&self) -> bool { + self.coalesce.short_circuits() } - #[test] - fn 
nvl_literal_first() -> Result<()> { - let a = Int32Array::from(vec![Some(1), Some(2), None, None, Some(3), Some(4)]); - let a = ColumnarValue::Array(Arc::new(a)); - - let lit_array = ColumnarValue::Scalar(ScalarValue::Int32(Some(2i32))); - - let result = nvl_func(&[lit_array, a])?; - let result = result.into_array(0).expect("Failed to convert to array"); - - let expected = Arc::new(Int32Array::from(vec![ - Some(2), - Some(2), - Some(2), - Some(2), - Some(2), - Some(2), - ])) as ArrayRef; - assert_eq!(expected.as_ref(), result.as_ref()); - Ok(()) + fn aliases(&self) -> &[String] { + &self.aliases } - #[test] - fn nvl_scalar() -> Result<()> { - let a_null = ColumnarValue::Scalar(ScalarValue::Int32(None)); - let b_null = ColumnarValue::Scalar(ScalarValue::Int32(Some(2i32))); - - let result_null = nvl_func(&[a_null, b_null])?; - let result_null = result_null - .into_array(1) - .expect("Failed to convert to array"); - - let expected_null = Arc::new(Int32Array::from(vec![Some(2i32)])) as ArrayRef; - - assert_eq!(expected_null.as_ref(), result_null.as_ref()); - - let a_nnull = ColumnarValue::Scalar(ScalarValue::Int32(Some(2i32))); - let b_nnull = ColumnarValue::Scalar(ScalarValue::Int32(Some(1i32))); - - let result_nnull = nvl_func(&[a_nnull, b_nnull])?; - let result_nnull = result_nnull - .into_array(1) - .expect("Failed to convert to array"); - - let expected_nnull = Arc::new(Int32Array::from(vec![Some(2i32)])) as ArrayRef; - assert_eq!(expected_nnull.as_ref(), result_nnull.as_ref()); - - Ok(()) + fn documentation(&self) -> Option<&Documentation> { + self.doc() } } diff --git a/datafusion/functions/src/core/nvl2.rs b/datafusion/functions/src/core/nvl2.rs index 82aa8d2a4cd5..45cb6760d062 100644 --- a/datafusion/functions/src/core/nvl2.rs +++ b/datafusion/functions/src/core/nvl2.rs @@ -15,17 +15,16 @@ // specific language governing permissions and limitations // under the License. 
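The preceding nvl.rs hunk removes the hand-rolled `nvl_func` kernel and its tests because NVL now delegates to `CoalesceFunc`: `nvl(a, b)` is planned like `coalesce(a, b)` and simplified to (roughly) `CASE WHEN a IS NOT NULL THEN a ELSE b END`, so the second argument is only evaluated when the first is NULL. The nvl2.rs hunk below applies the same CASE rewrite directly.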
-use arrow::array::Array; -use arrow::compute::is_not_null; -use arrow::compute::kernels::zip::zip; -use arrow::datatypes::DataType; +use arrow::datatypes::{DataType, Field, FieldRef}; use datafusion_common::{internal_err, utils::take_function_args, Result}; use datafusion_expr::{ - type_coercion::binary::comparison_coercion, ColumnarValue, Documentation, - ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility, + conditional_expressions::CaseBuilder, + simplify::{ExprSimplifyResult, SimplifyInfo}, + type_coercion::binary::comparison_coercion, + ColumnarValue, Documentation, Expr, ReturnFieldArgs, ScalarFunctionArgs, + ScalarUDFImpl, Signature, Volatility, }; use datafusion_macros::user_doc; -use std::sync::Arc; #[user_doc( doc_section(label = "Conditional Functions"), @@ -95,8 +94,37 @@ impl ScalarUDFImpl for NVL2Func { Ok(arg_types[1].clone()) } - fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { - nvl2_func(&args.args) + fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result { + let nullable = + args.arg_fields[1].is_nullable() || args.arg_fields[2].is_nullable(); + let return_type = args.arg_fields[1].data_type().clone(); + Ok(Field::new(self.name(), return_type, nullable).into()) + } + + fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result { + internal_err!("nvl2 should have been simplified to case") + } + + fn simplify( + &self, + args: Vec, + _info: &dyn SimplifyInfo, + ) -> Result { + let [test, if_non_null, if_null] = take_function_args(self.name(), args)?; + + let expr = CaseBuilder::new( + None, + vec![test.is_not_null()], + vec![if_non_null], + Some(Box::new(if_null)), + ) + .end()?; + + Ok(ExprSimplifyResult::Simplified(expr)) + } + + fn short_circuits(&self) -> bool { + true } fn coerce_types(&self, arg_types: &[DataType]) -> Result> { @@ -123,42 +151,3 @@ impl ScalarUDFImpl for NVL2Func { self.doc() } } - -fn nvl2_func(args: &[ColumnarValue]) -> Result { - let mut len = 1; - let mut is_array = false; - for arg in args { - if let ColumnarValue::Array(array) = arg { - len = array.len(); - is_array = true; - break; - } - } - if is_array { - let args = args - .iter() - .map(|arg| match arg { - ColumnarValue::Scalar(scalar) => scalar.to_array_of_size(len), - ColumnarValue::Array(array) => Ok(Arc::clone(array)), - }) - .collect::>>()?; - let [tested, if_non_null, if_null] = take_function_args("nvl2", args)?; - let to_apply = is_not_null(&tested)?; - let value = zip(&to_apply, &if_non_null, &if_null)?; - Ok(ColumnarValue::Array(value)) - } else { - let [tested, if_non_null, if_null] = take_function_args("nvl2", args)?; - match &tested { - ColumnarValue::Array(_) => { - internal_err!("except Scalar value, but got Array") - } - ColumnarValue::Scalar(scalar) => { - if scalar.is_null() { - Ok(if_null.clone()) - } else { - Ok(if_non_null.clone()) - } - } - } - } -} diff --git a/datafusion/functions/src/datetime/current_date.rs b/datafusion/functions/src/datetime/current_date.rs index 0ba3afd19bed..18b99bca8638 100644 --- a/datafusion/functions/src/datetime/current_date.rs +++ b/datafusion/functions/src/datetime/current_date.rs @@ -36,7 +36,9 @@ Returns the current date in the session time zone. The `current_date()` return value is determined at query time and will return the same date, no matter when in the query plan the function executes. 
"#, - syntax_example = "current_date()" + syntax_example = r#"current_date() + (optional) SET datafusion.execution.time_zone = '+00:00'; + SELECT current_date();"# )] #[derive(Debug, PartialEq, Eq, Hash)] pub struct CurrentDateFunc { diff --git a/datafusion/functions/src/datetime/current_time.rs b/datafusion/functions/src/datetime/current_time.rs index 79d5bfc1783c..4f5b199cce41 100644 --- a/datafusion/functions/src/datetime/current_time.rs +++ b/datafusion/functions/src/datetime/current_time.rs @@ -15,26 +15,32 @@ // specific language governing permissions and limitations // under the License. +use arrow::array::timezone::Tz; use arrow::datatypes::DataType; use arrow::datatypes::DataType::Time64; use arrow::datatypes::TimeUnit::Nanosecond; -use std::any::Any; - +use chrono::TimeZone; +use chrono::Timelike; use datafusion_common::{internal_err, Result, ScalarValue}; use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; use datafusion_expr::{ ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, Volatility, }; use datafusion_macros::user_doc; +use std::any::Any; #[user_doc( doc_section(label = "Time and Date Functions"), description = r#" -Returns the current UTC time. +Returns the current time in the session time zone. The `current_time()` return value is determined at query time and will return the same time, no matter when in the query plan the function executes. + +The session time zone can be set using the statement 'SET datafusion.execution.time_zone = desired time zone'. The time zone can be a value like +00:00, 'Europe/London' etc. "#, - syntax_example = "current_time()" + syntax_example = r#"current_time() + (optional) SET datafusion.execution.time_zone = '+00:00'; + SELECT current_time();"# )] #[derive(Debug, PartialEq, Eq, Hash)] pub struct CurrentTimeFunc { @@ -93,7 +99,20 @@ impl ScalarUDFImpl for CurrentTimeFunc { info: &dyn SimplifyInfo, ) -> Result { let now_ts = info.execution_props().query_execution_start_time; - let nano = now_ts.timestamp_nanos_opt().map(|ts| ts % 86400000000000); + + // Try to get timezone from config and convert to local time + let nano = info + .execution_props() + .config_options() + .and_then(|config| config.execution.time_zone.parse::().ok()) + .map_or_else( + || datetime_to_time_nanos(&now_ts), + |tz| { + let local_now = tz.from_utc_datetime(&now_ts.naive_utc()); + datetime_to_time_nanos(&local_now) + }, + ); + Ok(ExprSimplifyResult::Simplified(Expr::Literal( ScalarValue::Time64Nanosecond(nano), None, @@ -104,3 +123,97 @@ impl ScalarUDFImpl for CurrentTimeFunc { self.doc() } } + +// Helper function for conversion of datetime to a timestamp. 
+fn datetime_to_time_nanos(dt: &chrono::DateTime) -> Option { + let hour = dt.hour() as i64; + let minute = dt.minute() as i64; + let second = dt.second() as i64; + let nanosecond = dt.nanosecond() as i64; + Some((hour * 3600 + minute * 60 + second) * 1_000_000_000 + nanosecond) +} + +#[cfg(test)] +mod tests { + use super::*; + use arrow::datatypes::{DataType, TimeUnit::Nanosecond}; + use chrono::{DateTime, Utc}; + use datafusion_common::{Result, ScalarValue}; + use datafusion_expr::execution_props::ExecutionProps; + use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; + use std::sync::Arc; + + struct MockSimplifyInfo { + execution_props: ExecutionProps, + } + + impl SimplifyInfo for MockSimplifyInfo { + fn is_boolean_type(&self, _expr: &Expr) -> Result { + Ok(false) + } + + fn nullable(&self, _expr: &Expr) -> Result { + Ok(true) + } + + fn execution_props(&self) -> &ExecutionProps { + &self.execution_props + } + + fn get_data_type(&self, _expr: &Expr) -> Result { + Ok(Time64(Nanosecond)) + } + } + + fn set_session_timezone_env(tz: &str, start_time: DateTime) -> MockSimplifyInfo { + let mut config = datafusion_common::config::ConfigOptions::default(); + config.execution.time_zone = tz.to_string(); + let mut execution_props = + ExecutionProps::new().with_query_execution_start_time(start_time); + execution_props.config_options = Some(Arc::new(config)); + MockSimplifyInfo { execution_props } + } + + #[test] + fn test_current_time_timezone_offset() { + // Use a fixed start time for consistent testing + let start_time = Utc.with_ymd_and_hms(2025, 1, 1, 12, 0, 0).unwrap(); + + // Test with UTC+05:00 + let info_plus_5 = set_session_timezone_env("+05:00", start_time); + let result_plus_5 = CurrentTimeFunc::new() + .simplify(vec![], &info_plus_5) + .unwrap(); + + // Test with UTC-05:00 + let info_minus_5 = set_session_timezone_env("-05:00", start_time); + let result_minus_5 = CurrentTimeFunc::new() + .simplify(vec![], &info_minus_5) + .unwrap(); + + // Extract nanoseconds from results + let nanos_plus_5 = match result_plus_5 { + ExprSimplifyResult::Simplified(Expr::Literal( + ScalarValue::Time64Nanosecond(Some(n)), + _, + )) => n, + _ => panic!("Expected Time64Nanosecond literal"), + }; + + let nanos_minus_5 = match result_minus_5 { + ExprSimplifyResult::Simplified(Expr::Literal( + ScalarValue::Time64Nanosecond(Some(n)), + _, + )) => n, + _ => panic!("Expected Time64Nanosecond literal"), + }; + + // Calculate the difference: UTC+05:00 should be 10 hours ahead of UTC-05:00 + let difference = nanos_plus_5 - nanos_minus_5; + + // 10 hours in nanoseconds + let expected_offset = 10i64 * 3600 * 1_000_000_000; + + assert_eq!(difference, expected_offset, "Expected 10-hour offset difference in nanoseconds between UTC+05:00 and UTC-05:00"); + } +} diff --git a/datafusion/functions/src/datetime/date_bin.rs b/datafusion/functions/src/datetime/date_bin.rs index 74e286de0f58..c4e89743bd55 100644 --- a/datafusion/functions/src/datetime/date_bin.rs +++ b/datafusion/functions/src/datetime/date_bin.rs @@ -687,7 +687,7 @@ mod tests { let res = invoke_date_bin_with_args(args, 1, return_field); assert_eq!( res.err().unwrap().strip_backtrace(), - "Execution error: DATE_BIN expects origin argument to be a TIMESTAMP with nanosecond precision but got Timestamp(Microsecond, None)" + "Execution error: DATE_BIN expects origin argument to be a TIMESTAMP with nanosecond precision but got Timestamp(µs)" ); args = vec![ diff --git a/datafusion/functions/src/datetime/mod.rs b/datafusion/functions/src/datetime/mod.rs 
index 5729b1edae95..d80f14facf82 100644 --- a/datafusion/functions/src/datetime/mod.rs +++ b/datafusion/functions/src/datetime/mod.rs @@ -45,7 +45,6 @@ make_udf_function!(date_part::DatePartFunc, date_part); make_udf_function!(date_trunc::DateTruncFunc, date_trunc); make_udf_function!(make_date::MakeDateFunc, make_date); make_udf_function!(from_unixtime::FromUnixtimeFunc, from_unixtime); -make_udf_function!(now::NowFunc, now); make_udf_function!(to_char::ToCharFunc, to_char); make_udf_function!(to_date::ToDateFunc, to_date); make_udf_function!(to_local_time::ToLocalTimeFunc, to_local_time); @@ -56,6 +55,9 @@ make_udf_function!(to_timestamp::ToTimestampMillisFunc, to_timestamp_millis); make_udf_function!(to_timestamp::ToTimestampMicrosFunc, to_timestamp_micros); make_udf_function!(to_timestamp::ToTimestampNanosFunc, to_timestamp_nanos); +// create UDF with config +make_udf_function_with_config!(now::NowFunc, now); + // we cannot currently use the export_functions macro since it doesn't handle // functions with varargs currently @@ -91,6 +93,7 @@ pub mod expr_fn { ),( now, "returns the current timestamp in nanoseconds, using the same value for all instances of now() in same statement", + @config ), ( to_local_time, @@ -255,6 +258,7 @@ pub mod expr_fn { /// Returns all DataFusion functions defined in this package pub fn functions() -> Vec> { + use datafusion_common::config::ConfigOptions; vec![ current_date(), current_time(), @@ -263,7 +267,7 @@ pub fn functions() -> Vec> { date_trunc(), from_unixtime(), make_date(), - now(), + now(&ConfigOptions::default()), to_char(), to_date(), to_local_time(), diff --git a/datafusion/functions/src/datetime/now.rs b/datafusion/functions/src/datetime/now.rs index 65dadb42a89e..96a35c241ff0 100644 --- a/datafusion/functions/src/datetime/now.rs +++ b/datafusion/functions/src/datetime/now.rs @@ -19,12 +19,14 @@ use arrow::datatypes::DataType::Timestamp; use arrow::datatypes::TimeUnit::Nanosecond; use arrow::datatypes::{DataType, Field, FieldRef}; use std::any::Any; +use std::sync::Arc; +use datafusion_common::config::ConfigOptions; use datafusion_common::{internal_err, Result, ScalarValue}; use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; use datafusion_expr::{ - ColumnarValue, Documentation, Expr, ReturnFieldArgs, ScalarUDFImpl, Signature, - Volatility, + ColumnarValue, Documentation, Expr, ReturnFieldArgs, ScalarUDF, ScalarUDFImpl, + Signature, Volatility, }; use datafusion_macros::user_doc; @@ -41,19 +43,30 @@ The `now()` return value is determined at query time and will return the same ti pub struct NowFunc { signature: Signature, aliases: Vec, + timezone: Option>, } impl Default for NowFunc { fn default() -> Self { - Self::new() + Self::new_with_config(&ConfigOptions::default()) } } impl NowFunc { + #[deprecated(since = "50.2.0", note = "use `new_with_config` instead")] pub fn new() -> Self { Self { signature: Signature::nullary(Volatility::Stable), aliases: vec!["current_timestamp".to_string()], + timezone: Some(Arc::from("+00")), + } + } + + pub fn new_with_config(config: &ConfigOptions) -> Self { + Self { + signature: Signature::nullary(Volatility::Stable), + aliases: vec!["current_timestamp".to_string()], + timezone: Some(Arc::from(config.execution.time_zone.as_str())), } } } @@ -77,10 +90,14 @@ impl ScalarUDFImpl for NowFunc { &self.signature } + fn with_updated_config(&self, config: &ConfigOptions) -> Option { + Some(Self::new_with_config(config).into()) + } + fn return_field_from_args(&self, _args: ReturnFieldArgs) -> Result { 
Ok(Field::new( self.name(), - Timestamp(Nanosecond, Some("+00:00".into())), + Timestamp(Nanosecond, self.timezone.clone()), false, ) .into()) @@ -106,8 +123,9 @@ impl ScalarUDFImpl for NowFunc { .execution_props() .query_execution_start_time .timestamp_nanos_opt(); + Ok(ExprSimplifyResult::Simplified(Expr::Literal( - ScalarValue::TimestampNanosecond(now_ts, Some("+00:00".into())), + ScalarValue::TimestampNanosecond(now_ts, self.timezone.clone()), None, ))) } diff --git a/datafusion/functions/src/encoding/inner.rs b/datafusion/functions/src/encoding/inner.rs index 5baa91936320..e5314ad220c8 100644 --- a/datafusion/functions/src/encoding/inner.rs +++ b/datafusion/functions/src/encoding/inner.rs @@ -24,7 +24,10 @@ use arrow::{ datatypes::{ByteArrayType, DataType}, }; use arrow_buffer::{Buffer, OffsetBufferBuilder}; -use base64::{engine::general_purpose, Engine as _}; +use base64::{ + engine::{DecodePaddingMode, GeneralPurpose, GeneralPurposeConfig}, + Engine as _, +}; use datafusion_common::{ cast::{as_generic_binary_array, as_generic_string_array}, not_impl_err, plan_err, @@ -40,6 +43,14 @@ use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; use datafusion_macros::user_doc; use std::any::Any; +// Allow padding characters, but don't require them, and don't generate them. +const BASE64_ENGINE: GeneralPurpose = GeneralPurpose::new( + &base64::alphabet::STANDARD, + GeneralPurposeConfig::new() + .with_encode_padding(false) + .with_decode_padding_mode(DecodePaddingMode::Indifferent), +); + #[user_doc( doc_section(label = "Binary String Functions"), description = "Encode binary data into a textual representation.", @@ -302,7 +313,7 @@ fn hex_encode(input: &[u8]) -> String { } fn base64_encode(input: &[u8]) -> String { - general_purpose::STANDARD_NO_PAD.encode(input) + BASE64_ENGINE.encode(input) } fn hex_decode(input: &[u8], buf: &mut [u8]) -> Result { @@ -315,7 +326,7 @@ fn hex_decode(input: &[u8], buf: &mut [u8]) -> Result { } fn base64_decode(input: &[u8], buf: &mut [u8]) -> Result { - general_purpose::STANDARD_NO_PAD + BASE64_ENGINE .decode_slice(input, buf) .map_err(|e| internal_datafusion_err!("Failed to decode from base64: {e}")) } @@ -364,18 +375,16 @@ where impl Encoding { fn encode_scalar(self, value: Option<&[u8]>) -> ColumnarValue { ColumnarValue::Scalar(match self { - Self::Base64 => ScalarValue::Utf8( - value.map(|v| general_purpose::STANDARD_NO_PAD.encode(v)), - ), + Self::Base64 => ScalarValue::Utf8(value.map(|v| BASE64_ENGINE.encode(v))), Self::Hex => ScalarValue::Utf8(value.map(hex::encode)), }) } fn encode_large_scalar(self, value: Option<&[u8]>) -> ColumnarValue { ColumnarValue::Scalar(match self { - Self::Base64 => ScalarValue::LargeUtf8( - value.map(|v| general_purpose::STANDARD_NO_PAD.encode(v)), - ), + Self::Base64 => { + ScalarValue::LargeUtf8(value.map(|v| BASE64_ENGINE.encode(v))) + } Self::Hex => ScalarValue::LargeUtf8(value.map(hex::encode)), }) } @@ -411,15 +420,9 @@ impl Encoding { }; let out = match self { - Self::Base64 => { - general_purpose::STANDARD_NO_PAD - .decode(value) - .map_err(|e| { - internal_datafusion_err!( - "Failed to decode value using base64: {e}" - ) - })? 
- } + Self::Base64 => BASE64_ENGINE.decode(value).map_err(|e| { + internal_datafusion_err!("Failed to decode value using base64: {e}") + })?, Self::Hex => hex::decode(value).map_err(|e| { internal_datafusion_err!("Failed to decode value using hex: {e}") })?, @@ -435,15 +438,9 @@ impl Encoding { }; let out = match self { - Self::Base64 => { - general_purpose::STANDARD_NO_PAD - .decode(value) - .map_err(|e| { - internal_datafusion_err!( - "Failed to decode value using base64: {e}" - ) - })? - } + Self::Base64 => BASE64_ENGINE.decode(value).map_err(|e| { + internal_datafusion_err!("Failed to decode value using base64: {e}") + })?, Self::Hex => hex::decode(value).map_err(|e| { internal_datafusion_err!("Failed to decode value using hex: {e}") })?, diff --git a/datafusion/functions/src/macros.rs b/datafusion/functions/src/macros.rs index 228d704e29cb..9e195f2d5291 100644 --- a/datafusion/functions/src/macros.rs +++ b/datafusion/functions/src/macros.rs @@ -40,6 +40,7 @@ /// Exported functions accept: /// - `Vec` argument (single argument followed by a comma) /// - Variable number of `Expr` arguments (zero or more arguments, must be without commas) +/// - Functions that require config (marked with `@config` prefix) #[macro_export] macro_rules! export_functions { ($(($FUNC:ident, $DOC:expr, $($arg:tt)*)),*) => { @@ -49,6 +50,15 @@ macro_rules! export_functions { )* }; + // function that requires config (marked with @config) + (single $FUNC:ident, $DOC:expr, @config) => { + #[doc = $DOC] + pub fn $FUNC() -> datafusion_expr::Expr { + use datafusion_common::config::ConfigOptions; + super::$FUNC(&ConfigOptions::default()).call(vec![]) + } + }; + // single vector argument (a single argument followed by a comma) (single $FUNC:ident, $DOC:expr, $arg:ident,) => { #[doc = $DOC] @@ -89,6 +99,22 @@ macro_rules! make_udf_function { }; } +/// Creates a singleton `ScalarUDF` of the `$UDF` function and a function +/// named `$NAME` which returns that singleton. The function takes a +/// configuration argument of type `$CONFIG_TYPE` to create the UDF. +#[macro_export] +macro_rules! 
make_udf_function_with_config { + ($UDF:ty, $NAME:ident) => { + #[allow(rustdoc::redundant_explicit_links)] + #[doc = concat!("Return a [`ScalarUDF`](datafusion_expr::ScalarUDF) implementation of ", stringify!($NAME))] + pub fn $NAME(config: &datafusion_common::config::ConfigOptions) -> std::sync::Arc { + std::sync::Arc::new(datafusion_expr::ScalarUDF::new_from_impl( + <$UDF>::new_with_config(&config), + )) + } + }; +} + /// Macro creates a sub module if the feature is not enabled /// /// The rationale for providing stub functions is to help users to configure datafusion diff --git a/datafusion/functions/src/planner.rs b/datafusion/functions/src/planner.rs index 7228cdc07e72..ccd167997003 100644 --- a/datafusion/functions/src/planner.rs +++ b/datafusion/functions/src/planner.rs @@ -25,7 +25,7 @@ use datafusion_expr::{ }; #[deprecated( - since = "0.50.0", + since = "50.0.0", note = "Use UnicodeFunctionPlanner and DateTimeFunctionPlanner instead" )] #[derive(Default, Debug)] diff --git a/datafusion/macros/Cargo.toml b/datafusion/macros/Cargo.toml index fe979720bc56..64781ddeaf42 100644 --- a/datafusion/macros/Cargo.toml +++ b/datafusion/macros/Cargo.toml @@ -43,4 +43,4 @@ proc-macro = true [dependencies] datafusion-doc = { workspace = true } quote = "1.0.41" -syn = { version = "2.0.106", features = ["full"] } +syn = { version = "2.0.108", features = ["full"] } diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index 3d5dee3a7255..4fb0f8553b4b 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -2117,7 +2117,7 @@ mod test { assert_analyzed_plan_eq!( plan, @r#" - Projection: CAST(Utf8("1998-03-18") AS Timestamp(Nanosecond, None)) = CAST(CAST(Utf8("1998-03-18") AS Date32) AS Timestamp(Nanosecond, None)) + Projection: CAST(Utf8("1998-03-18") AS Timestamp(ns)) = CAST(CAST(Utf8("1998-03-18") AS Date32) AS Timestamp(ns)) EmptyRelation: rows=0 "# ) @@ -2258,7 +2258,7 @@ mod test { let err = coerce_case_expression(case, &schema).unwrap_err(); assert_snapshot!( err.strip_backtrace(), - @"Error during planning: Failed to coerce then (Date32, Float32, Binary) and else (Timestamp(Nanosecond, None)) to common types in CASE WHEN expression" + @"Error during planning: Failed to coerce then (Date32, Float32, Binary) and else (Timestamp(ns)) to common types in CASE WHEN expression" ); Ok(()) @@ -2465,7 +2465,7 @@ mod test { assert_analyzed_plan_eq!( plan, @r#" - Projection: a = CAST(CAST(a AS Map(Field { name: "key_value", data_type: Struct([Field { name: "key", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, false)) AS Map(Field { name: "entries", data_type: Struct([Field { name: "key", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, false)) + Projection: a = CAST(CAST(a AS Map("key_value": Struct("key": Utf8, "value": nullable Float64), unsorted)) AS Map("entries": Struct("key": Utf8, "value": nullable Float64), unsorted)) EmptyRelation: rows=0 "# ) @@ -2488,7 +2488,7 @@ mod test { assert_analyzed_plan_eq!( plan, @r#" - Projection: 
IntervalYearMonth("12") + CAST(Utf8("2000-01-01T00:00:00") AS Timestamp(Nanosecond, None)) + Projection: IntervalYearMonth("12") + CAST(Utf8("2000-01-01T00:00:00") AS Timestamp(ns)) EmptyRelation: rows=0 "# ) @@ -2513,7 +2513,7 @@ mod test { assert_analyzed_plan_eq!( plan, @r#" - Projection: CAST(Utf8("1998-03-18") AS Timestamp(Nanosecond, None)) - CAST(Utf8("1998-03-18") AS Timestamp(Nanosecond, None)) + Projection: CAST(Utf8("1998-03-18") AS Timestamp(ns)) - CAST(Utf8("1998-03-18") AS Timestamp(ns)) EmptyRelation: rows=0 "# ) diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs index ec1f8f991a8e..251006849459 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -652,10 +652,8 @@ impl CSEController for ExprCSEController<'_> { // In case of `ScalarFunction`s we don't know which children are surely // executed so start visiting all children conditionally and stop the // recursion with `TreeNodeRecursion::Jump`. - Expr::ScalarFunction(ScalarFunction { func, args }) - if func.short_circuits() => - { - Some((vec![], args.iter().collect())) + Expr::ScalarFunction(ScalarFunction { func, args }) => { + func.conditional_arguments(args) } // In case of `And` and `Or` the first child is surely executed, but we diff --git a/datafusion/optimizer/src/decorrelate_predicate_subquery.rs b/datafusion/optimizer/src/decorrelate_predicate_subquery.rs index c8be689fc5a4..ccf90f91e68f 100644 --- a/datafusion/optimizer/src/decorrelate_predicate_subquery.rs +++ b/datafusion/optimizer/src/decorrelate_predicate_subquery.rs @@ -1972,14 +1972,14 @@ mod tests { assert_optimized_plan_equal!( plan, - @r#" + @r" Projection: test.b [b:UInt32] LeftSemi Join: Filter: Boolean(true) [a:UInt32, b:UInt32, c:UInt32] TableScan: test [a:UInt32, b:UInt32, c:UInt32] SubqueryAlias: __correlated_sq_1 [arr:Int32;N] Unnest: lists[sq.arr|depth=1] structs[] [arr:Int32;N] - TableScan: sq [arr:List(Field { name: "item", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N] - "# + TableScan: sq [arr:List(Field { data_type: Int32, nullable: true });N] + " ) } @@ -2007,14 +2007,14 @@ mod tests { assert_optimized_plan_equal!( plan, - @r#" + @r" Projection: test.b [b:UInt32] LeftSemi Join: Filter: __correlated_sq_1.a = test.b [a:UInt32, b:UInt32, c:UInt32] TableScan: test [a:UInt32, b:UInt32, c:UInt32] SubqueryAlias: __correlated_sq_1 [a:UInt32;N] Unnest: lists[sq.a|depth=1] structs[] [a:UInt32;N] - TableScan: sq [a:List(Field { name: "item", data_type: UInt32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N] - "# + TableScan: sq [a:List(Field { data_type: UInt32, nullable: true });N] + " ) } diff --git a/datafusion/optimizer/src/replace_distinct_aggregate.rs b/datafusion/optimizer/src/replace_distinct_aggregate.rs index 2383787fa0e8..215f5e240d5d 100644 --- a/datafusion/optimizer/src/replace_distinct_aggregate.rs +++ b/datafusion/optimizer/src/replace_distinct_aggregate.rs @@ -25,7 +25,7 @@ use datafusion_common::tree_node::Transformed; use datafusion_common::{Column, Result}; use datafusion_expr::expr_rewriter::normalize_cols; use datafusion_expr::utils::expand_wildcard; -use datafusion_expr::{col, ExprFunctionExt, LogicalPlanBuilder}; +use datafusion_expr::{col, lit, ExprFunctionExt, Limit, LogicalPlanBuilder}; use datafusion_expr::{Aggregate, Distinct, DistinctOn, Expr, LogicalPlan}; /// Optimizer that replaces logical [[Distinct]] with a 
logical [[Aggregate]] @@ -54,6 +54,17 @@ use datafusion_expr::{Aggregate, Distinct, DistinctOn, Expr, LogicalPlan}; /// ) /// ORDER BY a DESC /// ``` +/// +/// In case there are no columns, the [[Distinct]] is replaced by a [[Limit]] +/// +/// ```text +/// SELECT DISTINCT * FROM empty_table +/// ``` +/// +/// Into +/// ```text +/// SELECT * FROM empty_table LIMIT 1 +/// ``` #[derive(Default, Debug)] pub struct ReplaceDistinctWithAggregate {} @@ -78,6 +89,16 @@ impl OptimizerRule for ReplaceDistinctWithAggregate { LogicalPlan::Distinct(Distinct::All(input)) => { let group_expr = expand_wildcard(input.schema(), &input, None)?; + if group_expr.is_empty() { + // Special case: there are no columns to group by, so we can't replace it by a group by + // however, we can replace it by LIMIT 1 because there is either no output or a single empty row + return Ok(Transformed::yes(LogicalPlan::Limit(Limit { + skip: None, + fetch: Some(Box::new(lit(1i64))), + input, + }))); + } + let field_count = input.schema().fields().len(); for dep in input.schema().functional_dependencies().iter() { // If distinct is exactly the same with a previous GROUP BY, we can @@ -184,15 +205,17 @@ impl OptimizerRule for ReplaceDistinctWithAggregate { #[cfg(test)] mod tests { - use std::sync::Arc; - use crate::assert_optimized_plan_eq_snapshot; use crate::replace_distinct_aggregate::ReplaceDistinctWithAggregate; use crate::test::*; + use arrow::datatypes::{Fields, Schema}; + use std::sync::Arc; use crate::OptimizerContext; use datafusion_common::Result; - use datafusion_expr::{col, logical_plan::builder::LogicalPlanBuilder, Expr}; + use datafusion_expr::{ + col, logical_plan::builder::LogicalPlanBuilder, table_scan, Expr, + }; use datafusion_functions_aggregate::sum::sum; macro_rules! assert_optimized_plan_equal { @@ -274,4 +297,16 @@ mod tests { TableScan: test ") } + + #[test] + fn use_limit_1_when_no_columns() -> Result<()> { + let plan = table_scan(Some("test"), &Schema::new(Fields::empty()), None)? + .distinct()? 
+ .build()?; + + assert_optimized_plan_equal!(plan, @r" + Limit: skip=0, fetch=1 + TableScan: test + ") + } } diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index c40906239073..204ce14e37d8 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -29,6 +29,7 @@ use std::sync::Arc; use datafusion_common::{ cast::{as_large_list_array, as_list_array}, + metadata::FieldMetadata, tree_node::{Transformed, TransformedResult, TreeNode, TreeNodeRewriter}, }; use datafusion_common::{ @@ -57,7 +58,6 @@ use crate::simplify_expressions::unwrap_cast::{ unwrap_cast_in_comparison_for_binary, }; use crate::simplify_expressions::SimplifyInfo; -use datafusion_expr::expr::FieldMetadata; use datafusion_expr_common::casts::try_cast_literal_to_type; use indexmap::IndexSet; use regex::Regex; diff --git a/datafusion/optimizer/src/simplify_expressions/simplify_predicates.rs b/datafusion/optimizer/src/simplify_expressions/simplify_predicates.rs index 131404e60706..e811ce731310 100644 --- a/datafusion/optimizer/src/simplify_expressions/simplify_predicates.rs +++ b/datafusion/optimizer/src/simplify_expressions/simplify_predicates.rs @@ -194,7 +194,7 @@ fn find_most_restrictive_predicate( let mut best_value: Option<&ScalarValue> = None; for (idx, pred) in predicates.iter().enumerate() { - if let Expr::BinaryExpr(BinaryExpr { left, op: _, right }) = pred { + if let Expr::BinaryExpr(BinaryExpr { left, op, right }) = pred { // Extract the literal value based on which side has it let scalar_value = match (right.as_literal(), left.as_literal()) { (Some(scalar), _) => Some(scalar), @@ -207,8 +207,12 @@ fn find_most_restrictive_predicate( let comparison = scalar.try_cmp(current_best)?; let is_better = if find_greater { comparison == std::cmp::Ordering::Greater + || (comparison == std::cmp::Ordering::Equal + && op == &Operator::Gt) } else { comparison == std::cmp::Ordering::Less + || (comparison == std::cmp::Ordering::Equal + && op == &Operator::Lt) }; if is_better { diff --git a/datafusion/physical-expr/benches/case_when.rs b/datafusion/physical-expr/benches/case_when.rs index 5a88604716d2..e52aeb1aee12 100644 --- a/datafusion/physical-expr/benches/case_when.rs +++ b/datafusion/physical-expr/benches/case_when.rs @@ -15,110 +15,506 @@ // specific language governing permissions and limitations // under the License. 
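For the `find_most_restrictive_predicate` change above: when two candidate bounds compare equal, the strict operator now wins the tie, since `x > 5` filters more rows than `x >= 5`. A small self-contained sketch of just that tie-break logic; the `is_better` helper mirrors the updated condition and is illustrative, not a DataFusion API:

```rust
use std::cmp::Ordering;

use datafusion_expr::Operator;

// A candidate bound replaces the current best if it is strictly tighter,
// or if it is equal but uses the strict comparison operator.
fn is_better(comparison: Ordering, op: &Operator, find_greater: bool) -> bool {
    if find_greater {
        comparison == Ordering::Greater
            || (comparison == Ordering::Equal && op == &Operator::Gt)
    } else {
        comparison == Ordering::Less
            || (comparison == Ordering::Equal && op == &Operator::Lt)
    }
}

fn main() {
    // Choosing the tightest lower bound among `x >= 5` and `x > 5`:
    // the literals compare equal, so the strict `>` is preferred.
    assert!(is_better(Ordering::Equal, &Operator::Gt, true));
    assert!(!is_better(Ordering::Equal, &Operator::GtEq, true));
    // For the tightest upper bound, `<` beats `<=` on a tie.
    assert!(is_better(Ordering::Equal, &Operator::Lt, false));
}
```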
-use arrow::array::builder::{Int32Builder, StringBuilder}; -use arrow::datatypes::{DataType, Field, Schema}; +use arrow::array::{Array, ArrayRef, Int32Array, Int32Builder, StringArray}; +use arrow::datatypes::{ArrowNativeTypeOp, Field, Schema}; use arrow::record_batch::RecordBatch; -use criterion::{black_box, criterion_group, criterion_main, Criterion}; -use datafusion_common::ScalarValue; +use arrow::util::test_util::seedable_rng; +use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion}; use datafusion_expr::Operator; -use datafusion_physical_expr::expressions::{BinaryExpr, CaseExpr, Column, Literal}; +use datafusion_physical_expr::expressions::{case, col, lit, BinaryExpr}; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; +use itertools::Itertools; +use rand::distr::uniform::SampleUniform; +use rand::distr::Alphanumeric; +use rand::rngs::StdRng; +use rand::{Rng, RngCore}; +use std::fmt::{Display, Formatter}; +use std::ops::Range; use std::sync::Arc; -fn make_col(name: &str, index: usize) -> Arc<dyn PhysicalExpr> { - Arc::new(Column::new(name, index)) +fn make_x_cmp_y( + x: &Arc<dyn PhysicalExpr>, + op: Operator, + y: i32, +) -> Arc<dyn PhysicalExpr> { + Arc::new(BinaryExpr::new(Arc::clone(x), op, lit(y))) } -fn make_lit_i32(n: i32) -> Arc<dyn PhysicalExpr> { - Arc::new(Literal::new(ScalarValue::Int32(Some(n)))) -} +/// Create a record batch with the given number of rows and columns. +/// Columns are named `c<i>` where `i` is the column index. +/// +/// The minimum value for `column_count` is `3`. +/// `c1` contains incrementing int32 values +/// `c2` contains int32 values in blocks of 1000 that increment by 1000 +/// `c3` contains int32 values with one null inserted every 9 rows +/// `c4` to `cn`, if present, contain unspecified int32 values +fn make_batch(row_count: usize, column_count: usize) -> RecordBatch { + assert!(column_count >= 3); + + let mut c2 = Int32Builder::new(); + let mut c3 = Int32Builder::new(); + for i in 0..row_count { + c2.append_value(i as i32 / 1000 * 1000); -fn criterion_benchmark(c: &mut Criterion) { - // create input data - let mut c1 = Int32Builder::new(); - let mut c2 = StringBuilder::new(); - let mut c3 = StringBuilder::new(); - for i in 0..1000 { - c1.append_value(i); - if i % 7 == 0 { - c2.append_null(); - } else { - c2.append_value(format!("string {i}")); - } if i % 9 == 0 { c3.append_null(); } else { - c3.append_value(format!("other string {i}")); + c3.append_value(i as i32); } } - let c1 = Arc::new(c1.finish()); + let c1 = Arc::new(Int32Array::from_iter_values(0..row_count as i32)); let c2 = Arc::new(c2.finish()); let c3 = Arc::new(c3.finish()); - let schema = Schema::new(vec![ - Field::new("c1", DataType::Int32, true), - Field::new("c2", DataType::Utf8, true), - Field::new("c3", DataType::Utf8, true), - ]); - let batch = RecordBatch::try_new(Arc::new(schema), vec![c1, c2, c3]).unwrap(); - - // use same predicate for all benchmarks - let predicate = Arc::new(BinaryExpr::new( - make_col("c1", 0), - Operator::LtEq, - make_lit_i32(500), - )); - - // CASE WHEN c1 <= 500 THEN 1 ELSE 0 END - c.bench_function("case_when: scalar or scalar", |b| { + let mut columns: Vec<ArrayRef> = vec![c1, c2, c3]; + for _ in 3..column_count { + columns.push(Arc::new(Int32Array::from_iter_values(0..row_count as i32))); + } + + let fields = columns + .iter() + .enumerate() + .map(|(i, c)| { + Field::new( + format!("c{}", i + 1), + c.data_type().clone(), + c.is_nullable(), + ) + }) + .collect::<Vec<_>>(); + + let schema = Arc::new(Schema::new(fields)); + RecordBatch::try_new(Arc::clone(&schema), columns).unwrap() +} + +fn
criterion_benchmark(c: &mut Criterion) { + run_benchmarks(c, &make_batch(8192, 3)); + run_benchmarks(c, &make_batch(8192, 50)); + run_benchmarks(c, &make_batch(8192, 100)); + + benchmark_lookup_table_case_when(c, 8192); +} + +fn run_benchmarks(c: &mut Criterion, batch: &RecordBatch) { + let c1 = col("c1", &batch.schema()).unwrap(); + let c2 = col("c2", &batch.schema()).unwrap(); + let c3 = col("c3", &batch.schema()).unwrap(); + + // No expression, when/then/else, literal values + c.bench_function( + format!( + "case_when {}x{}: CASE WHEN c1 <= 500 THEN 1 ELSE 0 END", + batch.num_rows(), + batch.num_columns() + ) + .as_str(), + |b| { + let expr = Arc::new( + case( + None, + vec![(make_x_cmp_y(&c1, Operator::LtEq, 500), lit(1))], + Some(lit(0)), + ) + .unwrap(), + ); + b.iter(|| black_box(expr.evaluate(black_box(batch)).unwrap())) + }, + ); + + // No expression, when/then/else, column reference values + c.bench_function( + format!( + "case_when {}x{}: CASE WHEN c1 <= 500 THEN c2 ELSE c3 END", + batch.num_rows(), + batch.num_columns() + ) + .as_str(), + |b| { + let expr = Arc::new( + case( + None, + vec![(make_x_cmp_y(&c1, Operator::LtEq, 500), Arc::clone(&c2))], + Some(Arc::clone(&c3)), + ) + .unwrap(), + ); + b.iter(|| black_box(expr.evaluate(black_box(batch)).unwrap())) + }, + ); + + // No expression, when/then, implicit else + c.bench_function( + format!( + "case_when {}x{}: CASE WHEN c1 <= 500 THEN c2 [ELSE NULL] END", + batch.num_rows(), + batch.num_columns() + ) + .as_str(), + |b| { + let expr = Arc::new( + case( + None, + vec![(make_x_cmp_y(&c1, Operator::LtEq, 500), Arc::clone(&c2))], + None, + ) + .unwrap(), + ); + b.iter(|| black_box(expr.evaluate(black_box(batch)).unwrap())) + }, + ); + + // With expression, two when/then branches + c.bench_function( + format!( + "case_when {}x{}: CASE c1 WHEN 1 THEN c2 WHEN 2 THEN c3 END", + batch.num_rows(), + batch.num_columns() + ) + .as_str(), + |b| { + let expr = Arc::new( + case( + Some(Arc::clone(&c1)), + vec![(lit(1), Arc::clone(&c2)), (lit(2), Arc::clone(&c3))], + None, + ) + .unwrap(), + ); + b.iter(|| black_box(expr.evaluate(black_box(batch)).unwrap())) + }, + ); + + // Many when/then branches where all are effectively reachable + c.bench_function(format!("case_when {}x{}: CASE WHEN c1 == 0 THEN 0 WHEN c1 == 1 THEN 1 ... WHEN c1 == n THEN n ELSE n + 1 END", batch.num_rows(), batch.num_columns()).as_str(), |b| { + let when_thens = (0..batch.num_rows() as i32).map(|i| (make_x_cmp_y(&c1, Operator::Eq, i), lit(i))).collect(); let expr = Arc::new( - CaseExpr::try_new( + case( None, - vec![(predicate.clone(), make_lit_i32(1))], - Some(make_lit_i32(0)), + when_thens, + Some(lit(batch.num_rows() as i32)) ) - .unwrap(), + .unwrap(), ); - b.iter(|| black_box(expr.evaluate(black_box(&batch)).unwrap())) + b.iter(|| black_box(expr.evaluate(black_box(batch)).unwrap())) }); - // CASE WHEN c1 <= 500 THEN c2 [ELSE NULL] END - c.bench_function("case_when: column or null", |b| { + // Many when/then branches where all but the first few are effectively unreachable + c.bench_function(format!("case_when {}x{}: CASE WHEN c1 < 0 THEN 0 WHEN c1 < 1000 THEN 1 ... 
WHEN c1 < n * 1000 THEN n ELSE n + 1 END", batch.num_rows(), batch.num_columns()).as_str(), |b| { + let when_thens = (0..batch.num_rows() as i32).map(|i| (make_x_cmp_y(&c1, Operator::Lt, i * 1000), lit(i))).collect(); let expr = Arc::new( - CaseExpr::try_new(None, vec![(predicate.clone(), make_col("c2", 1))], None) + case( + None, + when_thens, + Some(lit(batch.num_rows() as i32)) + ) .unwrap(), ); - b.iter(|| black_box(expr.evaluate(black_box(&batch)).unwrap())) + b.iter(|| black_box(expr.evaluate(black_box(batch)).unwrap())) }); - // CASE WHEN c1 <= 500 THEN c2 ELSE c3 END - c.bench_function("case_when: expr or expr", |b| { + // Many when/then branches where all are effectively reachable + c.bench_function(format!("case_when {}x{}: CASE c1 WHEN 0 THEN 0 WHEN 1 THEN 1 ... WHEN n THEN n ELSE n + 1 END", batch.num_rows(), batch.num_columns()).as_str(), |b| { + let when_thens = (0..batch.num_rows() as i32).map(|i| (lit(i), lit(i))).collect(); let expr = Arc::new( - CaseExpr::try_new( - None, - vec![(predicate.clone(), make_col("c2", 1))], - Some(make_col("c3", 2)), + case( + Some(Arc::clone(&c1)), + when_thens, + Some(lit(batch.num_rows() as i32)) ) - .unwrap(), + .unwrap(), ); - b.iter(|| black_box(expr.evaluate(black_box(&batch)).unwrap())) + b.iter(|| black_box(expr.evaluate(black_box(batch)).unwrap())) }); - // CASE c1 WHEN 1 THEN c2 WHEN 2 THEN c3 END - c.bench_function("case_when: CASE expr", |b| { + // Many when/then branches where all but the first few are effectively unreachable + c.bench_function(format!("case_when {}x{}: CASE c2 WHEN 0 THEN 0 WHEN 1000 THEN 1 ... WHEN n * 1000 THEN n ELSE n + 1 END", batch.num_rows(), batch.num_columns()).as_str(), |b| { + let when_thens = (0..batch.num_rows() as i32).map(|i| (lit(i * 1000), lit(i))).collect(); let expr = Arc::new( - CaseExpr::try_new( - Some(make_col("c1", 0)), - vec![ - (make_lit_i32(1), make_col("c2", 1)), - (make_lit_i32(2), make_col("c3", 2)), - ], - None, + case( + Some(Arc::clone(&c2)), + when_thens, + Some(lit(batch.num_rows() as i32)) ) - .unwrap(), + .unwrap(), ); - b.iter(|| black_box(expr.evaluate(black_box(&batch)).unwrap())) + b.iter(|| black_box(expr.evaluate(black_box(batch)).unwrap())) }); } +struct Options { + number_of_rows: usize, + range_of_values: Vec, + in_range_probability: f32, + null_probability: f32, +} + +fn generate_other_primitive_value( + rng: &mut impl RngCore, + exclude: &[T], +) -> T { + let mut value; + let retry_limit = 100; + for _ in 0..retry_limit { + value = rng.random_range(T::MIN_TOTAL_ORDER..=T::MAX_TOTAL_ORDER); + if !exclude.contains(&value) { + return value; + } + } + + panic!("Could not generate out of range value after {retry_limit} attempts"); +} + +fn create_random_string_generator( + length: Range, +) -> impl Fn(&mut dyn RngCore, &[String]) -> String { + assert!(length.end > length.start); + + move |rng, exclude| { + let retry_limit = 100; + for _ in 0..retry_limit { + let length = rng.random_range(length.clone()); + let value: String = rng + .sample_iter(Alphanumeric) + .take(length) + .map(char::from) + .collect(); + + if !exclude.contains(&value) { + return value; + } + } + + panic!("Could not generate out of range value after {retry_limit} attempts"); + } +} + +/// Create column with the provided number of rows +/// `in_range_percentage` is the percentage of values that should be inside the specified range +/// `null_percentage` is the percentage of null values +/// The rest of the values will be outside the specified range +fn generate_values_for_lookup( + options: Options, + 
generate_other_value: impl Fn(&mut StdRng, &[T]) -> T, +) -> A +where + T: Clone, + A: FromIterator>, +{ + // Create a value with specified range most of the time, but also some nulls and the rest is generic + + assert!( + options.in_range_probability + options.null_probability <= 1.0, + "Percentages must sum to 1.0 or less" + ); + + let rng = &mut seedable_rng(); + + let in_range_probability = 0.0..options.in_range_probability; + let null_range_probability = + in_range_probability.start..in_range_probability.start + options.null_probability; + let out_range_probability = null_range_probability.end..1.0; + + (0..options.number_of_rows) + .map(|_| { + let roll: f32 = rng.random(); + + match roll { + v if out_range_probability.contains(&v) => { + let index = rng.random_range(0..options.range_of_values.len()); + // Generate value in range + Some(options.range_of_values[index].clone()) + } + v if null_range_probability.contains(&v) => None, + _ => { + // Generate value out of range + Some(generate_other_value(rng, &options.range_of_values)) + } + } + }) + .collect::() +} + +fn benchmark_lookup_table_case_when(c: &mut Criterion, batch_size: usize) { + #[derive(Clone, Copy, Debug)] + struct CaseWhenLookupInput { + batch_size: usize, + + in_range_probability: f32, + null_probability: f32, + } + + impl Display for CaseWhenLookupInput { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!( + f, + "case_when {} rows: in_range: {}, nulls: {}", + self.batch_size, self.in_range_probability, self.null_probability, + ) + } + } + + let mut case_when_lookup = c.benchmark_group("lookup_table_case_when"); + + for in_range_probability in [0.1, 0.5, 0.9, 1.0] { + for null_probability in [0.0, 0.1, 0.5] { + if in_range_probability + null_probability > 1.0 { + continue; + } + + let input = CaseWhenLookupInput { + batch_size, + in_range_probability, + null_probability, + }; + + let when_thens_primitive_to_string = vec![ + (1, "something"), + (2, "very"), + (3, "interesting"), + (4, "is"), + (5, "going"), + (6, "to"), + (7, "happen"), + (30, "in"), + (31, "datafusion"), + (90, "when"), + (91, "you"), + (92, "find"), + (93, "it"), + (120, "let"), + (240, "me"), + (241, "know"), + (244, "please"), + (246, "thank"), + (250, "you"), + (252, "!"), + ]; + let when_thens_string_to_primitive = when_thens_primitive_to_string + .iter() + .map(|&(key, value)| (value, key)) + .collect_vec(); + + for num_entries in [5, 10, 20] { + for (name, values_range) in [ + ("all equally true", 0..num_entries), + // Test when early termination is beneficial + ("only first 2 are true", 0..2), + ] { + let when_thens_primitive_to_string = + when_thens_primitive_to_string[values_range.clone()].to_vec(); + + let when_thens_string_to_primitive = + when_thens_string_to_primitive[values_range].to_vec(); + + case_when_lookup.bench_with_input( + BenchmarkId::new( + format!( + "case when i32 -> utf8, {num_entries} entries, {name}" + ), + input, + ), + &input, + |b, input| { + let array: Int32Array = generate_values_for_lookup( + Options:: { + number_of_rows: batch_size, + range_of_values: when_thens_primitive_to_string + .iter() + .map(|(key, _)| *key) + .collect(), + in_range_probability: input.in_range_probability, + null_probability: input.null_probability, + }, + |rng, exclude| { + generate_other_primitive_value::(rng, exclude) + }, + ); + let batch = RecordBatch::try_new( + Arc::new(Schema::new(vec![Field::new( + "col1", + array.data_type().clone(), + true, + )])), + vec![Arc::new(array)], + ) + .unwrap(); + + let when_thens = 
when_thens_primitive_to_string + .iter() + .map(|&(key, value)| (lit(key), lit(value))) + .collect(); + + let expr = Arc::new( + case( + Some(col("col1", batch.schema_ref()).unwrap()), + when_thens, + Some(lit("whatever")), + ) + .unwrap(), + ); + + b.iter(|| { + black_box(expr.evaluate(black_box(&batch)).unwrap()) + }) + }, + ); + + case_when_lookup.bench_with_input( + BenchmarkId::new( + format!( + "case when utf8 -> i32, {num_entries} entries, {name}" + ), + input, + ), + &input, + |b, input| { + let array: StringArray = generate_values_for_lookup( + Options:: { + number_of_rows: batch_size, + range_of_values: when_thens_string_to_primitive + .iter() + .map(|(key, _)| (*key).to_string()) + .collect(), + in_range_probability: input.in_range_probability, + null_probability: input.null_probability, + }, + |rng, exclude| { + create_random_string_generator(3..10)(rng, exclude) + }, + ); + let batch = RecordBatch::try_new( + Arc::new(Schema::new(vec![Field::new( + "col1", + array.data_type().clone(), + true, + )])), + vec![Arc::new(array)], + ) + .unwrap(); + + let when_thens = when_thens_string_to_primitive + .iter() + .map(|&(key, value)| (lit(key), lit(value))) + .collect(); + + let expr = Arc::new( + case( + Some(col("col1", batch.schema_ref()).unwrap()), + when_thens, + Some(lit(1000)), + ) + .unwrap(), + ); + + b.iter(|| { + black_box(expr.evaluate(black_box(&batch)).unwrap()) + }) + }, + ); + } + } + } + } +} + criterion_group!(benches, criterion_benchmark); criterion_main!(benches); diff --git a/datafusion/physical-expr/src/equivalence/class.rs b/datafusion/physical-expr/src/equivalence/class.rs index 66ce77ef415e..5b64884f65bb 100644 --- a/datafusion/physical-expr/src/equivalence/class.rs +++ b/datafusion/physical-expr/src/equivalence/class.rs @@ -20,10 +20,10 @@ use std::ops::Deref; use std::sync::Arc; use std::vec::IntoIter; -use super::projection::ProjectionTargets; use super::ProjectionMapping; use crate::expressions::Literal; use crate::physical_expr::add_offset_to_expr; +use crate::projection::ProjectionTargets; use crate::{PhysicalExpr, PhysicalExprRef, PhysicalSortExpr, PhysicalSortRequirement}; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; diff --git a/datafusion/physical-expr/src/equivalence/mod.rs b/datafusion/physical-expr/src/equivalence/mod.rs index bcc6835e2f6c..a7289103806b 100644 --- a/datafusion/physical-expr/src/equivalence/mod.rs +++ b/datafusion/physical-expr/src/equivalence/mod.rs @@ -25,12 +25,13 @@ use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; mod class; mod ordering; -mod projection; mod properties; pub use class::{AcrossPartitions, ConstExpr, EquivalenceClass, EquivalenceGroup}; pub use ordering::OrderingEquivalenceClass; -pub use projection::{project_ordering, project_orderings, ProjectionMapping}; +// Re-export for backwards compatibility, we recommend importing from +// datafusion_physical_expr::projection instead +pub use crate::projection::{project_ordering, project_orderings, ProjectionMapping}; pub use properties::{ calculate_union, join_equivalence_properties, EquivalenceProperties, }; @@ -61,7 +62,7 @@ mod tests { use arrow::compute::SortOptions; use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; - use datafusion_common::{plan_err, Result}; + use datafusion_common::Result; use datafusion_physical_expr_common::sort_expr::PhysicalSortRequirement; /// Converts a string to a physical sort expression @@ -95,31 +96,6 @@ mod tests { sort_expr } - pub fn output_schema( - mapping: 
&ProjectionMapping, - input_schema: &Arc, - ) -> Result { - // Calculate output schema: - let mut fields = vec![]; - for (source, targets) in mapping.iter() { - let data_type = source.data_type(input_schema)?; - let nullable = source.nullable(input_schema)?; - for (target, _) in targets.iter() { - let Some(column) = target.as_any().downcast_ref::() else { - return plan_err!("Expects to have column"); - }; - fields.push(Field::new(column.name(), data_type.clone(), nullable)); - } - } - - let output_schema = Arc::new(Schema::new_with_metadata( - fields, - input_schema.metadata().clone(), - )); - - Ok(output_schema) - } - // Generate a schema which consists of 8 columns (a, b, c, d, e, f, g, h) pub fn create_test_schema() -> Result { let a = Field::new("a", DataType::Int32, true); diff --git a/datafusion/physical-expr/src/equivalence/properties/dependency.rs b/datafusion/physical-expr/src/equivalence/properties/dependency.rs index 26d5d32c6512..8945d18be430 100644 --- a/datafusion/physical-expr/src/equivalence/properties/dependency.rs +++ b/datafusion/physical-expr/src/equivalence/properties/dependency.rs @@ -387,11 +387,11 @@ mod tests { use super::*; use crate::equivalence::tests::{ - convert_to_sort_reqs, create_test_params, create_test_schema, output_schema, - parse_sort_expr, + convert_to_sort_reqs, create_test_params, create_test_schema, parse_sort_expr, }; use crate::equivalence::{convert_to_sort_exprs, ProjectionMapping}; use crate::expressions::{col, BinaryExpr, CastExpr, Column}; + use crate::projection::tests::output_schema; use crate::{ConstExpr, EquivalenceProperties, ScalarFunctionExpr}; use arrow::compute::SortOptions; diff --git a/datafusion/physical-expr/src/expressions/binary/kernels.rs b/datafusion/physical-expr/src/expressions/binary/kernels.rs index 71d1242eea85..36ecd1c81619 100644 --- a/datafusion/physical-expr/src/expressions/binary/kernels.rs +++ b/datafusion/physical-expr/src/expressions/binary/kernels.rs @@ -145,12 +145,14 @@ pub fn concat_elements_utf8view( left: &StringViewArray, right: &StringViewArray, ) -> std::result::Result { - let capacity = left - .data_buffers() - .iter() - .zip(right.data_buffers().iter()) - .map(|(b1, b2)| b1.len() + b2.len()) - .sum(); + if left.len() != right.len() { + return Err(ArrowError::ComputeError(format!( + "Arrays must have the same length: {} != {}", + left.len(), + right.len() + ))); + } + let capacity = left.len(); let mut result = StringViewBuilder::with_capacity(capacity); // Avoid reallocations by writing to a reused buffer (note we diff --git a/datafusion/physical-expr/src/expressions/case.rs b/datafusion/physical-expr/src/expressions/case.rs index d14146a20d8b..2db599047bcd 100644 --- a/datafusion/physical-expr/src/expressions/case.rs +++ b/datafusion/physical-expr/src/expressions/case.rs @@ -205,10 +205,15 @@ impl CaseExpr { let mut current_value = new_null_array(&return_type, batch.num_rows()); // We only consider non-null values while comparing with whens let mut remainder = not(&base_nulls)?; + let mut non_null_remainder_count = remainder.true_count(); for i in 0..self.when_then_expr.len() { - let when_value = self.when_then_expr[i] - .0 - .evaluate_selection(batch, &remainder)?; + // If there are no rows left to process, break out of the loop early + if non_null_remainder_count == 0 { + break; + } + + let when_predicate = &self.when_then_expr[i].0; + let when_value = when_predicate.evaluate_selection(batch, &remainder)?; let when_value = when_value.into_array(batch.num_rows())?; // build boolean array representing 
which rows match the "when" value let when_match = compare_with_eq( @@ -224,41 +229,46 @@ impl CaseExpr { _ => Cow::Owned(prep_null_mask_filter(&when_match)), }; // Make sure we only consider rows that have not been matched yet - let when_match = and(&when_match, &remainder)?; + let when_value = and(&when_match, &remainder)?; - // When no rows available for when clause, skip then clause - if when_match.true_count() == 0 { + // If the predicate did not match any rows, continue to the next branch immediately + let when_match_count = when_value.true_count(); + if when_match_count == 0 { continue; } - let then_value = self.when_then_expr[i] - .1 - .evaluate_selection(batch, &when_match)?; + let then_expression = &self.when_then_expr[i].1; + let then_value = then_expression.evaluate_selection(batch, &when_value)?; current_value = match then_value { ColumnarValue::Scalar(ScalarValue::Null) => { - nullif(current_value.as_ref(), &when_match)? + nullif(current_value.as_ref(), &when_value)? } ColumnarValue::Scalar(then_value) => { - zip(&when_match, &then_value.to_scalar()?, ¤t_value)? + zip(&when_value, &then_value.to_scalar()?, ¤t_value)? } ColumnarValue::Array(then_value) => { - zip(&when_match, &then_value, ¤t_value)? + zip(&when_value, &then_value, ¤t_value)? } }; - remainder = and_not(&remainder, &when_match)?; + remainder = and_not(&remainder, &when_value)?; + non_null_remainder_count -= when_match_count; } if let Some(e) = self.else_expr() { - // keep `else_expr`'s data type and return type consistent - let expr = try_cast(Arc::clone(e), &batch.schema(), return_type.clone())?; // null and unmatched tuples should be assigned else value remainder = or(&base_nulls, &remainder)?; - let else_ = expr - .evaluate_selection(batch, &remainder)? - .into_array(batch.num_rows())?; - current_value = zip(&remainder, &else_, ¤t_value)?; + + if remainder.true_count() > 0 { + // keep `else_expr`'s data type and return type consistent + let expr = try_cast(Arc::clone(e), &batch.schema(), return_type.clone())?; + + let else_ = expr + .evaluate_selection(batch, &remainder)? 
+ .into_array(batch.num_rows())?; + current_value = zip(&remainder, &else_, ¤t_value)?; + } } Ok(ColumnarValue::Array(current_value)) @@ -277,10 +287,15 @@ impl CaseExpr { // start with nulls as default output let mut current_value = new_null_array(&return_type, batch.num_rows()); let mut remainder = BooleanArray::from(vec![true; batch.num_rows()]); + let mut remainder_count = batch.num_rows(); for i in 0..self.when_then_expr.len() { - let when_value = self.when_then_expr[i] - .0 - .evaluate_selection(batch, &remainder)?; + // If there are no rows left to process, break out of the loop early + if remainder_count == 0 { + break; + } + + let when_predicate = &self.when_then_expr[i].0; + let when_value = when_predicate.evaluate_selection(batch, &remainder)?; let when_value = when_value.into_array(batch.num_rows())?; let when_value = as_boolean_array(&when_value).map_err(|_| { internal_datafusion_err!("WHEN expression did not return a BooleanArray") @@ -293,14 +308,14 @@ impl CaseExpr { // Make sure we only consider rows that have not been matched yet let when_value = and(&when_value, &remainder)?; - // When no rows available for when clause, skip then clause - if when_value.true_count() == 0 { + // If the predicate did not match any rows, continue to the next branch immediately + let when_match_count = when_value.true_count(); + if when_match_count == 0 { continue; } - let then_value = self.when_then_expr[i] - .1 - .evaluate_selection(batch, &when_value)?; + let then_expression = &self.when_then_expr[i].1; + let then_value = then_expression.evaluate_selection(batch, &when_value)?; current_value = match then_value { ColumnarValue::Scalar(ScalarValue::Null) => { @@ -317,10 +332,11 @@ impl CaseExpr { // Succeed tuples should be filtered out for short-circuit evaluation, // null values for the current when expr should be kept remainder = and_not(&remainder, &when_value)?; + remainder_count -= when_match_count; } if let Some(e) = self.else_expr() { - if remainder.true_count() > 0 { + if remainder_count > 0 { // keep `else_expr`'s data type and return type consistent let expr = try_cast(Arc::clone(e), &batch.schema(), return_type.clone())?; let else_ = expr diff --git a/datafusion/physical-expr/src/expressions/cast.rs b/datafusion/physical-expr/src/expressions/cast.rs index 407e3e6a9d29..0419161b532c 100644 --- a/datafusion/physical-expr/src/expressions/cast.rs +++ b/datafusion/physical-expr/src/expressions/cast.rs @@ -439,8 +439,8 @@ mod tests { let expression = cast_with_options(col("a", &schema)?, &schema, Decimal128(6, 2), None)?; let e = expression.evaluate(&batch).unwrap_err().strip_backtrace(); // panics on OK - assert_snapshot!(e, @"Arrow error: Invalid argument error: 12345679 is too large to store in a Decimal128 of precision 6. Max is 999999"); - + assert_snapshot!(e, @"Arrow error: Invalid argument error: 123456.79 is too large to store in a Decimal128 of precision 6. 
Max is 9999.99"); + // safe cast should return null let expression_safe = cast_with_options( col("a", &schema)?, &schema, diff --git a/datafusion/physical-expr/src/expressions/dynamic_filters.rs b/datafusion/physical-expr/src/expressions/dynamic_filters.rs index a53b32c97689..964a193db833 100644 --- a/datafusion/physical-expr/src/expressions/dynamic_filters.rs +++ b/datafusion/physical-expr/src/expressions/dynamic_filters.rs @@ -381,14 +381,14 @@ mod test { ) .unwrap(); let snap = dynamic_filter_1.snapshot().unwrap().unwrap(); - insta::assert_snapshot!(format!("{snap:?}"), @r#"BinaryExpr { left: Column { name: "a", index: 0 }, op: Eq, right: Literal { value: Int32(42), field: Field { name: "lit", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} } }, fail_on_overflow: false }"#); + insta::assert_snapshot!(format!("{snap:?}"), @r#"BinaryExpr { left: Column { name: "a", index: 0 }, op: Eq, right: Literal { value: Int32(42), field: Field { name: "lit", data_type: Int32 } }, fail_on_overflow: false }"#); let dynamic_filter_2 = reassign_expr_columns( Arc::clone(&dynamic_filter) as Arc, &filter_schema_2, ) .unwrap(); let snap = dynamic_filter_2.snapshot().unwrap().unwrap(); - insta::assert_snapshot!(format!("{snap:?}"), @r#"BinaryExpr { left: Column { name: "a", index: 1 }, op: Eq, right: Literal { value: Int32(42), field: Field { name: "lit", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} } }, fail_on_overflow: false }"#); + insta::assert_snapshot!(format!("{snap:?}"), @r#"BinaryExpr { left: Column { name: "a", index: 1 }, op: Eq, right: Literal { value: Int32(42), field: Field { name: "lit", data_type: Int32 } }, fail_on_overflow: false }"#); // Both filters allow evaluating the same expression let batch_1 = RecordBatch::try_new( Arc::clone(&filter_schema_1), diff --git a/datafusion/physical-expr/src/expressions/literal.rs b/datafusion/physical-expr/src/expressions/literal.rs index 6e425ee439d6..94e91d43a1c4 100644 --- a/datafusion/physical-expr/src/expressions/literal.rs +++ b/datafusion/physical-expr/src/expressions/literal.rs @@ -28,8 +28,8 @@ use arrow::{ datatypes::{DataType, Schema}, record_batch::RecordBatch, }; +use datafusion_common::metadata::FieldMetadata; use datafusion_common::{Result, ScalarValue}; -use datafusion_expr::expr::FieldMetadata; use datafusion_expr::Expr; use datafusion_expr_common::columnar_value::ColumnarValue; use datafusion_expr_common::interval_arithmetic::Interval; diff --git a/datafusion/physical-expr/src/lib.rs b/datafusion/physical-expr/src/lib.rs index 468591d34d71..aa8c9e50fd71 100644 --- a/datafusion/physical-expr/src/lib.rs +++ b/datafusion/physical-expr/src/lib.rs @@ -37,6 +37,7 @@ pub mod intervals; mod partitioning; mod physical_expr; pub mod planner; +pub mod projection; mod scalar_function; pub mod simplifier; pub mod statistics; diff --git a/datafusion/physical-expr/src/planner.rs b/datafusion/physical-expr/src/planner.rs index 73df60c42e96..7790380dffd5 100644 --- a/datafusion/physical-expr/src/planner.rs +++ b/datafusion/physical-expr/src/planner.rs @@ -25,13 +25,12 @@ use crate::{ use arrow::datatypes::Schema; use datafusion_common::config::ConfigOptions; +use datafusion_common::metadata::FieldMetadata; use datafusion_common::{ exec_err, not_impl_err, plan_err, DFSchema, Result, ScalarValue, ToDFSchema, }; use datafusion_expr::execution_props::ExecutionProps; -use datafusion_expr::expr::{ - Alias, Cast, FieldMetadata, InList, Placeholder, ScalarFunction, -}; +use 
datafusion_expr::expr::{Alias, Cast, InList, Placeholder, ScalarFunction}; use datafusion_expr::var_provider::is_system_variables; use datafusion_expr::var_provider::VarType; use datafusion_expr::{ diff --git a/datafusion/physical-expr/src/equivalence/projection.rs b/datafusion/physical-expr/src/projection.rs similarity index 51% rename from datafusion/physical-expr/src/equivalence/projection.rs rename to datafusion/physical-expr/src/projection.rs index a4ed8187cfad..e35bfbb3a20d 100644 --- a/datafusion/physical-expr/src/equivalence/projection.rs +++ b/datafusion/physical-expr/src/projection.rs @@ -19,14 +19,426 @@ use std::ops::Deref; use std::sync::Arc; use crate::expressions::Column; +use crate::utils::collect_columns; use crate::PhysicalExpr; -use arrow::datatypes::SchemaRef; +use arrow::datatypes::{Field, Schema, SchemaRef}; +use datafusion_common::stats::{ColumnStatistics, Precision}; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; -use datafusion_common::{internal_err, plan_err, Result}; +use datafusion_common::{internal_datafusion_err, internal_err, plan_err, Result}; use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; use indexmap::IndexMap; +use itertools::Itertools; + +/// A projection expression as used by projection operations. +/// +/// The expression is evaluated and the result is stored in a column +/// with the name specified by `alias`. +/// +/// For example, the SQL expression `a + b AS sum_ab` would be represented +/// as a `ProjectionExpr` where `expr` is the expression `a + b` +/// and `alias` is the string `sum_ab`. +#[derive(Debug, Clone)] +pub struct ProjectionExpr { + /// The expression that will be evaluated. + pub expr: Arc, + /// The name of the output column for use an output schema. + pub alias: String, +} + +impl std::fmt::Display for ProjectionExpr { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if self.expr.to_string() == self.alias { + write!(f, "{}", self.alias) + } else { + write!(f, "{} AS {}", self.expr, self.alias) + } + } +} + +impl ProjectionExpr { + /// Create a new projection expression + pub fn new(expr: Arc, alias: String) -> Self { + Self { expr, alias } + } + + /// Create a new projection expression from an expression and a schema using the expression's output field name as alias. + pub fn new_from_expression( + expr: Arc, + schema: &Schema, + ) -> Result { + let field = expr.return_field(schema)?; + Ok(Self { + expr, + alias: field.name().to_string(), + }) + } +} + +impl From<(Arc, String)> for ProjectionExpr { + fn from(value: (Arc, String)) -> Self { + Self::new(value.0, value.1) + } +} + +impl From<&(Arc, String)> for ProjectionExpr { + fn from(value: &(Arc, String)) -> Self { + Self::new(Arc::clone(&value.0), value.1.clone()) + } +} + +impl From for (Arc, String) { + fn from(value: ProjectionExpr) -> Self { + (value.expr, value.alias) + } +} + +/// A collection of projection expressions. +/// +/// This struct encapsulates multiple `ProjectionExpr` instances, +/// representing a complete projection operation and provides +/// methods to manipulate and analyze the projection as a whole. 
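To illustrate the new `ProjectionExpr` type defined above, here is a hedged sketch of constructing one and formatting it. It assumes only the API added in this diff plus `Column`'s usual `name@index` `Display` rendering:

```rust
use std::sync::Arc;

use datafusion_physical_expr::expressions::Column;
use datafusion_physical_expr::projection::ProjectionExpr;
use datafusion_physical_expr::PhysicalExpr;

fn main() {
    // `a@0 AS total`: the expression is evaluated and exposed under the alias.
    let expr: Arc<dyn PhysicalExpr> = Arc::new(Column::new("a", 0));
    let proj = ProjectionExpr::new(expr, "total".to_string());

    // Display renders "<expr> AS <alias>" unless the two strings already match;
    // with Column's usual `name@index` formatting this prints "a@0 AS total".
    println!("{proj}");
}
```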
+#[derive(Debug, Clone)] +pub struct Projection { + exprs: Vec, +} + +impl std::fmt::Display for Projection { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let exprs: Vec = self.exprs.iter().map(|e| e.to_string()).collect(); + write!(f, "Projection[{}]", exprs.join(", ")) + } +} + +impl From> for Projection { + fn from(value: Vec) -> Self { + Self { exprs: value } + } +} + +impl From<&[ProjectionExpr]> for Projection { + fn from(value: &[ProjectionExpr]) -> Self { + Self { + exprs: value.to_vec(), + } + } +} + +impl AsRef<[ProjectionExpr]> for Projection { + fn as_ref(&self) -> &[ProjectionExpr] { + &self.exprs + } +} + +impl Projection { + pub fn new(exprs: Vec) -> Self { + Self { exprs } + } + + /// Returns an iterator over the projection expressions + pub fn iter(&self) -> impl Iterator { + self.exprs.iter() + } + + /// Creates a ProjectionMapping from this projection + pub fn projection_mapping( + &self, + input_schema: &SchemaRef, + ) -> Result { + ProjectionMapping::try_new( + self.exprs + .iter() + .map(|p| (Arc::clone(&p.expr), p.alias.clone())), + input_schema, + ) + } + + /// Iterate over a clone of the projection expressions. + pub fn expr_iter(&self) -> impl Iterator> + '_ { + self.exprs.iter().map(|e| Arc::clone(&e.expr)) + } + + /// Apply another projection on top of this projection, returning the combined projection. + /// For example, if this projection is `SELECT c@2 AS x, b@1 AS y, a@0 as z` and the other projection is `SELECT x@0 + 1 AS c1, y@1 + z@2 as c2`, + /// we return a projection equivalent to `SELECT c@2 + 1 AS c1, b@1 + a@0 as c2`. + /// + /// # Example + /// + /// ```rust + /// use std::sync::Arc; + /// use datafusion_physical_expr::projection::{Projection, ProjectionExpr}; + /// use datafusion_physical_expr::expressions::{Column, BinaryExpr, Literal}; + /// use datafusion_common::{Result, ScalarValue}; + /// use datafusion_expr::Operator; + /// + /// fn main() -> Result<()> { + /// // Example from the docstring: + /// // Base projection: SELECT c@2 AS x, b@1 AS y, a@0 AS z + /// let base = Projection::new(vec![ + /// ProjectionExpr { + /// expr: Arc::new(Column::new("c", 2)), + /// alias: "x".to_string(), + /// }, + /// ProjectionExpr { + /// expr: Arc::new(Column::new("b", 1)), + /// alias: "y".to_string(), + /// }, + /// ProjectionExpr { + /// expr: Arc::new(Column::new("a", 0)), + /// alias: "z".to_string(), + /// }, + /// ]); + /// + /// // Top projection: SELECT x@0 + 1 AS c1, y@1 + z@2 AS c2 + /// let top = Projection::new(vec![ + /// ProjectionExpr { + /// expr: Arc::new(BinaryExpr::new( + /// Arc::new(Column::new("x", 0)), + /// Operator::Plus, + /// Arc::new(Literal::new(ScalarValue::Int32(Some(1)))), + /// )), + /// alias: "c1".to_string(), + /// }, + /// ProjectionExpr { + /// expr: Arc::new(BinaryExpr::new( + /// Arc::new(Column::new("y", 1)), + /// Operator::Plus, + /// Arc::new(Column::new("z", 2)), + /// )), + /// alias: "c2".to_string(), + /// }, + /// ]); + /// + /// // Expected result: SELECT c@2 + 1 AS c1, b@1 + a@0 AS c2 + /// let result = base.try_merge(&top)?; + /// + /// assert_eq!(result.as_ref().len(), 2); + /// assert_eq!(result.as_ref()[0].alias, "c1"); + /// assert_eq!(result.as_ref()[1].alias, "c2"); + /// + /// Ok(()) + /// } + /// ``` + /// + /// # Errors + /// This function returns an error if any expression in the `other` projection cannot be + /// applied on top of this projection. 
+ pub fn try_merge(&self, other: &Projection) -> Result { + let mut new_exprs = Vec::with_capacity(other.exprs.len()); + for proj_expr in &other.exprs { + let new_expr = update_expr(&proj_expr.expr, &self.exprs, true)? + .ok_or_else(|| { + internal_datafusion_err!( + "Failed to combine projections: expression {} could not be applied on top of existing projections {}", + proj_expr.expr, + self.exprs.iter().map(|e| format!("{e}")).join(", ") + ) + })?; + new_exprs.push(ProjectionExpr { + expr: new_expr, + alias: proj_expr.alias.clone(), + }); + } + Ok(Projection::new(new_exprs)) + } + + /// Extract the column indices used in this projection. + /// For example, for a projection `SELECT a AS x, b + 1 AS y`, where `a` is at index 0 and `b` is at index 1, + /// this function would return `[0, 1]`. + /// Repeated indices are returned only once, and the order is ascending. + pub fn column_indices(&self) -> Vec { + self.exprs + .iter() + .flat_map(|e| collect_columns(&e.expr).into_iter().map(|col| col.index())) + .sorted_unstable() + .dedup() + .collect_vec() + } + + /// Project a schema according to this projection. + /// For example, for a projection `SELECT a AS x, b + 1 AS y`, where `a` is at index 0 and `b` is at index 1, + /// if the input schema is `[a: Int32, b: Int32, c: Int32]`, the output schema would be `[x: Int32, y: Int32]`. + /// Fields' metadata are preserved from the input schema. + pub fn project_schema(&self, input_schema: &Schema) -> Result { + let fields: Result> = self + .exprs + .iter() + .map(|proj_expr| { + let metadata = proj_expr + .expr + .return_field(input_schema)? + .metadata() + .clone(); + + let field = Field::new( + &proj_expr.alias, + proj_expr.expr.data_type(input_schema)?, + proj_expr.expr.nullable(input_schema)?, + ) + .with_metadata(metadata); + + Ok(field) + }) + .collect(); + + Ok(Schema::new_with_metadata( + fields?, + input_schema.metadata().clone(), + )) + } + + /// Project statistics according to this projection. + /// For example, for a projection `SELECT a AS x, b + 1 AS y`, where `a` is at index 0 and `b` is at index 1, + /// if the input statistics has column statistics for columns `a`, `b`, and `c`, the output statistics would have column statistics for columns `x` and `y`. 
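To make `column_indices` and `project_schema` (described above) concrete, here is a small sketch assuming only the API introduced in this diff; the schema and expressions are made up for illustration:

```rust
use std::sync::Arc;

use arrow::datatypes::{DataType, Field, Schema};
use datafusion_common::{Result, ScalarValue};
use datafusion_expr::Operator;
use datafusion_physical_expr::expressions::{BinaryExpr, Column, Literal};
use datafusion_physical_expr::projection::{Projection, ProjectionExpr};

fn main() -> Result<()> {
    // Input schema: [a: Int32, b: Int32, c: Int32]
    let input = Schema::new(vec![
        Field::new("a", DataType::Int32, false),
        Field::new("b", DataType::Int32, false),
        Field::new("c", DataType::Int32, false),
    ]);

    // SELECT a AS x, b + 1 AS y
    let projection = Projection::new(vec![
        ProjectionExpr::new(Arc::new(Column::new("a", 0)), "x".to_string()),
        ProjectionExpr::new(
            Arc::new(BinaryExpr::new(
                Arc::new(Column::new("b", 1)),
                Operator::Plus,
                Arc::new(Literal::new(ScalarValue::Int32(Some(1)))),
            )),
            "y".to_string(),
        ),
    ]);

    // Only columns 0 (`a`) and 1 (`b`) are referenced; `c` is pruned.
    assert_eq!(projection.column_indices(), vec![0, 1]);

    // The projected schema carries the aliases, not the input column names.
    let output = projection.project_schema(&input)?;
    assert_eq!(output.field(0).name(), "x");
    assert_eq!(output.field(1).name(), "y");
    Ok(())
}
```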
+ pub fn project_statistics( + &self, + mut stats: datafusion_common::Statistics, + input_schema: &Schema, + ) -> Result { + let mut primitive_row_size = 0; + let mut primitive_row_size_possible = true; + let mut column_statistics = vec![]; + + for proj_expr in &self.exprs { + let expr = &proj_expr.expr; + let col_stats = if let Some(col) = expr.as_any().downcast_ref::() { + stats.column_statistics[col.index()].clone() + } else { + // TODO stats: estimate more statistics from expressions + // (expressions should compute their statistics themselves) + ColumnStatistics::new_unknown() + }; + column_statistics.push(col_stats); + let data_type = expr.data_type(input_schema)?; + if let Some(value) = data_type.primitive_width() { + primitive_row_size += value; + continue; + } + primitive_row_size_possible = false; + } + + if primitive_row_size_possible { + stats.total_byte_size = + Precision::Exact(primitive_row_size).multiply(&stats.num_rows); + } + stats.column_statistics = column_statistics; + Ok(stats) + } +} + +impl<'a> IntoIterator for &'a Projection { + type Item = &'a ProjectionExpr; + type IntoIter = std::slice::Iter<'a, ProjectionExpr>; + + fn into_iter(self) -> Self::IntoIter { + self.exprs.iter() + } +} + +impl IntoIterator for Projection { + type Item = ProjectionExpr; + type IntoIter = std::vec::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.exprs.into_iter() + } +} + +/// The function operates in two modes: +/// +/// 1) When `sync_with_child` is `true`: +/// +/// The function updates the indices of `expr` if the expression resides +/// in the input plan. For instance, given the expressions `a@1 + b@2` +/// and `c@0` with the input schema `c@2, a@0, b@1`, the expressions are +/// updated to `a@0 + b@1` and `c@2`. +/// +/// 2) When `sync_with_child` is `false`: +/// +/// The function determines how the expression would be updated if a projection +/// was placed before the plan associated with the expression. If the expression +/// cannot be rewritten after the projection, it returns `None`. For example, +/// given the expressions `c@0`, `a@1` and `b@2`, and the projection with +/// an output schema of `a, c_new`, then `c@0` becomes `c_new@1`, `a@1` becomes +/// `a@0`, but `b@2` results in `None` since the projection does not include `b`. +/// +/// # Errors +/// This function returns an error if `sync_with_child` is `true` and if any expression references +/// an index that is out of bounds for `projected_exprs`. +/// For example: +/// +/// - `expr` is `a@3` +/// - `projected_exprs` is \[`a@0`, `b@1`\] +/// +/// In this case, `a@3` references index 3, which is out of bounds for `projected_exprs` (which has length 2). +pub fn update_expr( + expr: &Arc, + projected_exprs: &[ProjectionExpr], + sync_with_child: bool, +) -> Result>> { + #[derive(Debug, PartialEq)] + enum RewriteState { + /// The expression is unchanged. + Unchanged, + /// Some part of the expression has been rewritten + RewrittenValid, + /// Some part of the expression has been rewritten, but some column + /// references could not be. 
+ RewrittenInvalid, + } + + let mut state = RewriteState::Unchanged; + + let new_expr = Arc::clone(expr) + .transform_up(|expr| { + if state == RewriteState::RewrittenInvalid { + return Ok(Transformed::no(expr)); + } + + let Some(column) = expr.as_any().downcast_ref::() else { + return Ok(Transformed::no(expr)); + }; + if sync_with_child { + state = RewriteState::RewrittenValid; + // Update the index of `column`: + let projected_expr = projected_exprs.get(column.index()).ok_or_else(|| { + internal_datafusion_err!( + "Column index {} out of bounds for projected expressions of length {}", + column.index(), + projected_exprs.len() + ) + })?; + Ok(Transformed::yes(Arc::clone(&projected_expr.expr))) + } else { + // default to invalid, in case we can't find the relevant column + state = RewriteState::RewrittenInvalid; + // Determine how to update `column` to accommodate `projected_exprs` + projected_exprs + .iter() + .enumerate() + .find_map(|(index, proj_expr)| { + proj_expr.expr.as_any().downcast_ref::().and_then( + |projected_column| { + (column.name().eq(projected_column.name()) + && column.index() == projected_column.index()) + .then(|| { + state = RewriteState::RewrittenValid; + Arc::new(Column::new(&proj_expr.alias, index)) as _ + }) + }, + ) + }) + .map_or_else( + || Ok(Transformed::no(expr)), + |c| Ok(Transformed::yes(c)), + ) + } + }) + .data()?; + + Ok((state == RewriteState::RewrittenValid).then_some(new_expr)) +} /// Stores target expressions, along with their indices, that associate with a /// source expression in a projection mapping. @@ -249,18 +661,46 @@ pub fn project_ordering( } #[cfg(test)] -mod tests { +pub(crate) mod tests { + use std::collections::HashMap; + use super::*; - use crate::equivalence::tests::output_schema; use crate::equivalence::{convert_to_orderings, EquivalenceProperties}; - use crate::expressions::{col, BinaryExpr}; + use crate::expressions::{col, BinaryExpr, Literal}; use crate::utils::tests::TestScalarUDF; use crate::{PhysicalExprRef, ScalarFunctionExpr}; use arrow::compute::SortOptions; use arrow::datatypes::{DataType, Field, Schema, TimeUnit}; use datafusion_common::config::ConfigOptions; + use datafusion_common::{ScalarValue, Statistics}; use datafusion_expr::{Operator, ScalarUDF}; + use insta::assert_snapshot; + + pub(crate) fn output_schema( + mapping: &ProjectionMapping, + input_schema: &Arc, + ) -> Result { + // Calculate output schema: + let mut fields = vec![]; + for (source, targets) in mapping.iter() { + let data_type = source.data_type(input_schema)?; + let nullable = source.nullable(input_schema)?; + for (target, _) in targets.iter() { + let Some(column) = target.as_any().downcast_ref::() else { + return plan_err!("Expects to have column"); + }; + fields.push(Field::new(column.name(), data_type.clone(), nullable)); + } + } + + let output_schema = Arc::new(Schema::new_with_metadata( + fields, + input_schema.metadata().clone(), + )); + + Ok(output_schema) + } #[test] fn project_orderings() -> Result<()> { @@ -1087,4 +1527,628 @@ mod tests { Ok(()) } + + fn get_stats() -> Statistics { + Statistics { + num_rows: Precision::Exact(5), + total_byte_size: Precision::Exact(23), + column_statistics: vec![ + ColumnStatistics { + distinct_count: Precision::Exact(5), + max_value: Precision::Exact(ScalarValue::Int64(Some(21))), + min_value: Precision::Exact(ScalarValue::Int64(Some(-4))), + sum_value: Precision::Exact(ScalarValue::Int64(Some(42))), + null_count: Precision::Exact(0), + }, + ColumnStatistics { + distinct_count: Precision::Exact(1), + 
max_value: Precision::Exact(ScalarValue::from("x")), + min_value: Precision::Exact(ScalarValue::from("a")), + sum_value: Precision::Absent, + null_count: Precision::Exact(3), + }, + ColumnStatistics { + distinct_count: Precision::Absent, + max_value: Precision::Exact(ScalarValue::Float32(Some(1.1))), + min_value: Precision::Exact(ScalarValue::Float32(Some(0.1))), + sum_value: Precision::Exact(ScalarValue::Float32(Some(5.5))), + null_count: Precision::Absent, + }, + ], + } + } + + fn get_schema() -> Schema { + let field_0 = Field::new("col0", DataType::Int64, false); + let field_1 = Field::new("col1", DataType::Utf8, false); + let field_2 = Field::new("col2", DataType::Float32, false); + Schema::new(vec![field_0, field_1, field_2]) + } + + #[test] + fn test_stats_projection_columns_only() { + let source = get_stats(); + let schema = get_schema(); + + let projection = Projection::new(vec![ + ProjectionExpr { + expr: Arc::new(Column::new("col1", 1)), + alias: "col1".to_string(), + }, + ProjectionExpr { + expr: Arc::new(Column::new("col0", 0)), + alias: "col0".to_string(), + }, + ]); + + let result = projection.project_statistics(source, &schema).unwrap(); + + let expected = Statistics { + num_rows: Precision::Exact(5), + total_byte_size: Precision::Exact(23), + column_statistics: vec![ + ColumnStatistics { + distinct_count: Precision::Exact(1), + max_value: Precision::Exact(ScalarValue::from("x")), + min_value: Precision::Exact(ScalarValue::from("a")), + sum_value: Precision::Absent, + null_count: Precision::Exact(3), + }, + ColumnStatistics { + distinct_count: Precision::Exact(5), + max_value: Precision::Exact(ScalarValue::Int64(Some(21))), + min_value: Precision::Exact(ScalarValue::Int64(Some(-4))), + sum_value: Precision::Exact(ScalarValue::Int64(Some(42))), + null_count: Precision::Exact(0), + }, + ], + }; + + assert_eq!(result, expected); + } + + #[test] + fn test_stats_projection_column_with_primitive_width_only() { + let source = get_stats(); + let schema = get_schema(); + + let projection = Projection::new(vec![ + ProjectionExpr { + expr: Arc::new(Column::new("col2", 2)), + alias: "col2".to_string(), + }, + ProjectionExpr { + expr: Arc::new(Column::new("col0", 0)), + alias: "col0".to_string(), + }, + ]); + + let result = projection.project_statistics(source, &schema).unwrap(); + + let expected = Statistics { + num_rows: Precision::Exact(5), + total_byte_size: Precision::Exact(60), + column_statistics: vec![ + ColumnStatistics { + distinct_count: Precision::Absent, + max_value: Precision::Exact(ScalarValue::Float32(Some(1.1))), + min_value: Precision::Exact(ScalarValue::Float32(Some(0.1))), + sum_value: Precision::Exact(ScalarValue::Float32(Some(5.5))), + null_count: Precision::Absent, + }, + ColumnStatistics { + distinct_count: Precision::Exact(5), + max_value: Precision::Exact(ScalarValue::Int64(Some(21))), + min_value: Precision::Exact(ScalarValue::Int64(Some(-4))), + sum_value: Precision::Exact(ScalarValue::Int64(Some(42))), + null_count: Precision::Exact(0), + }, + ], + }; + + assert_eq!(result, expected); + } + + // Tests for Projection struct + + #[test] + fn test_projection_new() -> Result<()> { + let exprs = vec![ + ProjectionExpr { + expr: Arc::new(Column::new("a", 0)), + alias: "a".to_string(), + }, + ProjectionExpr { + expr: Arc::new(Column::new("b", 1)), + alias: "b".to_string(), + }, + ]; + let projection = Projection::new(exprs.clone()); + assert_eq!(projection.as_ref().len(), 2); + Ok(()) + } + + #[test] + fn test_projection_from_vec() -> Result<()> { + let exprs = 
vec![ProjectionExpr { + expr: Arc::new(Column::new("x", 0)), + alias: "x".to_string(), + }]; + let projection: Projection = exprs.clone().into(); + assert_eq!(projection.as_ref().len(), 1); + Ok(()) + } + + #[test] + fn test_projection_as_ref() -> Result<()> { + let exprs = vec![ + ProjectionExpr { + expr: Arc::new(Column::new("col1", 0)), + alias: "col1".to_string(), + }, + ProjectionExpr { + expr: Arc::new(Column::new("col2", 1)), + alias: "col2".to_string(), + }, + ]; + let projection = Projection::new(exprs); + let as_ref: &[ProjectionExpr] = projection.as_ref(); + assert_eq!(as_ref.len(), 2); + Ok(()) + } + + #[test] + fn test_column_indices_multiple_columns() -> Result<()> { + // Test with reversed column order to ensure proper reordering + let projection = Projection::new(vec![ + ProjectionExpr { + expr: Arc::new(Column::new("c", 5)), + alias: "c".to_string(), + }, + ProjectionExpr { + expr: Arc::new(Column::new("b", 2)), + alias: "b".to_string(), + }, + ProjectionExpr { + expr: Arc::new(Column::new("a", 0)), + alias: "a".to_string(), + }, + ]); + // Should return sorted indices regardless of projection order + assert_eq!(projection.column_indices(), vec![0, 2, 5]); + Ok(()) + } + + #[test] + fn test_column_indices_duplicates() -> Result<()> { + // Test that duplicate column indices appear only once + let projection = Projection::new(vec![ + ProjectionExpr { + expr: Arc::new(Column::new("a", 1)), + alias: "a".to_string(), + }, + ProjectionExpr { + expr: Arc::new(Column::new("b", 3)), + alias: "b".to_string(), + }, + ProjectionExpr { + expr: Arc::new(Column::new("a2", 1)), // duplicate index + alias: "a2".to_string(), + }, + ]); + assert_eq!(projection.column_indices(), vec![1, 3]); + Ok(()) + } + + #[test] + fn test_column_indices_unsorted() -> Result<()> { + // Test that column indices are sorted in the output + let projection = Projection::new(vec![ + ProjectionExpr { + expr: Arc::new(Column::new("c", 5)), + alias: "c".to_string(), + }, + ProjectionExpr { + expr: Arc::new(Column::new("a", 1)), + alias: "a".to_string(), + }, + ProjectionExpr { + expr: Arc::new(Column::new("b", 3)), + alias: "b".to_string(), + }, + ]); + assert_eq!(projection.column_indices(), vec![1, 3, 5]); + Ok(()) + } + + #[test] + fn test_column_indices_complex_expr() -> Result<()> { + // Test with complex expressions containing multiple columns + let expr = Arc::new(BinaryExpr::new( + Arc::new(Column::new("a", 1)), + Operator::Plus, + Arc::new(Column::new("b", 4)), + )); + let projection = Projection::new(vec![ + ProjectionExpr { + expr, + alias: "sum".to_string(), + }, + ProjectionExpr { + expr: Arc::new(Column::new("c", 2)), + alias: "c".to_string(), + }, + ]); + // Should return [1, 2, 4] - all columns used, sorted and deduplicated + assert_eq!(projection.column_indices(), vec![1, 2, 4]); + Ok(()) + } + + #[test] + fn test_column_indices_empty() -> Result<()> { + let projection = Projection::new(vec![]); + assert_eq!(projection.column_indices(), Vec::::new()); + Ok(()) + } + + #[test] + fn test_merge_simple_columns() -> Result<()> { + // First projection: SELECT c@2 AS x, b@1 AS y, a@0 AS z + let base_projection = Projection::new(vec![ + ProjectionExpr { + expr: Arc::new(Column::new("c", 2)), + alias: "x".to_string(), + }, + ProjectionExpr { + expr: Arc::new(Column::new("b", 1)), + alias: "y".to_string(), + }, + ProjectionExpr { + expr: Arc::new(Column::new("a", 0)), + alias: "z".to_string(), + }, + ]); + + // Second projection: SELECT y@1 AS col2, x@0 AS col1 + let top_projection = Projection::new(vec![ + 
ProjectionExpr { + expr: Arc::new(Column::new("y", 1)), + alias: "col2".to_string(), + }, + ProjectionExpr { + expr: Arc::new(Column::new("x", 0)), + alias: "col1".to_string(), + }, + ]); + + // Merge should produce: SELECT b@1 AS col2, c@2 AS col1 + let merged = base_projection.try_merge(&top_projection)?; + assert_snapshot!(format!("{merged}"), @"Projection[b@1 AS col2, c@2 AS col1]"); + + Ok(()) + } + + #[test] + fn test_merge_with_expressions() -> Result<()> { + // First projection: SELECT c@2 AS x, b@1 AS y, a@0 AS z + let base_projection = Projection::new(vec![ + ProjectionExpr { + expr: Arc::new(Column::new("c", 2)), + alias: "x".to_string(), + }, + ProjectionExpr { + expr: Arc::new(Column::new("b", 1)), + alias: "y".to_string(), + }, + ProjectionExpr { + expr: Arc::new(Column::new("a", 0)), + alias: "z".to_string(), + }, + ]); + + // Second projection: SELECT y@1 + z@2 AS c2, x@0 + 1 AS c1 + let top_projection = Projection::new(vec![ + ProjectionExpr { + expr: Arc::new(BinaryExpr::new( + Arc::new(Column::new("y", 1)), + Operator::Plus, + Arc::new(Column::new("z", 2)), + )), + alias: "c2".to_string(), + }, + ProjectionExpr { + expr: Arc::new(BinaryExpr::new( + Arc::new(Column::new("x", 0)), + Operator::Plus, + Arc::new(Literal::new(ScalarValue::Int32(Some(1)))), + )), + alias: "c1".to_string(), + }, + ]); + + // Merge should produce: SELECT b@1 + a@0 AS c2, c@2 + 1 AS c1 + let merged = base_projection.try_merge(&top_projection)?; + assert_snapshot!(format!("{merged}"), @"Projection[b@1 + a@0 AS c2, c@2 + 1 AS c1]"); + + Ok(()) + } + + #[test] + fn try_merge_error() { + // Create a base projection + let base = Projection::new(vec![ + ProjectionExpr { + expr: Arc::new(Column::new("a", 0)), + alias: "x".to_string(), + }, + ProjectionExpr { + expr: Arc::new(Column::new("b", 1)), + alias: "y".to_string(), + }, + ]); + + // Create a top projection that references a non-existent column index + let top = Projection::new(vec![ProjectionExpr { + expr: Arc::new(Column::new("z", 5)), // Invalid index + alias: "result".to_string(), + }]); + + // Attempt to merge and expect an error + let err_msg = base.try_merge(&top).unwrap_err().to_string(); + assert!( + err_msg.contains("Internal error: Column index 5 out of bounds for projected expressions of length 2"), + "Unexpected error message: {err_msg}", + ); + } + + #[test] + fn test_project_schema_simple_columns() -> Result<()> { + // Input schema: [col0: Int64, col1: Utf8, col2: Float32] + let input_schema = get_schema(); + + // Projection: SELECT col2 AS c, col0 AS a + let projection = Projection::new(vec![ + ProjectionExpr { + expr: Arc::new(Column::new("col2", 2)), + alias: "c".to_string(), + }, + ProjectionExpr { + expr: Arc::new(Column::new("col0", 0)), + alias: "a".to_string(), + }, + ]); + + let output_schema = projection.project_schema(&input_schema)?; + + // Should have 2 fields + assert_eq!(output_schema.fields().len(), 2); + + // First field should be "c" with Float32 type + assert_eq!(output_schema.field(0).name(), "c"); + assert_eq!(output_schema.field(0).data_type(), &DataType::Float32); + + // Second field should be "a" with Int64 type + assert_eq!(output_schema.field(1).name(), "a"); + assert_eq!(output_schema.field(1).data_type(), &DataType::Int64); + + Ok(()) + } + + #[test] + fn test_project_schema_with_expressions() -> Result<()> { + // Input schema: [col0: Int64, col1: Utf8, col2: Float32] + let input_schema = get_schema(); + + // Projection: SELECT col0 + 1 AS incremented + let projection = Projection::new(vec![ProjectionExpr 
{ + expr: Arc::new(BinaryExpr::new( + Arc::new(Column::new("col0", 0)), + Operator::Plus, + Arc::new(Literal::new(ScalarValue::Int64(Some(1)))), + )), + alias: "incremented".to_string(), + }]); + + let output_schema = projection.project_schema(&input_schema)?; + + // Should have 1 field + assert_eq!(output_schema.fields().len(), 1); + + // Field should be "incremented" with Int64 type + assert_eq!(output_schema.field(0).name(), "incremented"); + assert_eq!(output_schema.field(0).data_type(), &DataType::Int64); + + Ok(()) + } + + #[test] + fn test_project_schema_preserves_metadata() -> Result<()> { + // Create schema with metadata + let mut metadata = HashMap::new(); + metadata.insert("key".to_string(), "value".to_string()); + let field_with_metadata = + Field::new("col0", DataType::Int64, false).with_metadata(metadata.clone()); + let input_schema = Schema::new(vec![ + field_with_metadata, + Field::new("col1", DataType::Utf8, false), + ]); + + // Projection: SELECT col0 AS renamed + let projection = Projection::new(vec![ProjectionExpr { + expr: Arc::new(Column::new("col0", 0)), + alias: "renamed".to_string(), + }]); + + let output_schema = projection.project_schema(&input_schema)?; + + // Should have 1 field + assert_eq!(output_schema.fields().len(), 1); + + // Field should be "renamed" with metadata preserved + assert_eq!(output_schema.field(0).name(), "renamed"); + assert_eq!(output_schema.field(0).metadata(), &metadata); + + Ok(()) + } + + #[test] + fn test_project_schema_empty() -> Result<()> { + let input_schema = get_schema(); + let projection = Projection::new(vec![]); + + let output_schema = projection.project_schema(&input_schema)?; + + assert_eq!(output_schema.fields().len(), 0); + + Ok(()) + } + + #[test] + fn test_project_statistics_columns_only() -> Result<()> { + let input_stats = get_stats(); + let input_schema = get_schema(); + + // Projection: SELECT col1 AS text, col0 AS num + let projection = Projection::new(vec![ + ProjectionExpr { + expr: Arc::new(Column::new("col1", 1)), + alias: "text".to_string(), + }, + ProjectionExpr { + expr: Arc::new(Column::new("col0", 0)), + alias: "num".to_string(), + }, + ]); + + let output_stats = projection.project_statistics(input_stats, &input_schema)?; + + // Row count should be preserved + assert_eq!(output_stats.num_rows, Precision::Exact(5)); + + // Should have 2 column statistics (reordered from input) + assert_eq!(output_stats.column_statistics.len(), 2); + + // First column (col1 from input) + assert_eq!( + output_stats.column_statistics[0].distinct_count, + Precision::Exact(1) + ); + assert_eq!( + output_stats.column_statistics[0].max_value, + Precision::Exact(ScalarValue::from("x")) + ); + + // Second column (col0 from input) + assert_eq!( + output_stats.column_statistics[1].distinct_count, + Precision::Exact(5) + ); + assert_eq!( + output_stats.column_statistics[1].max_value, + Precision::Exact(ScalarValue::Int64(Some(21))) + ); + + Ok(()) + } + + #[test] + fn test_project_statistics_with_expressions() -> Result<()> { + let input_stats = get_stats(); + let input_schema = get_schema(); + + // Projection with expression: SELECT col0 + 1 AS incremented, col1 AS text + let projection = Projection::new(vec![ + ProjectionExpr { + expr: Arc::new(BinaryExpr::new( + Arc::new(Column::new("col0", 0)), + Operator::Plus, + Arc::new(Literal::new(ScalarValue::Int64(Some(1)))), + )), + alias: "incremented".to_string(), + }, + ProjectionExpr { + expr: Arc::new(Column::new("col1", 1)), + alias: "text".to_string(), + }, + ]); + + let output_stats 
= projection.project_statistics(input_stats, &input_schema)?; + + // Row count should be preserved + assert_eq!(output_stats.num_rows, Precision::Exact(5)); + + // Should have 2 column statistics + assert_eq!(output_stats.column_statistics.len(), 2); + + // First column (expression) should have unknown statistics + assert_eq!( + output_stats.column_statistics[0].distinct_count, + Precision::Absent + ); + assert_eq!( + output_stats.column_statistics[0].max_value, + Precision::Absent + ); + + // Second column (col1) should preserve statistics + assert_eq!( + output_stats.column_statistics[1].distinct_count, + Precision::Exact(1) + ); + + Ok(()) + } + + #[test] + fn test_project_statistics_primitive_width_only() -> Result<()> { + let input_stats = get_stats(); + let input_schema = get_schema(); + + // Projection with only primitive width columns: SELECT col2 AS f, col0 AS i + let projection = Projection::new(vec![ + ProjectionExpr { + expr: Arc::new(Column::new("col2", 2)), + alias: "f".to_string(), + }, + ProjectionExpr { + expr: Arc::new(Column::new("col0", 0)), + alias: "i".to_string(), + }, + ]); + + let output_stats = projection.project_statistics(input_stats, &input_schema)?; + + // Row count should be preserved + assert_eq!(output_stats.num_rows, Precision::Exact(5)); + + // Total byte size should be recalculated for primitive types + // Float32 (4 bytes) + Int64 (8 bytes) = 12 bytes per row, 5 rows = 60 bytes + assert_eq!(output_stats.total_byte_size, Precision::Exact(60)); + + // Should have 2 column statistics + assert_eq!(output_stats.column_statistics.len(), 2); + + Ok(()) + } + + #[test] + fn test_project_statistics_empty() -> Result<()> { + let input_stats = get_stats(); + let input_schema = get_schema(); + + let projection = Projection::new(vec![]); + + let output_stats = projection.project_statistics(input_stats, &input_schema)?; + + // Row count should be preserved + assert_eq!(output_stats.num_rows, Precision::Exact(5)); + + // Should have no column statistics + assert_eq!(output_stats.column_statistics.len(), 0); + + // Total byte size should be 0 for empty projection + assert_eq!(output_stats.total_byte_size, Precision::Exact(0)); + + Ok(()) + } } diff --git a/datafusion/physical-optimizer/Cargo.toml b/datafusion/physical-optimizer/Cargo.toml index 15466cd86bb0..4df011fc0a05 100644 --- a/datafusion/physical-optimizer/Cargo.toml +++ b/datafusion/physical-optimizer/Cargo.toml @@ -52,5 +52,6 @@ recursive = { workspace = true, optional = true } [dev-dependencies] datafusion-expr = { workspace = true } +datafusion-functions = { workspace = true } insta = { workspace = true } tokio = { workspace = true } diff --git a/datafusion/physical-optimizer/src/projection_pushdown.rs b/datafusion/physical-optimizer/src/projection_pushdown.rs index 34affcbd4a19..987e3cb6f713 100644 --- a/datafusion/physical-optimizer/src/projection_pushdown.rs +++ b/datafusion/physical-optimizer/src/projection_pushdown.rs @@ -20,18 +20,32 @@ //! projections one by one if the operator below is amenable to this. If a //! projection reaches a source, it can even disappear from the plan entirely. 
-use std::sync::Arc;
-
 use crate::PhysicalOptimizerRule;
+use arrow::datatypes::{Fields, Schema, SchemaRef};
+use datafusion_common::alias::AliasGenerator;
+use std::collections::HashSet;
+use std::sync::Arc;
 
 use datafusion_common::config::ConfigOptions;
-use datafusion_common::tree_node::{TransformedResult, TreeNode};
-use datafusion_common::Result;
-use datafusion_physical_plan::projection::remove_unnecessary_projections;
+use datafusion_common::tree_node::{
+    Transformed, TransformedResult, TreeNode, TreeNodeRecursion,
+};
+use datafusion_common::{JoinSide, JoinType, Result};
+use datafusion_physical_expr::expressions::Column;
+use datafusion_physical_expr_common::physical_expr::PhysicalExpr;
+use datafusion_physical_plan::joins::utils::{ColumnIndex, JoinFilter};
+use datafusion_physical_plan::joins::NestedLoopJoinExec;
+use datafusion_physical_plan::projection::{
+    remove_unnecessary_projections, ProjectionExec,
+};
 use datafusion_physical_plan::ExecutionPlan;
 
 /// This rule inspects `ProjectionExec`'s in the given physical plan and tries to
 /// remove or swap with its child.
+///
+/// Furthermore, it tries to push down projections from nested loop join filters that only depend
+/// on one side of the join. By pushing these projections down, expressions that only depend on one
+/// side of the join no longer have to be re-evaluated for every row of the cartesian product of
+/// the two sides.
 #[derive(Default, Debug)]
 pub struct ProjectionPushdown {}
 
@@ -48,6 +62,20 @@ impl PhysicalOptimizerRule for ProjectionPushdown {
         plan: Arc<dyn ExecutionPlan>,
         _config: &ConfigOptions,
     ) -> Result<Arc<dyn ExecutionPlan>> {
+        let alias_generator = AliasGenerator::new();
+        let plan = plan
+            .transform_up(|plan| {
+                match plan.as_any().downcast_ref::<NestedLoopJoinExec>() {
+                    None => Ok(Transformed::no(plan)),
+                    Some(nested_loop_join) => try_push_down_join_filter(
+                        Arc::clone(&plan),
+                        nested_loop_join,
+                        &alias_generator,
+                    ),
+                }
+            })
+            .map(|t| t.data)?;
+
         plan.transform_down(remove_unnecessary_projections).data()
     }
 
@@ -59,3 +87,713 @@ impl PhysicalOptimizerRule for ProjectionPushdown {
         true
     }
 }
+
+/// Tries to push down parts of the filter.
+///
+/// See [JoinFilterRewriter] for details.
+fn try_push_down_join_filter(
+    original_plan: Arc<dyn ExecutionPlan>,
+    join: &NestedLoopJoinExec,
+    alias_generator: &AliasGenerator,
+) -> Result<Transformed<Arc<dyn ExecutionPlan>>> {
+    // Mark joins are currently not supported.
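+    // They would require handling `JoinSide::None` column indices, which the rewrite code below
+    // treats as unreachable.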
+ if matches!(join.join_type(), JoinType::LeftMark | JoinType::RightMark) { + return Ok(Transformed::no(original_plan)); + } + + let projections = join.projection(); + let Some(filter) = join.filter() else { + return Ok(Transformed::no(original_plan)); + }; + + let original_lhs_length = join.left().schema().fields().len(); + let original_rhs_length = join.right().schema().fields().len(); + + let lhs_rewrite = try_push_down_projection( + Arc::clone(&join.right().schema()), + Arc::clone(join.left()), + JoinSide::Left, + filter.clone(), + alias_generator, + )?; + let rhs_rewrite = try_push_down_projection( + Arc::clone(&lhs_rewrite.data.0.schema()), + Arc::clone(join.right()), + JoinSide::Right, + lhs_rewrite.data.1, + alias_generator, + )?; + if !lhs_rewrite.transformed && !rhs_rewrite.transformed { + return Ok(Transformed::no(original_plan)); + } + + let join_filter = minimize_join_filter( + Arc::clone(rhs_rewrite.data.1.expression()), + rhs_rewrite.data.1.column_indices().to_vec(), + lhs_rewrite.data.0.schema().as_ref(), + rhs_rewrite.data.0.schema().as_ref(), + ); + + let new_lhs_length = lhs_rewrite.data.0.schema().fields.len(); + let projections = match projections { + None => match join.join_type() { + JoinType::Inner | JoinType::Left | JoinType::Right | JoinType::Full => { + // Build projections that ignore the newly projected columns. + let mut projections = Vec::new(); + projections.extend(0..original_lhs_length); + projections.extend(new_lhs_length..new_lhs_length + original_rhs_length); + projections + } + JoinType::LeftSemi | JoinType::LeftAnti => { + // Only return original left columns + let mut projections = Vec::new(); + projections.extend(0..original_lhs_length); + projections + } + JoinType::RightSemi | JoinType::RightAnti => { + // Only return original right columns + let mut projections = Vec::new(); + projections.extend(0..original_rhs_length); + projections + } + _ => unreachable!("Unsupported join type"), + }, + Some(projections) => { + let rhs_offset = new_lhs_length - original_lhs_length; + projections + .iter() + .map(|idx| { + if *idx >= original_lhs_length { + idx + rhs_offset + } else { + *idx + } + }) + .collect() + } + }; + + Ok(Transformed::yes(Arc::new(NestedLoopJoinExec::try_new( + lhs_rewrite.data.0, + rhs_rewrite.data.0, + Some(join_filter), + join.join_type(), + Some(projections), + )?))) +} + +/// Tries to push down parts of `expr` into the `join_side`. 
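+///
+/// The caller runs this once per join side, threading the rewritten filter from the first pass
+/// into the second. A rough sketch of that call pattern (simplified from
+/// `try_push_down_join_filter` above):
+///
+/// ```ignore
+/// let lhs = try_push_down_projection(
+///     right.schema(), Arc::clone(&left), JoinSide::Left, filter, alias_generator)?;
+/// let rhs = try_push_down_projection(
+///     lhs.data.0.schema(), Arc::clone(&right), JoinSide::Right, lhs.data.1, alias_generator)?;
+/// // `lhs.data.0` / `rhs.data.0` are the (possibly re-projected) join inputs;
+/// // `rhs.data.1` is the filter that is then minimized and attached to the new join.
+/// ```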
+fn try_push_down_projection( + other_schema: SchemaRef, + plan: Arc, + join_side: JoinSide, + join_filter: JoinFilter, + alias_generator: &AliasGenerator, +) -> Result, JoinFilter)>> { + let expr = Arc::clone(join_filter.expression()); + let original_plan_schema = plan.schema(); + let mut rewriter = JoinFilterRewriter::new( + join_side, + original_plan_schema.as_ref(), + join_filter.column_indices().to_vec(), + alias_generator, + ); + let new_expr = rewriter.rewrite(expr)?; + + if new_expr.transformed { + let new_join_side = + ProjectionExec::try_new(rewriter.join_side_projections, plan)?; + let new_schema = Arc::clone(&new_join_side.schema()); + + let (lhs_schema, rhs_schema) = match join_side { + JoinSide::Left => (new_schema, other_schema), + JoinSide::Right => (other_schema, new_schema), + JoinSide::None => unreachable!("Mark join not supported"), + }; + let intermediate_schema = rewriter + .intermediate_column_indices + .iter() + .map(|ci| match ci.side { + JoinSide::Left => Arc::clone(&lhs_schema.fields[ci.index]), + JoinSide::Right => Arc::clone(&rhs_schema.fields[ci.index]), + JoinSide::None => unreachable!("Mark join not supported"), + }) + .collect::(); + + let join_filter = JoinFilter::new( + new_expr.data, + rewriter.intermediate_column_indices, + Arc::new(Schema::new(intermediate_schema)), + ); + Ok(Transformed::yes((Arc::new(new_join_side), join_filter))) + } else { + Ok(Transformed::no((plan, join_filter))) + } +} + +/// Creates a new [JoinFilter] and tries to minimize the internal schema. +/// +/// This could eliminate some columns that were only part of a computation that has been pushed +/// down. As this computation is now materialized on one side of the join, the original input +/// columns are not needed anymore. +fn minimize_join_filter( + expr: Arc, + old_column_indices: Vec, + lhs_schema: &Schema, + rhs_schema: &Schema, +) -> JoinFilter { + let mut used_columns = HashSet::new(); + expr.apply(|expr| { + if let Some(col) = expr.as_any().downcast_ref::() { + used_columns.insert(col.index()); + } + Ok(TreeNodeRecursion::Continue) + }) + .expect("Closure cannot fail"); + + let new_column_indices = old_column_indices + .iter() + .enumerate() + .filter(|(idx, _)| used_columns.contains(idx)) + .map(|(_, ci)| ci.clone()) + .collect::>(); + let fields = new_column_indices + .iter() + .map(|ci| match ci.side { + JoinSide::Left => lhs_schema.field(ci.index).clone(), + JoinSide::Right => rhs_schema.field(ci.index).clone(), + JoinSide::None => unreachable!("Mark join not supported"), + }) + .collect::(); + + let final_expr = expr + .transform_up(|expr| match expr.as_any().downcast_ref::() { + None => Ok(Transformed::no(expr)), + Some(column) => { + let new_idx = used_columns + .iter() + .filter(|idx| **idx < column.index()) + .count(); + let new_column = Column::new(column.name(), new_idx); + Ok(Transformed::yes( + Arc::new(new_column) as Arc + )) + } + }) + .expect("Closure cannot fail"); + + JoinFilter::new( + final_expr.data, + new_column_indices, + Arc::new(Schema::new(fields)), + ) +} + +/// Implements the push-down machinery. +/// +/// The rewriter starts at the top of the filter expression and traverses the expression tree. For +/// each (sub-)expression, the rewriter checks whether it only refers to one side of the join. If +/// this is never the case, no subexpressions of the filter can be pushed down. If there is a +/// subexpression that can be computed using only one side of the join, the entire subexpression is +/// pushed down to the join side. 
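+///
+/// For example, for the filter `a@0 + 1 > x@1 + 1` (with `a` on the left side and `x` on the
+/// right side), both comparison operands can be computed from a single side, so each is pushed
+/// into a projection below the join and the filter is rewritten to reference the new columns
+/// (plan shape taken from the tests below):
+///
+/// ```text
+/// NestedLoopJoinExec: filter=join_proj_push_down_1@0 > join_proj_push_down_2@1, projection=[a@0, x@2]
+///   ProjectionExec: expr=[a@0 as a, a@0 + 1 as join_proj_push_down_1]
+///     ...
+///   ProjectionExec: expr=[x@0 as x, x@0 + 1 as join_proj_push_down_2]
+///     ...
+/// ```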
+struct JoinFilterRewriter<'a> { + join_side: JoinSide, + join_side_schema: &'a Schema, + join_side_projections: Vec<(Arc, String)>, + intermediate_column_indices: Vec, + alias_generator: &'a AliasGenerator, +} + +impl<'a> JoinFilterRewriter<'a> { + /// Creates a new [JoinFilterRewriter]. + fn new( + join_side: JoinSide, + join_side_schema: &'a Schema, + column_indices: Vec, + alias_generator: &'a AliasGenerator, + ) -> Self { + let projections = join_side_schema + .fields() + .iter() + .enumerate() + .map(|(idx, field)| { + ( + Arc::new(Column::new(field.name(), idx)) as Arc, + field.name().to_string(), + ) + }) + .collect(); + + Self { + join_side, + join_side_schema, + join_side_projections: projections, + intermediate_column_indices: column_indices, + alias_generator, + } + } + + /// Executes the push-down machinery on `expr`. + /// + /// See the [JoinFilterRewriter] for further information. + fn rewrite( + &mut self, + expr: Arc, + ) -> Result>> { + let depends_on_this_side = self.depends_on_join_side(&expr, self.join_side)?; + // We don't push down things that do not depend on this side (other side or no side). + if !depends_on_this_side { + return Ok(Transformed::no(expr)); + } + + // Recurse if there is a dependency to both sides or if the entire expression is volatile. + let depends_on_other_side = + self.depends_on_join_side(&expr, self.join_side.negate())?; + let is_volatile = is_volatile_expression_tree(expr.as_ref()); + if depends_on_other_side || is_volatile { + return expr.map_children(|expr| self.rewrite(expr)); + } + + // There is only a dependency on this side. + + // If this expression has no children, we do not push down, as it should already be a column + // reference. + if expr.children().is_empty() { + return Ok(Transformed::no(expr)); + } + + // Otherwise, we push down a projection. + let alias = self.alias_generator.next("join_proj_push_down"); + let idx = self.create_new_column(alias.clone(), expr)?; + + Ok(Transformed::yes( + Arc::new(Column::new(&alias, idx)) as Arc + )) + } + + /// Creates a new column in the current join side. + fn create_new_column( + &mut self, + name: String, + expr: Arc, + ) -> Result { + // First, add a new projection. The expression must be rewritten, as it is no longer + // executed against the filter schema. + let new_idx = self.join_side_projections.len(); + let rewritten_expr = expr.transform_up(|expr| { + Ok(match expr.as_any().downcast_ref::() { + None => Transformed::no(expr), + Some(column) => { + let intermediate_column = + &self.intermediate_column_indices[column.index()]; + assert_eq!(intermediate_column.side, self.join_side); + + let join_side_index = intermediate_column.index; + let field = self.join_side_schema.field(join_side_index); + let new_column = Column::new(field.name(), join_side_index); + Transformed::yes(Arc::new(new_column) as Arc) + } + }) + })?; + self.join_side_projections.push((rewritten_expr.data, name)); + + // Then, update the column indices + let new_intermediate_idx = self.intermediate_column_indices.len(); + let idx = ColumnIndex { + index: new_idx, + side: self.join_side, + }; + self.intermediate_column_indices.push(idx); + + Ok(new_intermediate_idx) + } + + /// Checks whether the entire expression depends on the given `join_side`. 
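+    /// (That is, whether any column referenced by `expr` resolves to a `ColumnIndex` on that side.)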
+ fn depends_on_join_side( + &mut self, + expr: &Arc, + join_side: JoinSide, + ) -> Result { + let mut result = false; + expr.apply(|expr| match expr.as_any().downcast_ref::() { + None => Ok(TreeNodeRecursion::Continue), + Some(c) => { + let column_index = &self.intermediate_column_indices[c.index()]; + if column_index.side == join_side { + result = true; + return Ok(TreeNodeRecursion::Stop); + } + Ok(TreeNodeRecursion::Continue) + } + })?; + + Ok(result) + } +} + +fn is_volatile_expression_tree(expr: &dyn PhysicalExpr) -> bool { + if expr.is_volatile_node() { + return true; + } + + expr.children() + .iter() + .map(|expr| is_volatile_expression_tree(expr.as_ref())) + .reduce(|lhs, rhs| lhs || rhs) + .unwrap_or(false) +} + +#[cfg(test)] +mod test { + use super::*; + use arrow::datatypes::{DataType, Field, FieldRef, Schema}; + use datafusion_expr_common::operator::Operator; + use datafusion_functions::math::random; + use datafusion_physical_expr::expressions::{binary, lit}; + use datafusion_physical_expr::ScalarFunctionExpr; + use datafusion_physical_expr_common::physical_expr::PhysicalExpr; + use datafusion_physical_plan::displayable; + use datafusion_physical_plan::empty::EmptyExec; + use insta::assert_snapshot; + use std::sync::Arc; + + #[tokio::test] + async fn no_computation_does_not_project() -> Result<()> { + let (left_schema, right_schema) = create_simple_schemas(); + let optimized_plan = run_test( + left_schema, + right_schema, + a_x(), + None, + a_greater_than_x, + JoinType::Inner, + )?; + + assert_snapshot!(optimized_plan, @r" + NestedLoopJoinExec: join_type=Inner, filter=a@0 > x@1 + EmptyExec + EmptyExec + "); + Ok(()) + } + + #[tokio::test] + async fn simple_push_down() -> Result<()> { + let (left_schema, right_schema) = create_simple_schemas(); + let optimized_plan = run_test( + left_schema, + right_schema, + a_x(), + None, + a_plus_one_greater_than_x_plus_one, + JoinType::Inner, + )?; + + assert_snapshot!(optimized_plan, @r" + NestedLoopJoinExec: join_type=Inner, filter=join_proj_push_down_1@0 > join_proj_push_down_2@1, projection=[a@0, x@2] + ProjectionExec: expr=[a@0 as a, a@0 + 1 as join_proj_push_down_1] + EmptyExec + ProjectionExec: expr=[x@0 as x, x@0 + 1 as join_proj_push_down_2] + EmptyExec + "); + Ok(()) + } + + #[tokio::test] + async fn does_not_push_down_short_circuiting_expressions() -> Result<()> { + let (left_schema, right_schema) = create_simple_schemas(); + let optimized_plan = run_test( + left_schema, + right_schema, + a_x(), + None, + |schema| { + binary( + lit(false), + Operator::And, + a_plus_one_greater_than_x_plus_one(schema)?, + schema, + ) + }, + JoinType::Inner, + )?; + + assert_snapshot!(optimized_plan, @r" + NestedLoopJoinExec: join_type=Inner, filter=false AND join_proj_push_down_1@0 > join_proj_push_down_2@1, projection=[a@0, x@2] + ProjectionExec: expr=[a@0 as a, a@0 + 1 as join_proj_push_down_1] + EmptyExec + ProjectionExec: expr=[x@0 as x, x@0 + 1 as join_proj_push_down_2] + EmptyExec + "); + Ok(()) + } + + #[tokio::test] + async fn does_not_push_down_volatile_functions() -> Result<()> { + let (left_schema, right_schema) = create_simple_schemas(); + let optimized_plan = run_test( + left_schema, + right_schema, + a_x(), + None, + a_plus_rand_greater_than_x, + JoinType::Inner, + )?; + + assert_snapshot!(optimized_plan, @r" + NestedLoopJoinExec: join_type=Inner, filter=a@0 + rand() > x@1 + EmptyExec + EmptyExec + "); + Ok(()) + } + + #[tokio::test] + async fn complex_schema_push_down() -> Result<()> { + let (left_schema, right_schema) = 
create_complex_schemas(); + + let optimized_plan = run_test( + left_schema, + right_schema, + a_b_x_z(), + None, + a_plus_b_greater_than_x_plus_z, + JoinType::Inner, + )?; + + assert_snapshot!(optimized_plan, @r" + NestedLoopJoinExec: join_type=Inner, filter=join_proj_push_down_1@0 > join_proj_push_down_2@1, projection=[a@0, b@1, c@2, x@4, y@5, z@6] + ProjectionExec: expr=[a@0 as a, b@1 as b, c@2 as c, a@0 + b@1 as join_proj_push_down_1] + EmptyExec + ProjectionExec: expr=[x@0 as x, y@1 as y, z@2 as z, x@0 + z@2 as join_proj_push_down_2] + EmptyExec + "); + Ok(()) + } + + #[tokio::test] + async fn push_down_with_existing_projections() -> Result<()> { + let (left_schema, right_schema) = create_complex_schemas(); + + let optimized_plan = run_test( + left_schema, + right_schema, + a_b_x_z(), + Some(vec![1, 3, 5]), // ("b", "x", "z") + a_plus_b_greater_than_x_plus_z, + JoinType::Inner, + )?; + + assert_snapshot!(optimized_plan, @r" + NestedLoopJoinExec: join_type=Inner, filter=join_proj_push_down_1@0 > join_proj_push_down_2@1, projection=[b@1, x@4, z@6] + ProjectionExec: expr=[a@0 as a, b@1 as b, c@2 as c, a@0 + b@1 as join_proj_push_down_1] + EmptyExec + ProjectionExec: expr=[x@0 as x, y@1 as y, z@2 as z, x@0 + z@2 as join_proj_push_down_2] + EmptyExec + "); + Ok(()) + } + + #[tokio::test] + async fn left_semi_join_projection() -> Result<()> { + let (left_schema, right_schema) = create_simple_schemas(); + + let left_semi_join_plan = run_test( + left_schema.clone(), + right_schema.clone(), + a_x(), + None, + a_plus_one_greater_than_x_plus_one, + JoinType::LeftSemi, + )?; + + assert_snapshot!(left_semi_join_plan, @r" + NestedLoopJoinExec: join_type=LeftSemi, filter=join_proj_push_down_1@0 > join_proj_push_down_2@1, projection=[a@0] + ProjectionExec: expr=[a@0 as a, a@0 + 1 as join_proj_push_down_1] + EmptyExec + ProjectionExec: expr=[x@0 as x, x@0 + 1 as join_proj_push_down_2] + EmptyExec + "); + Ok(()) + } + + #[tokio::test] + async fn right_semi_join_projection() -> Result<()> { + let (left_schema, right_schema) = create_simple_schemas(); + let right_semi_join_plan = run_test( + left_schema, + right_schema, + a_x(), + None, + a_plus_one_greater_than_x_plus_one, + JoinType::RightSemi, + )?; + assert_snapshot!(right_semi_join_plan, @r" + NestedLoopJoinExec: join_type=RightSemi, filter=join_proj_push_down_1@0 > join_proj_push_down_2@1, projection=[x@0] + ProjectionExec: expr=[a@0 as a, a@0 + 1 as join_proj_push_down_1] + EmptyExec + ProjectionExec: expr=[x@0 as x, x@0 + 1 as join_proj_push_down_2] + EmptyExec + "); + Ok(()) + } + + fn run_test( + left_schema: Schema, + right_schema: Schema, + column_indices: Vec, + existing_projections: Option>, + filter_expr_builder: impl FnOnce(&Schema) -> Result>, + join_type: JoinType, + ) -> Result { + let left = Arc::new(EmptyExec::new(Arc::new(left_schema.clone()))); + let right = Arc::new(EmptyExec::new(Arc::new(right_schema.clone()))); + + let join_fields: Vec<_> = column_indices + .iter() + .map(|ci| match ci.side { + JoinSide::Left => left_schema.field(ci.index).clone(), + JoinSide::Right => right_schema.field(ci.index).clone(), + JoinSide::None => unreachable!(), + }) + .collect(); + let join_schema = Arc::new(Schema::new(join_fields)); + + let filter_expr = filter_expr_builder(join_schema.as_ref())?; + + let join_filter = JoinFilter::new(filter_expr, column_indices, join_schema); + + let join = NestedLoopJoinExec::try_new( + left, + right, + Some(join_filter), + &join_type, + existing_projections, + )?; + + let optimizer = ProjectionPushdown::new(); 
+ let optimized_plan = optimizer.optimize(Arc::new(join), &Default::default())?; + + let displayable_plan = displayable(optimized_plan.as_ref()).indent(false); + Ok(displayable_plan.to_string()) + } + + fn create_simple_schemas() -> (Schema, Schema) { + let left_schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); + let right_schema = Schema::new(vec![Field::new("x", DataType::Int32, false)]); + + (left_schema, right_schema) + } + + fn create_complex_schemas() -> (Schema, Schema) { + let left_schema = Schema::new(vec![ + Field::new("a", DataType::Int32, false), + Field::new("b", DataType::Int32, false), + Field::new("c", DataType::Int32, false), + ]); + + let right_schema = Schema::new(vec![ + Field::new("x", DataType::Int32, false), + Field::new("y", DataType::Int32, false), + Field::new("z", DataType::Int32, false), + ]); + + (left_schema, right_schema) + } + + fn a_x() -> Vec { + vec![ + ColumnIndex { + index: 0, + side: JoinSide::Left, + }, + ColumnIndex { + index: 0, + side: JoinSide::Right, + }, + ] + } + + fn a_b_x_z() -> Vec { + vec![ + ColumnIndex { + index: 0, + side: JoinSide::Left, + }, + ColumnIndex { + index: 1, + side: JoinSide::Left, + }, + ColumnIndex { + index: 0, + side: JoinSide::Right, + }, + ColumnIndex { + index: 2, + side: JoinSide::Right, + }, + ] + } + + fn a_plus_one_greater_than_x_plus_one( + join_schema: &Schema, + ) -> Result> { + let left_expr = binary( + Arc::new(Column::new("a", 0)), + Operator::Plus, + lit(1), + join_schema, + )?; + let right_expr = binary( + Arc::new(Column::new("x", 1)), + Operator::Plus, + lit(1), + join_schema, + )?; + binary(left_expr, Operator::Gt, right_expr, join_schema) + } + + fn a_plus_rand_greater_than_x(join_schema: &Schema) -> Result> { + let left_expr = binary( + Arc::new(Column::new("a", 0)), + Operator::Plus, + Arc::new(ScalarFunctionExpr::new( + "rand", + random(), + vec![], + FieldRef::new(Field::new("out", DataType::Float64, false)), + Arc::new(ConfigOptions::default()), + )), + join_schema, + )?; + let right_expr = Arc::new(Column::new("x", 1)); + binary(left_expr, Operator::Gt, right_expr, join_schema) + } + + fn a_greater_than_x(join_schema: &Schema) -> Result> { + binary( + Arc::new(Column::new("a", 0)), + Operator::Gt, + Arc::new(Column::new("x", 1)), + join_schema, + ) + } + + fn a_plus_b_greater_than_x_plus_z( + join_schema: &Schema, + ) -> Result> { + let lhs = binary( + Arc::new(Column::new("a", 0)), + Operator::Plus, + Arc::new(Column::new("b", 1)), + join_schema, + )?; + let rhs = binary( + Arc::new(Column::new("x", 2)), + Operator::Plus, + Arc::new(Column::new("z", 3)), + join_schema, + )?; + binary(lhs, Operator::Gt, rhs, join_schema) + } +} diff --git a/datafusion/physical-plan/src/analyze.rs b/datafusion/physical-plan/src/analyze.rs index c095afe5e716..c696cf5aa5e6 100644 --- a/datafusion/physical-plan/src/analyze.rs +++ b/datafusion/physical-plan/src/analyze.rs @@ -26,6 +26,7 @@ use super::{ SendableRecordBatchStream, }; use crate::display::DisplayableExecutionPlan; +use crate::metrics::MetricType; use crate::{DisplayFormatType, ExecutionPlan, Partitioning}; use arrow::{array::StringBuilder, datatypes::SchemaRef, record_batch::RecordBatch}; @@ -44,6 +45,8 @@ pub struct AnalyzeExec { verbose: bool, /// If statistics should be displayed show_statistics: bool, + /// Which metric categories should be displayed + metric_types: Vec, /// The input plan (the plan being analyzed) pub(crate) input: Arc, /// The output schema for RecordBatches of this exec node @@ -56,6 +59,7 @@ impl AnalyzeExec { 
pub fn new( verbose: bool, show_statistics: bool, + metric_types: Vec, input: Arc, schema: SchemaRef, ) -> Self { @@ -63,6 +67,7 @@ impl AnalyzeExec { AnalyzeExec { verbose, show_statistics, + metric_types, input, schema, cache, @@ -145,6 +150,7 @@ impl ExecutionPlan for AnalyzeExec { Ok(Arc::new(Self::new( self.verbose, self.show_statistics, + self.metric_types.clone(), children.pop().unwrap(), Arc::clone(&self.schema), ))) @@ -182,6 +188,7 @@ impl ExecutionPlan for AnalyzeExec { let captured_schema = Arc::clone(&self.schema); let verbose = self.verbose; let show_statistics = self.show_statistics; + let metric_types = self.metric_types.clone(); // future that gathers the results from all the tasks in the // JoinSet that computes the overall row count and final @@ -201,6 +208,7 @@ impl ExecutionPlan for AnalyzeExec { duration, captured_input, captured_schema, + &metric_types, ) }; @@ -219,6 +227,7 @@ fn create_output_batch( duration: std::time::Duration, input: Arc, schema: SchemaRef, + metric_types: &[MetricType], ) -> Result { let mut type_builder = StringBuilder::with_capacity(1, 1024); let mut plan_builder = StringBuilder::with_capacity(1, 1024); @@ -227,6 +236,7 @@ fn create_output_batch( type_builder.append_value("Plan with Metrics"); let annotated_plan = DisplayableExecutionPlan::with_metrics(input.as_ref()) + .set_metric_types(metric_types.to_vec()) .set_show_statistics(show_statistics) .indent(verbose) .to_string(); @@ -238,6 +248,7 @@ fn create_output_batch( type_builder.append_value("Plan with Full Metrics"); let annotated_plan = DisplayableExecutionPlan::with_full_metrics(input.as_ref()) + .set_metric_types(metric_types.to_vec()) .set_show_statistics(show_statistics) .indent(verbose) .to_string(); @@ -282,7 +293,13 @@ mod tests { let blocking_exec = Arc::new(BlockingExec::new(Arc::clone(&schema), 1)); let refs = blocking_exec.refs(); - let analyze_exec = Arc::new(AnalyzeExec::new(true, false, blocking_exec, schema)); + let analyze_exec = Arc::new(AnalyzeExec::new( + true, + false, + vec![MetricType::SUMMARY, MetricType::DEV], + blocking_exec, + schema, + )); let fut = collect(analyze_exec, task_ctx); let mut fut = fut.boxed(); diff --git a/datafusion/physical-plan/src/coalesce_partitions.rs b/datafusion/physical-plan/src/coalesce_partitions.rs index 5869c51b26b8..2597dc6408de 100644 --- a/datafusion/physical-plan/src/coalesce_partitions.rs +++ b/datafusion/physical-plan/src/coalesce_partitions.rs @@ -170,8 +170,18 @@ impl ExecutionPlan for CoalescePartitionsExec { "CoalescePartitionsExec requires at least one input partition" ), 1 => { - // bypass any threading / metrics if there is a single partition - self.input.execute(0, context) + // single-partition path: execute child directly, but ensure fetch is respected + // (wrap with ObservedStream only if fetch is present so we don't add overhead otherwise) + let child_stream = self.input.execute(0, context)?; + if self.fetch.is_some() { + let baseline_metrics = BaselineMetrics::new(&self.metrics, partition); + return Ok(Box::pin(ObservedStream::new( + child_stream, + baseline_metrics, + self.fetch, + ))); + } + Ok(child_stream) } _ => { let baseline_metrics = BaselineMetrics::new(&self.metrics, partition); @@ -351,4 +361,110 @@ mod tests { collect(coalesce_partitions_exec, task_ctx).await.unwrap(); } + + #[tokio::test] + async fn test_single_partition_with_fetch() -> Result<()> { + let task_ctx = Arc::new(TaskContext::default()); + + // Use existing scan_partitioned with 1 partition (returns 100 rows per partition) + let input = 
test::scan_partitioned(1); + + // Test with fetch=3 + let coalesce = CoalescePartitionsExec::new(input).with_fetch(Some(3)); + + let stream = coalesce.execute(0, task_ctx)?; + let batches = common::collect(stream).await?; + + let row_count: usize = batches.iter().map(|batch| batch.num_rows()).sum(); + assert_eq!(row_count, 3, "Should only return 3 rows due to fetch=3"); + + Ok(()) + } + + #[tokio::test] + async fn test_multi_partition_with_fetch_one() -> Result<()> { + let task_ctx = Arc::new(TaskContext::default()); + + // Create 4 partitions, each with 100 rows + // This simulates the real-world scenario where each partition has data + let input = test::scan_partitioned(4); + + // Test with fetch=1 (the original bug: was returning multiple rows instead of 1) + let coalesce = CoalescePartitionsExec::new(input).with_fetch(Some(1)); + + let stream = coalesce.execute(0, task_ctx)?; + let batches = common::collect(stream).await?; + + let row_count: usize = batches.iter().map(|batch| batch.num_rows()).sum(); + assert_eq!( + row_count, 1, + "Should only return 1 row due to fetch=1, not one per partition" + ); + + Ok(()) + } + + #[tokio::test] + async fn test_single_partition_without_fetch() -> Result<()> { + let task_ctx = Arc::new(TaskContext::default()); + + // Use scan_partitioned with 1 partition + let input = test::scan_partitioned(1); + + // Test without fetch (should return all rows) + let coalesce = CoalescePartitionsExec::new(input); + + let stream = coalesce.execute(0, task_ctx)?; + let batches = common::collect(stream).await?; + + let row_count: usize = batches.iter().map(|batch| batch.num_rows()).sum(); + assert_eq!( + row_count, 100, + "Should return all 100 rows when fetch is None" + ); + + Ok(()) + } + + #[tokio::test] + async fn test_single_partition_fetch_larger_than_batch() -> Result<()> { + let task_ctx = Arc::new(TaskContext::default()); + + // Use scan_partitioned with 1 partition (returns 100 rows) + let input = test::scan_partitioned(1); + + // Test with fetch larger than available rows + let coalesce = CoalescePartitionsExec::new(input).with_fetch(Some(200)); + + let stream = coalesce.execute(0, task_ctx)?; + let batches = common::collect(stream).await?; + + let row_count: usize = batches.iter().map(|batch| batch.num_rows()).sum(); + assert_eq!( + row_count, 100, + "Should return all available rows (100) when fetch (200) is larger" + ); + + Ok(()) + } + + #[tokio::test] + async fn test_multi_partition_fetch_exact_match() -> Result<()> { + let task_ctx = Arc::new(TaskContext::default()); + + // Create 4 partitions, each with 100 rows + let num_partitions = 4; + let csv = test::scan_partitioned(num_partitions); + + // Test with fetch=400 (exactly all rows) + let coalesce = CoalescePartitionsExec::new(csv).with_fetch(Some(400)); + + let stream = coalesce.execute(0, task_ctx)?; + let batches = common::collect(stream).await?; + + let row_count: usize = batches.iter().map(|batch| batch.num_rows()).sum(); + assert_eq!(row_count, 400, "Should return exactly 400 rows"); + + Ok(()) + } } diff --git a/datafusion/physical-plan/src/display.rs b/datafusion/physical-plan/src/display.rs index 2420edfc743d..35ca0b65ae29 100644 --- a/datafusion/physical-plan/src/display.rs +++ b/datafusion/physical-plan/src/display.rs @@ -28,6 +28,7 @@ use datafusion_common::display::{GraphvizBuilder, PlanType, StringifiedPlan}; use datafusion_expr::display_schema; use datafusion_physical_expr::LexOrdering; +use crate::metrics::MetricType; use crate::render_tree::RenderTree; use super::{accept, 
ExecutionPlan, ExecutionPlanVisitor}; @@ -120,11 +121,17 @@ pub struct DisplayableExecutionPlan<'a> { show_statistics: bool, /// If schema should be displayed. See [`Self::set_show_schema`] show_schema: bool, + /// Which metric categories should be included when rendering + metric_types: Vec, // (TreeRender) Maximum total width of the rendered tree tree_maximum_render_width: usize, } impl<'a> DisplayableExecutionPlan<'a> { + fn default_metric_types() -> Vec { + vec![MetricType::SUMMARY, MetricType::DEV] + } + /// Create a wrapper around an [`ExecutionPlan`] which can be /// pretty printed in a variety of ways pub fn new(inner: &'a dyn ExecutionPlan) -> Self { @@ -133,6 +140,7 @@ impl<'a> DisplayableExecutionPlan<'a> { show_metrics: ShowMetrics::None, show_statistics: false, show_schema: false, + metric_types: Self::default_metric_types(), tree_maximum_render_width: 240, } } @@ -146,6 +154,7 @@ impl<'a> DisplayableExecutionPlan<'a> { show_metrics: ShowMetrics::Aggregated, show_statistics: false, show_schema: false, + metric_types: Self::default_metric_types(), tree_maximum_render_width: 240, } } @@ -159,6 +168,7 @@ impl<'a> DisplayableExecutionPlan<'a> { show_metrics: ShowMetrics::Full, show_statistics: false, show_schema: false, + metric_types: Self::default_metric_types(), tree_maximum_render_width: 240, } } @@ -178,6 +188,12 @@ impl<'a> DisplayableExecutionPlan<'a> { self } + /// Specify which metric types should be rendered alongside the plan + pub fn set_metric_types(mut self, metric_types: Vec) -> Self { + self.metric_types = metric_types; + self + } + /// Set the maximum render width for the tree format pub fn set_tree_maximum_render_width(mut self, width: usize) -> Self { self.tree_maximum_render_width = width; @@ -206,6 +222,7 @@ impl<'a> DisplayableExecutionPlan<'a> { show_metrics: ShowMetrics, show_statistics: bool, show_schema: bool, + metric_types: Vec, } impl fmt::Display for Wrapper<'_> { fn fmt(&self, f: &mut Formatter) -> fmt::Result { @@ -216,6 +233,7 @@ impl<'a> DisplayableExecutionPlan<'a> { show_metrics: self.show_metrics, show_statistics: self.show_statistics, show_schema: self.show_schema, + metric_types: &self.metric_types, }; accept(self.plan, &mut visitor) } @@ -226,6 +244,7 @@ impl<'a> DisplayableExecutionPlan<'a> { show_metrics: self.show_metrics, show_statistics: self.show_statistics, show_schema: self.show_schema, + metric_types: self.metric_types.clone(), } } @@ -245,6 +264,7 @@ impl<'a> DisplayableExecutionPlan<'a> { plan: &'a dyn ExecutionPlan, show_metrics: ShowMetrics, show_statistics: bool, + metric_types: Vec, } impl fmt::Display for Wrapper<'_> { fn fmt(&self, f: &mut Formatter) -> fmt::Result { @@ -255,6 +275,7 @@ impl<'a> DisplayableExecutionPlan<'a> { t, show_metrics: self.show_metrics, show_statistics: self.show_statistics, + metric_types: &self.metric_types, graphviz_builder: GraphvizBuilder::default(), parents: Vec::new(), }; @@ -272,6 +293,7 @@ impl<'a> DisplayableExecutionPlan<'a> { plan: self.inner, show_metrics: self.show_metrics, show_statistics: self.show_statistics, + metric_types: self.metric_types.clone(), } } @@ -306,6 +328,7 @@ impl<'a> DisplayableExecutionPlan<'a> { show_metrics: ShowMetrics, show_statistics: bool, show_schema: bool, + metric_types: Vec, } impl fmt::Display for Wrapper<'_> { @@ -317,6 +340,7 @@ impl<'a> DisplayableExecutionPlan<'a> { show_metrics: self.show_metrics, show_statistics: self.show_statistics, show_schema: self.show_schema, + metric_types: &self.metric_types, }; visitor.pre_visit(self.plan)?; Ok(()) @@ 
-328,6 +352,7 @@ impl<'a> DisplayableExecutionPlan<'a> { show_metrics: self.show_metrics, show_statistics: self.show_statistics, show_schema: self.show_schema, + metric_types: self.metric_types.clone(), } } @@ -382,6 +407,8 @@ struct IndentVisitor<'a, 'b> { show_statistics: bool, /// If schema should be displayed show_schema: bool, + /// Which metric types should be rendered + metric_types: &'a [MetricType], } impl ExecutionPlanVisitor for IndentVisitor<'_, '_> { @@ -394,6 +421,7 @@ impl ExecutionPlanVisitor for IndentVisitor<'_, '_> { ShowMetrics::Aggregated => { if let Some(metrics) = plan.metrics() { let metrics = metrics + .filter_by_metric_types(self.metric_types) .aggregate_by_name() .sorted_for_display() .timestamps_removed(); @@ -405,6 +433,7 @@ impl ExecutionPlanVisitor for IndentVisitor<'_, '_> { } ShowMetrics::Full => { if let Some(metrics) = plan.metrics() { + let metrics = metrics.filter_by_metric_types(self.metric_types); write!(self.f, ", metrics=[{metrics}]")?; } else { write!(self.f, ", metrics=[]")?; @@ -441,6 +470,8 @@ struct GraphvizVisitor<'a, 'b> { show_metrics: ShowMetrics, /// If statistics should be displayed show_statistics: bool, + /// Which metric types should be rendered + metric_types: &'a [MetricType], graphviz_builder: GraphvizBuilder, /// Used to record parent node ids when visiting a plan. @@ -478,6 +509,7 @@ impl ExecutionPlanVisitor for GraphvizVisitor<'_, '_> { ShowMetrics::Aggregated => { if let Some(metrics) = plan.metrics() { let metrics = metrics + .filter_by_metric_types(self.metric_types) .aggregate_by_name() .sorted_for_display() .timestamps_removed(); @@ -489,6 +521,7 @@ impl ExecutionPlanVisitor for GraphvizVisitor<'_, '_> { } ShowMetrics::Full => { if let Some(metrics) = plan.metrics() { + let metrics = metrics.filter_by_metric_types(self.metric_types); format!("metrics=[{metrics}]") } else { "metrics=[]".to_string() diff --git a/datafusion/physical-plan/src/joins/hash_join/exec.rs b/datafusion/physical-plan/src/joins/hash_join/exec.rs index 4c293b0498e7..b5fe5ee5cda1 100644 --- a/datafusion/physical-plan/src/joins/hash_join/exec.rs +++ b/datafusion/physical-plan/src/joins/hash_join/exec.rs @@ -1137,7 +1137,7 @@ impl ExecutionPlan for HashJoinExec { // Add dynamic filters in Post phase if enabled if matches!(phase, FilterPushdownPhase::Post) - && config.optimizer.enable_dynamic_filter_pushdown + && config.optimizer.enable_join_dynamic_filter_pushdown { // Add actual dynamic filter to right side (probe side) let dynamic_filter = Self::create_dynamic_filter(&self.on); diff --git a/datafusion/physical-plan/src/joins/hash_join/stream.rs b/datafusion/physical-plan/src/joins/hash_join/stream.rs index adc00d9fe75e..88c50c2eb2ce 100644 --- a/datafusion/physical-plan/src/joins/hash_join/stream.rs +++ b/datafusion/physical-plan/src/joins/hash_join/stream.rs @@ -637,6 +637,7 @@ impl HashJoinStream { let (left_side, right_side) = get_final_indices_from_shared_bitmap( build_side.left_data.visited_indices_bitmap(), self.join_type, + true, ); let empty_right_batch = RecordBatch::new_empty(self.right.schema()); // use the left and right indices to produce the batch result diff --git a/datafusion/physical-plan/src/joins/mod.rs b/datafusion/physical-plan/src/joins/mod.rs index 1d36db996434..b0c28cf994f7 100644 --- a/datafusion/physical-plan/src/joins/mod.rs +++ b/datafusion/physical-plan/src/joins/mod.rs @@ -24,11 +24,13 @@ pub use hash_join::HashJoinExec; pub use nested_loop_join::NestedLoopJoinExec; use parking_lot::Mutex; // Note: SortMergeJoin is not used 
in plans yet +pub use piecewise_merge_join::PiecewiseMergeJoinExec; pub use sort_merge_join::SortMergeJoinExec; pub use symmetric_hash_join::SymmetricHashJoinExec; mod cross_join; mod hash_join; mod nested_loop_join; +mod piecewise_merge_join; mod sort_merge_join; mod stream_join_utils; mod symmetric_hash_join; diff --git a/datafusion/physical-plan/src/joins/nested_loop_join.rs b/datafusion/physical-plan/src/joins/nested_loop_join.rs index 0974b3a9114e..7ae09a42de88 100644 --- a/datafusion/physical-plan/src/joins/nested_loop_join.rs +++ b/datafusion/physical-plan/src/joins/nested_loop_join.rs @@ -48,11 +48,15 @@ use crate::{ use arrow::array::{ new_null_array, Array, BooleanArray, BooleanBufferBuilder, RecordBatchOptions, + UInt64Array, }; use arrow::buffer::BooleanBuffer; -use arrow::compute::{concat_batches, filter, filter_record_batch, not, BatchCoalescer}; +use arrow::compute::{ + concat_batches, filter, filter_record_batch, not, take, BatchCoalescer, +}; use arrow::datatypes::{Schema, SchemaRef}; use arrow::record_batch::RecordBatch; +use arrow_schema::DataType; use datafusion_common::cast::as_boolean_array; use datafusion_common::{ arrow_err, internal_datafusion_err, internal_err, project_schema, @@ -1661,11 +1665,30 @@ fn build_row_join_batch( // Broadcast the single build-side row to match the filtered // probe-side batch length let original_left_array = build_side_batch.column(column_index.index); - let scalar_value = ScalarValue::try_from_array( - original_left_array.as_ref(), - build_side_index, - )?; - scalar_value.to_array_of_size(filtered_probe_batch.num_rows())? + // Avoid using `ScalarValue::to_array_of_size()` for `List(Utf8View)` to avoid + // deep copies for buffers inside `Utf8View` array. See below for details. + // https://github.com/apache/datafusion/issues/18159 + // + // In other cases, `to_array_of_size()` is faster. + match original_left_array.data_type() { + DataType::List(field) | DataType::LargeList(field) + if field.data_type() == &DataType::Utf8View => + { + let indices_iter = std::iter::repeat_n( + build_side_index as u64, + filtered_probe_batch.num_rows(), + ); + let indices_array = UInt64Array::from_iter_values(indices_iter); + take(original_left_array.as_ref(), &indices_array, None)? + } + _ => { + let scalar_value = ScalarValue::try_from_array( + original_left_array.as_ref(), + build_side_index, + )?; + scalar_value.to_array_of_size(filtered_probe_batch.num_rows())? + } + } } else { // Take the filtered probe-side column using compute::take Arc::clone(filtered_probe_batch.column(column_index.index)) diff --git a/datafusion/physical-plan/src/joins/piecewise_merge_join/classic_join.rs b/datafusion/physical-plan/src/joins/piecewise_merge_join/classic_join.rs new file mode 100644 index 000000000000..646905e0d787 --- /dev/null +++ b/datafusion/physical-plan/src/joins/piecewise_merge_join/classic_join.rs @@ -0,0 +1,1550 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Stream Implementation for PiecewiseMergeJoin's Classic Join (Left, Right, Full, Inner) + +use arrow::array::{new_null_array, Array, PrimitiveBuilder}; +use arrow::compute::{take, BatchCoalescer}; +use arrow::datatypes::UInt32Type; +use arrow::{ + array::{ArrayRef, RecordBatch, UInt32Array}, + compute::{sort_to_indices, take_record_batch}, +}; +use arrow_schema::{Schema, SchemaRef, SortOptions}; +use datafusion_common::NullEquality; +use datafusion_common::{internal_err, Result}; +use datafusion_execution::{RecordBatchStream, SendableRecordBatchStream}; +use datafusion_expr::{JoinType, Operator}; +use datafusion_physical_expr::PhysicalExprRef; +use futures::{Stream, StreamExt}; +use std::{cmp::Ordering, task::ready}; +use std::{sync::Arc, task::Poll}; + +use crate::handle_state; +use crate::joins::piecewise_merge_join::exec::{BufferedSide, BufferedSideReadyState}; +use crate::joins::piecewise_merge_join::utils::need_produce_result_in_final; +use crate::joins::utils::{compare_join_arrays, get_final_indices_from_shared_bitmap}; +use crate::joins::utils::{BuildProbeJoinMetrics, StatefulStreamResult}; + +pub(super) enum PiecewiseMergeJoinStreamState { + WaitBufferedSide, + FetchStreamBatch, + ProcessStreamBatch(SortedStreamBatch), + ProcessUnmatched, + Completed, +} + +impl PiecewiseMergeJoinStreamState { + // Grab mutable reference to the current stream batch + fn try_as_process_stream_batch_mut(&mut self) -> Result<&mut SortedStreamBatch> { + match self { + PiecewiseMergeJoinStreamState::ProcessStreamBatch(state) => Ok(state), + _ => internal_err!("Expected streamed batch in StreamBatch"), + } + } +} + +/// The stream side incoming batch with required sort order. +/// +/// Note the compare key in the join predicate might include expressions on the original +/// columns, so we store the evaluated compare key separately. +/// e.g. For join predicate `buffer.v1 < (stream.v1 + 1)`, the `compare_key_values` field stores +/// the evaluated `stream.v1 + 1` array. 
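+///
+/// `batch` and `compare_key_values` are reordered by the same sort permutation in
+/// `fetch_stream_batch`, so row `i` of the batch lines up with element `i` of the compare key
+/// array(s).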
+pub(super) struct SortedStreamBatch {
+    pub batch: RecordBatch,
+    compare_key_values: Vec<ArrayRef>,
+}
+
+impl SortedStreamBatch {
+    #[allow(dead_code)]
+    fn new(batch: RecordBatch, compare_key_values: Vec<ArrayRef>) -> Self {
+        Self {
+            batch,
+            compare_key_values,
+        }
+    }
+
+    fn compare_key_values(&self) -> &Vec<ArrayRef> {
+        &self.compare_key_values
+    }
+}
+
+pub(super) struct ClassicPWMJStream {
+    // Output schema of the `PiecewiseMergeJoin`
+    pub schema: Arc<Schema>,
+
+    // Physical expression that is evaluated on the streamed side.
+    // We do not need `on_buffered` as it is already evaluated when
+    // creating the buffered side, which happens before initializing
+    // `PiecewiseMergeJoinStream`
+    pub on_streamed: PhysicalExprRef,
+    // Type of join
+    pub join_type: JoinType,
+    // Comparison operator
+    pub operator: Operator,
+    // Stream of incoming streamed-side batches
+    pub streamed: SendableRecordBatchStream,
+    // Streamed schema
+    streamed_schema: SchemaRef,
+    // Buffered side data
+    buffered_side: BufferedSide,
+    // Tracks the state of the `PiecewiseMergeJoin`
+    state: PiecewiseMergeJoinStreamState,
+    // Sort option for streamed side (specifies whether
+    // the sort is ascending or descending)
+    sort_option: SortOptions,
+    // Metrics for build + probe joins
+    join_metrics: BuildProbeJoinMetrics,
+    // Tracking incremental state for emitting record batches
+    batch_process_state: BatchProcessState,
+}
+
+impl RecordBatchStream for ClassicPWMJStream {
+    fn schema(&self) -> SchemaRef {
+        Arc::clone(&self.schema)
+    }
+}
+
+// `PiecewiseMergeJoinStreamState` is separated into `WaitBufferedSide`, `FetchStreamBatch`,
+// `ProcessStreamBatch`, `ProcessUnmatched` and `Completed`.
+//
+// Classic joins step through these states as follows:
+// 1. `WaitBufferedSide` - Load the buffered side data into memory.
+// 2. `FetchStreamBatch` - Fetch + sort incoming stream batches. Once the stream is exhausted,
+//    the state switches to `Completed` if other partitions are still processing the buffered
+//    side; only the last remaining partition switches to `ProcessUnmatched`.
+// 3. `ProcessStreamBatch` - Compare stream batch row values against the buffered side data.
+// 4. `ProcessUnmatched` - For `Right` and `Inner` joins there is nothing left to emit, so the
+//    state moves straight to `Completed`; for the other join types the unmatched buffered rows
+//    still need to be produced.
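+//
+// Transition summary (mirrors the code below):
+//   WaitBufferedSide   -> FetchStreamBatch
+//   FetchStreamBatch   -> ProcessStreamBatch            (a stream batch arrived)
+//   FetchStreamBatch   -> ProcessUnmatched | Completed  (stream exhausted; last vs. other partitions)
+//   ProcessStreamBatch -> FetchStreamBatch              (current stream batch fully processed)
+//   ProcessUnmatched   -> Completed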
+impl ClassicPWMJStream { + // Creates a new `PiecewiseMergeJoinStream` instance + #[allow(clippy::too_many_arguments)] + pub fn try_new( + schema: Arc, + on_streamed: PhysicalExprRef, + join_type: JoinType, + operator: Operator, + streamed: SendableRecordBatchStream, + buffered_side: BufferedSide, + state: PiecewiseMergeJoinStreamState, + sort_option: SortOptions, + join_metrics: BuildProbeJoinMetrics, + batch_size: usize, + ) -> Self { + Self { + schema: Arc::clone(&schema), + on_streamed, + join_type, + operator, + streamed_schema: streamed.schema(), + streamed, + buffered_side, + state, + sort_option, + join_metrics, + batch_process_state: BatchProcessState::new(schema, batch_size), + } + } + + fn poll_next_impl( + &mut self, + cx: &mut std::task::Context<'_>, + ) -> Poll>> { + loop { + return match self.state { + PiecewiseMergeJoinStreamState::WaitBufferedSide => { + handle_state!(ready!(self.collect_buffered_side(cx))) + } + PiecewiseMergeJoinStreamState::FetchStreamBatch => { + handle_state!(ready!(self.fetch_stream_batch(cx))) + } + PiecewiseMergeJoinStreamState::ProcessStreamBatch(_) => { + handle_state!(self.process_stream_batch()) + } + PiecewiseMergeJoinStreamState::ProcessUnmatched => { + handle_state!(self.process_unmatched_buffered_batch()) + } + PiecewiseMergeJoinStreamState::Completed => Poll::Ready(None), + }; + } + } + + // Collects buffered side data + fn collect_buffered_side( + &mut self, + cx: &mut std::task::Context<'_>, + ) -> Poll>>> { + let build_timer = self.join_metrics.build_time.timer(); + let buffered_data = ready!(self + .buffered_side + .try_as_initial_mut()? + .buffered_fut + .get_shared(cx))?; + build_timer.done(); + + // We will start fetching stream batches for classic joins + self.state = PiecewiseMergeJoinStreamState::FetchStreamBatch; + + self.buffered_side = + BufferedSide::Ready(BufferedSideReadyState { buffered_data }); + + Poll::Ready(Ok(StatefulStreamResult::Continue)) + } + + // Fetches incoming stream batches + fn fetch_stream_batch( + &mut self, + cx: &mut std::task::Context<'_>, + ) -> Poll>>> { + match ready!(self.streamed.poll_next_unpin(cx)) { + None => { + if self + .buffered_side + .try_as_ready_mut()? + .buffered_data + .remaining_partitions + .fetch_sub(1, std::sync::atomic::Ordering::SeqCst) + == 1 + { + self.batch_process_state.reset(); + self.state = PiecewiseMergeJoinStreamState::ProcessUnmatched; + } else { + self.state = PiecewiseMergeJoinStreamState::Completed; + } + } + Some(Ok(batch)) => { + // Evaluate the streamed physical expression on the stream batch + let stream_values: ArrayRef = self + .on_streamed + .evaluate(&batch)? + .into_array(batch.num_rows())?; + + self.join_metrics.input_batches.add(1); + self.join_metrics.input_rows.add(batch.num_rows()); + + // Sort stream values and change the streamed record batch accordingly + let indices = sort_to_indices( + stream_values.as_ref(), + Some(self.sort_option), + None, + )?; + let stream_batch = take_record_batch(&batch, &indices)?; + let stream_values = take(stream_values.as_ref(), &indices, None)?; + + // Reset BatchProcessState before processing a new stream batch + self.batch_process_state.reset(); + self.state = PiecewiseMergeJoinStreamState::ProcessStreamBatch( + SortedStreamBatch { + batch: stream_batch, + compare_key_values: vec![stream_values], + }, + ); + } + Some(Err(err)) => return Poll::Ready(Err(err)), + }; + + Poll::Ready(Ok(StatefulStreamResult::Continue)) + } + + // Only classic join will call. 
This function will process stream batches and evaluate against + // the buffered side data. + fn process_stream_batch( + &mut self, + ) -> Result>> { + let buffered_side = self.buffered_side.try_as_ready_mut()?; + let stream_batch = self.state.try_as_process_stream_batch_mut()?; + + if let Some(batch) = self + .batch_process_state + .output_batches + .next_completed_batch() + { + return Ok(StatefulStreamResult::Ready(Some(batch))); + } + + // Produce more work + let batch = resolve_classic_join( + buffered_side, + stream_batch, + Arc::clone(&self.schema), + self.operator, + self.sort_option, + self.join_type, + &mut self.batch_process_state, + )?; + + if !self.batch_process_state.continue_process { + // We finished scanning this stream batch. + self.batch_process_state + .output_batches + .finish_buffered_batch()?; + if let Some(b) = self + .batch_process_state + .output_batches + .next_completed_batch() + { + self.state = PiecewiseMergeJoinStreamState::FetchStreamBatch; + return Ok(StatefulStreamResult::Ready(Some(b))); + } + + // Nothing pending; hand back whatever `resolve` returned (often empty) and move on. + if self.batch_process_state.output_batches.is_empty() { + self.state = PiecewiseMergeJoinStreamState::FetchStreamBatch; + + return Ok(StatefulStreamResult::Ready(Some(batch))); + } + } + + Ok(StatefulStreamResult::Ready(Some(batch))) + } + + // Process remaining unmatched rows + fn process_unmatched_buffered_batch( + &mut self, + ) -> Result>> { + // Return early for `JoinType::Right` and `JoinType::Inner` + if matches!(self.join_type, JoinType::Right | JoinType::Inner) { + self.state = PiecewiseMergeJoinStreamState::Completed; + return Ok(StatefulStreamResult::Ready(None)); + } + + if !self.batch_process_state.continue_process { + if let Some(batch) = self + .batch_process_state + .output_batches + .next_completed_batch() + { + return Ok(StatefulStreamResult::Ready(Some(batch))); + } + + self.batch_process_state + .output_batches + .finish_buffered_batch()?; + if let Some(batch) = self + .batch_process_state + .output_batches + .next_completed_batch() + { + self.state = PiecewiseMergeJoinStreamState::Completed; + return Ok(StatefulStreamResult::Ready(Some(batch))); + } + } + + let buffered_data = + Arc::clone(&self.buffered_side.try_as_ready().unwrap().buffered_data); + + let (buffered_indices, _streamed_indices) = get_final_indices_from_shared_bitmap( + &buffered_data.visited_indices_bitmap, + self.join_type, + true, + ); + + let new_buffered_batch = + take_record_batch(buffered_data.batch(), &buffered_indices)?; + let mut buffered_columns = new_buffered_batch.columns().to_vec(); + + let streamed_columns: Vec = self + .streamed_schema + .fields() + .iter() + .map(|f| new_null_array(f.data_type(), new_buffered_batch.num_rows())) + .collect(); + + buffered_columns.extend(streamed_columns); + + let batch = RecordBatch::try_new(Arc::clone(&self.schema), buffered_columns)?; + + self.batch_process_state.output_batches.push_batch(batch)?; + + self.batch_process_state.continue_process = false; + if let Some(batch) = self + .batch_process_state + .output_batches + .next_completed_batch() + { + return Ok(StatefulStreamResult::Ready(Some(batch))); + } + + self.batch_process_state + .output_batches + .finish_buffered_batch()?; + if let Some(batch) = self + .batch_process_state + .output_batches + .next_completed_batch() + { + self.state = PiecewiseMergeJoinStreamState::Completed; + return Ok(StatefulStreamResult::Ready(Some(batch))); + } + + self.state = 
PiecewiseMergeJoinStreamState::Completed;
+        self.batch_process_state.reset();
+        Ok(StatefulStreamResult::Ready(None))
+    }
+}
+
+struct BatchProcessState {
+    // Coalesces completed output rows into batches of the configured size
+    output_batches: Box<BatchCoalescer>,
+    // Used to store the unmatched stream indices for `JoinType::Right` and `JoinType::Full`
+    unmatched_indices: PrimitiveBuilder<UInt32Type>,
+    // Start index on the buffered side; used to resume processing on the correct
+    // row
+    start_buffer_idx: usize,
+    // Start index on the stream side; used to resume processing on the correct
+    // row
+    start_stream_idx: usize,
+    // Signals if we found a match for the current stream row
+    found: bool,
+    // Signals to continue processing the current stream batch
+    continue_process: bool,
+    // Tracks whether the leading null compare keys have already been skipped
+    processed_null_count: bool,
+}
+
+impl BatchProcessState {
+    pub(crate) fn new(schema: Arc<Schema>, batch_size: usize) -> Self {
+        Self {
+            output_batches: Box::new(BatchCoalescer::new(schema, batch_size)),
+            unmatched_indices: PrimitiveBuilder::new(),
+            start_buffer_idx: 0,
+            start_stream_idx: 0,
+            found: false,
+            continue_process: true,
+            processed_null_count: false,
+        }
+    }
+
+    pub(crate) fn reset(&mut self) {
+        self.unmatched_indices = PrimitiveBuilder::new();
+        self.start_buffer_idx = 0;
+        self.start_stream_idx = 0;
+        self.found = false;
+        self.continue_process = true;
+        self.processed_null_count = false;
+    }
+}
+
+impl Stream for ClassicPWMJStream {
+    type Item = Result<RecordBatch>;
+
+    fn poll_next(
+        mut self: std::pin::Pin<&mut Self>,
+        cx: &mut std::task::Context<'_>,
+    ) -> Poll<Option<Self::Item>> {
+        self.poll_next_impl(cx)
+    }
+}
+
+// For Left, Right, Full, and Inner joins, incoming stream batches will already be sorted.
+#[allow(clippy::too_many_arguments)]
+fn resolve_classic_join(
+    buffered_side: &mut BufferedSideReadyState,
+    stream_batch: &SortedStreamBatch,
+    join_schema: Arc<Schema>,
+    operator: Operator,
+    sort_options: SortOptions,
+    join_type: JoinType,
+    batch_process_state: &mut BatchProcessState,
+) -> Result<RecordBatch> {
+    let buffered_len = buffered_side.buffered_data.values().len();
+    let stream_values = stream_batch.compare_key_values();
+
+    let mut buffer_idx = batch_process_state.start_buffer_idx;
+    let mut stream_idx = batch_process_state.start_stream_idx;
+
+    if !batch_process_state.processed_null_count {
+        let buffered_null_idx = buffered_side.buffered_data.values().null_count();
+        let stream_null_idx = stream_values[0].null_count();
+        buffer_idx = buffered_null_idx;
+        stream_idx = stream_null_idx;
+        batch_process_state.processed_null_count = true;
+    }
+
+    // Our buffer_idx variable allows us to start probing on the buffered side where we last matched
+    // in the previous stream row.
+    for row_idx in stream_idx..stream_batch.batch.num_rows() {
+        while buffer_idx < buffered_len {
+            let compare = {
+                let buffered_values = buffered_side.buffered_data.values();
+                compare_join_arrays(
+                    &[Arc::clone(&stream_values[0])],
+                    row_idx,
+                    &[Arc::clone(buffered_values)],
+                    buffer_idx,
+                    &[sort_options],
+                    NullEquality::NullEqualsNothing,
+                )?
+            };
+
+            // If we find a match we append all indices and move to the next stream row index
+            match operator {
+                Operator::Gt | Operator::Lt => {
+                    if matches!(compare, Ordering::Less) {
+                        batch_process_state.found = true;
+                        let count = buffered_len - buffer_idx;
+
+                        let batch = build_matched_indices_and_set_buffered_bitmap(
+                            (buffer_idx, count),
+                            (row_idx, count),
+                            buffered_side,
+                            stream_batch,
+                            join_type,
+                            Arc::clone(&join_schema),
+                        )?;
+
+                        batch_process_state.output_batches.push_batch(batch)?;
+
+                        // Flush batch and update pointers if we have a completed batch
+                        if let Some(batch) =
+                            batch_process_state.output_batches.next_completed_batch()
+                        {
+                            batch_process_state.found = false;
+                            batch_process_state.start_buffer_idx = buffer_idx;
+                            batch_process_state.start_stream_idx = row_idx + 1;
+                            return Ok(batch);
+                        }
+
+                        break;
+                    }
+                }
+                Operator::GtEq | Operator::LtEq => {
+                    if matches!(compare, Ordering::Equal | Ordering::Less) {
+                        batch_process_state.found = true;
+                        let count = buffered_len - buffer_idx;
+                        let batch = build_matched_indices_and_set_buffered_bitmap(
+                            (buffer_idx, count),
+                            (row_idx, count),
+                            buffered_side,
+                            stream_batch,
+                            join_type,
+                            Arc::clone(&join_schema),
+                        )?;
+
+                        // Flush batch and update pointers if we have a completed batch
+                        batch_process_state.output_batches.push_batch(batch)?;
+                        if let Some(batch) =
+                            batch_process_state.output_batches.next_completed_batch()
+                        {
+                            batch_process_state.found = false;
+                            batch_process_state.start_buffer_idx = buffer_idx;
+                            batch_process_state.start_stream_idx = row_idx + 1;
+                            return Ok(batch);
+                        }
+
+                        break;
+                    }
+                }
+                _ => {
+                    return internal_err!(
+                        "PiecewiseMergeJoin should not contain non-range operator {}",
+                        operator
+                    )
+                }
+            };
+
+            // Increment buffer_idx after every buffered row that does not yet match
+            buffer_idx += 1;
+        }
+
+        // If a match was not found for the current stream row index, the stream index is appended
+        // to the unmatched indices to be flushed later.
+        if matches!(join_type, JoinType::Right | JoinType::Full)
+            && !batch_process_state.found
+        {
+            batch_process_state
+                .unmatched_indices
+                .append_value(row_idx as u32);
+        }
+
+        batch_process_state.found = false;
+    }
+
+    // Flush all unmatched indices on the streamed side
+    if matches!(join_type, JoinType::Right | JoinType::Full) {
+        let batch = create_unmatched_batch(
+            &mut batch_process_state.unmatched_indices,
+            stream_batch,
+            Arc::clone(&join_schema),
+        )?;
+
+        batch_process_state.output_batches.push_batch(batch)?;
+    }
+
+    batch_process_state.continue_process = false;
+    Ok(RecordBatch::new_empty(Arc::clone(&join_schema)))
+}
+
+// Builds a record batch from index ranges on the buffered and streamed sides.
+//
+// Both ranges are given as (start index, count), matching the semantics of
+// batch.slice(start, count).
+fn build_matched_indices_and_set_buffered_bitmap(
+    buffered_range: (usize, usize),
+    streamed_range: (usize, usize),
+    buffered_side: &mut BufferedSideReadyState,
+    stream_batch: &SortedStreamBatch,
+    join_type: JoinType,
+    join_schema: Arc<Schema>,
+) -> Result<RecordBatch> {
+    // Mark the buffered indices as visited
+    if need_produce_result_in_final(join_type) {
+        let mut bitmap = buffered_side.buffered_data.visited_indices_bitmap.lock();
+        for i in buffered_range.0..buffered_range.0 + buffered_range.1 {
+            bitmap.set_bit(i, true);
+        }
+    }
+
+    let new_buffered_batch = buffered_side
+        .buffered_data
+        .batch()
+        .slice(buffered_range.0, buffered_range.1);
+    let mut buffered_columns = new_buffered_batch.columns().to_vec();
+
+    let indices = UInt32Array::from_value(streamed_range.0 as u32, streamed_range.1);
+    let new_stream_batch = take_record_batch(&stream_batch.batch, &indices)?;
+    let streamed_columns = new_stream_batch.columns().to_vec();
+
+    buffered_columns.extend(streamed_columns);
+
+    Ok(RecordBatch::try_new(
+        Arc::clone(&join_schema),
+        buffered_columns,
+    )?)
+}
+
+// Creates a record batch from the unmatched indices on the streamed side
+fn create_unmatched_batch(
+    streamed_indices: &mut PrimitiveBuilder<UInt32Type>,
+    stream_batch: &SortedStreamBatch,
+    join_schema: Arc<Schema>,
+) -> Result<RecordBatch> {
+    let streamed_indices = streamed_indices.finish();
+    let new_stream_batch = take_record_batch(&stream_batch.batch, &streamed_indices)?;
+    let streamed_columns = new_stream_batch.columns().to_vec();
+    let buffered_cols_len = join_schema.fields().len() - streamed_columns.len();
+
+    let num_rows = new_stream_batch.num_rows();
+    let mut buffered_columns: Vec<ArrayRef> = join_schema
+        .fields()
+        .iter()
+        .take(buffered_cols_len)
+        .map(|field| new_null_array(field.data_type(), num_rows))
+        .collect();
+
+    buffered_columns.extend(streamed_columns);
+
+    Ok(RecordBatch::try_new(
+        Arc::clone(&join_schema),
+        buffered_columns,
+    )?)
+} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{ + common, + joins::PiecewiseMergeJoinExec, + test::{build_table_i32, TestMemoryExec}, + ExecutionPlan, + }; + use arrow::array::{Date32Array, Date64Array}; + use arrow_schema::{DataType, Field}; + use datafusion_common::test_util::batches_to_string; + use datafusion_execution::TaskContext; + use datafusion_expr::JoinType; + use datafusion_physical_expr::{expressions::Column, PhysicalExpr}; + use insta::assert_snapshot; + use std::sync::Arc; + + fn columns(schema: &Schema) -> Vec { + schema.fields().iter().map(|f| f.name().clone()).collect() + } + + fn build_table( + a: (&str, &Vec), + b: (&str, &Vec), + c: (&str, &Vec), + ) -> Arc { + let batch = build_table_i32(a, b, c); + let schema = batch.schema(); + TestMemoryExec::try_new_exec(&[vec![batch]], schema, None).unwrap() + } + + fn build_date_table( + a: (&str, &Vec), + b: (&str, &Vec), + c: (&str, &Vec), + ) -> Arc { + let schema = Schema::new(vec![ + Field::new(a.0, DataType::Date32, false), + Field::new(b.0, DataType::Date32, false), + Field::new(c.0, DataType::Date32, false), + ]); + + let batch = RecordBatch::try_new( + Arc::new(schema), + vec![ + Arc::new(Date32Array::from(a.1.clone())), + Arc::new(Date32Array::from(b.1.clone())), + Arc::new(Date32Array::from(c.1.clone())), + ], + ) + .unwrap(); + + let schema = batch.schema(); + TestMemoryExec::try_new_exec(&[vec![batch]], schema, None).unwrap() + } + + fn build_date64_table( + a: (&str, &Vec), + b: (&str, &Vec), + c: (&str, &Vec), + ) -> Arc { + let schema = Schema::new(vec![ + Field::new(a.0, DataType::Date64, false), + Field::new(b.0, DataType::Date64, false), + Field::new(c.0, DataType::Date64, false), + ]); + + let batch = RecordBatch::try_new( + Arc::new(schema), + vec![ + Arc::new(Date64Array::from(a.1.clone())), + Arc::new(Date64Array::from(b.1.clone())), + Arc::new(Date64Array::from(c.1.clone())), + ], + ) + .unwrap(); + + let schema = batch.schema(); + TestMemoryExec::try_new_exec(&[vec![batch]], schema, None).unwrap() + } + + fn join( + left: Arc, + right: Arc, + on: (Arc, Arc), + operator: Operator, + join_type: JoinType, + ) -> Result { + PiecewiseMergeJoinExec::try_new(left, right, on, operator, join_type, 1) + } + + async fn join_collect( + left: Arc, + right: Arc, + on: (PhysicalExprRef, PhysicalExprRef), + operator: Operator, + join_type: JoinType, + ) -> Result<(Vec, Vec)> { + join_collect_with_options(left, right, on, operator, join_type).await + } + + async fn join_collect_with_options( + left: Arc, + right: Arc, + on: (PhysicalExprRef, PhysicalExprRef), + operator: Operator, + join_type: JoinType, + ) -> Result<(Vec, Vec)> { + let task_ctx = Arc::new(TaskContext::default()); + let join = join(left, right, on, operator, join_type)?; + let columns = columns(&join.schema()); + + let stream = join.execute(0, task_ctx)?; + let batches = common::collect(stream).await?; + Ok((columns, batches)) + } + + #[tokio::test] + async fn join_inner_less_than() -> Result<()> { + // +----+----+----+ + // | a1 | b1 | c1 | + // +----+----+----+ + // | 1 | 3 | 7 | + // | 2 | 2 | 8 | + // | 3 | 1 | 9 | + // +----+----+----+ + let left = build_table( + ("a1", &vec![1, 2, 3]), + ("b1", &vec![3, 2, 1]), // this has a repetition + ("c1", &vec![7, 8, 9]), + ); + + // +----+----+----+ + // | a2 | b1 | c2 | + // +----+----+----+ + // | 10 | 2 | 70 | + // | 20 | 3 | 80 | + // | 30 | 4 | 90 | + // +----+----+----+ + let right = build_table( + ("a2", &vec![10, 20, 30]), + ("b1", &vec![2, 3, 4]), + ("c2", &vec![70, 80, 90]), + ); + + 
let on = ( + Arc::new(Column::new_with_schema("b1", &left.schema())?) as _, + Arc::new(Column::new_with_schema("b1", &right.schema())?) as _, + ); + + let (_, batches) = + join_collect(left, right, on, Operator::Lt, JoinType::Inner).await?; + + assert_snapshot!(batches_to_string(&batches), @r#" + +----+----+----+----+----+----+ + | a1 | b1 | c1 | a2 | b1 | c2 | + +----+----+----+----+----+----+ + | 1 | 3 | 7 | 30 | 4 | 90 | + | 2 | 2 | 8 | 30 | 4 | 90 | + | 3 | 1 | 9 | 30 | 4 | 90 | + | 2 | 2 | 8 | 20 | 3 | 80 | + | 3 | 1 | 9 | 20 | 3 | 80 | + | 3 | 1 | 9 | 10 | 2 | 70 | + +----+----+----+----+----+----+ + "#); + Ok(()) + } + + #[tokio::test] + async fn join_inner_less_than_unsorted() -> Result<()> { + // +----+----+----+ + // | a1 | b1 | c1 | + // +----+----+----+ + // | 1 | 3 | 7 | + // | 2 | 2 | 8 | + // | 3 | 1 | 9 | + // +----+----+----+ + let left = build_table( + ("a1", &vec![1, 2, 3]), + ("b1", &vec![3, 2, 1]), // this has a repetition + ("c1", &vec![7, 8, 9]), + ); + + // +----+----+----+ + // | a2 | b1 | c2 | + // +----+----+----+ + // | 10 | 3 | 70 | + // | 20 | 2 | 80 | + // | 30 | 4 | 90 | + // +----+----+----+ + let right = build_table( + ("a2", &vec![10, 20, 30]), + ("b1", &vec![3, 2, 4]), + ("c2", &vec![70, 80, 90]), + ); + + let on = ( + Arc::new(Column::new_with_schema("b1", &left.schema())?) as _, + Arc::new(Column::new_with_schema("b1", &right.schema())?) as _, + ); + + let (_, batches) = + join_collect(left, right, on, Operator::Lt, JoinType::Inner).await?; + + assert_snapshot!(batches_to_string(&batches), @r#" + +----+----+----+----+----+----+ + | a1 | b1 | c1 | a2 | b1 | c2 | + +----+----+----+----+----+----+ + | 1 | 3 | 7 | 30 | 4 | 90 | + | 2 | 2 | 8 | 30 | 4 | 90 | + | 3 | 1 | 9 | 30 | 4 | 90 | + | 2 | 2 | 8 | 10 | 3 | 70 | + | 3 | 1 | 9 | 10 | 3 | 70 | + | 3 | 1 | 9 | 20 | 2 | 80 | + +----+----+----+----+----+----+ + "#); + Ok(()) + } + + #[tokio::test] + async fn join_inner_greater_than_equal_to() -> Result<()> { + // +----+----+----+ + // | a1 | b1 | c1 | + // +----+----+----+ + // | 1 | 2 | 7 | + // | 2 | 3 | 8 | + // | 3 | 4 | 9 | + // +----+----+----+ + let left = build_table( + ("a1", &vec![1, 2, 3]), + ("b1", &vec![2, 3, 4]), + ("c1", &vec![7, 8, 9]), + ); + + // +----+----+----+ + // | a2 | b1 | c2 | + // +----+----+----+ + // | 10 | 3 | 70 | + // | 20 | 2 | 80 | + // | 30 | 1 | 90 | + // +----+----+----+ + let right = build_table( + ("a2", &vec![10, 20, 30]), + ("b1", &vec![3, 2, 1]), + ("c2", &vec![70, 80, 90]), + ); + + let on = ( + Arc::new(Column::new_with_schema("b1", &left.schema())?) as _, + Arc::new(Column::new_with_schema("b1", &right.schema())?) 
as _, + ); + + let (_, batches) = + join_collect(left, right, on, Operator::GtEq, JoinType::Inner).await?; + + assert_snapshot!(batches_to_string(&batches), @r#" + +----+----+----+----+----+----+ + | a1 | b1 | c1 | a2 | b1 | c2 | + +----+----+----+----+----+----+ + | 1 | 2 | 7 | 30 | 1 | 90 | + | 2 | 3 | 8 | 30 | 1 | 90 | + | 3 | 4 | 9 | 30 | 1 | 90 | + | 1 | 2 | 7 | 20 | 2 | 80 | + | 2 | 3 | 8 | 20 | 2 | 80 | + | 3 | 4 | 9 | 20 | 2 | 80 | + | 2 | 3 | 8 | 10 | 3 | 70 | + | 3 | 4 | 9 | 10 | 3 | 70 | + +----+----+----+----+----+----+ + "#); + Ok(()) + } + + #[tokio::test] + async fn join_inner_empty_left() -> Result<()> { + // +----+----+----+ + // | a1 | b1 | c1 | + // +----+----+----+ + // (empty) + // +----+----+----+ + let left = build_table( + ("a1", &Vec::::new()), + ("b1", &Vec::::new()), + ("c1", &Vec::::new()), + ); + + // +----+----+----+ + // | a2 | b1 | c2 | + // +----+----+----+ + // | 1 | 1 | 1 | + // | 2 | 2 | 2 | + // +----+----+----+ + let right = build_table( + ("a2", &vec![1, 2]), + ("b1", &vec![1, 2]), + ("c2", &vec![1, 2]), + ); + + let on = ( + Arc::new(Column::new_with_schema("b1", &left.schema())?) as _, + Arc::new(Column::new_with_schema("b1", &right.schema())?) as _, + ); + let (_, batches) = + join_collect(left, right, on, Operator::LtEq, JoinType::Inner).await?; + assert_snapshot!(batches_to_string(&batches), @r#" + +----+----+----+----+----+----+ + | a1 | b1 | c1 | a2 | b1 | c2 | + +----+----+----+----+----+----+ + +----+----+----+----+----+----+ + "#); + Ok(()) + } + + #[tokio::test] + async fn join_full_greater_than_equal_to() -> Result<()> { + // +----+----+-----+ + // | a1 | b1 | c1 | + // +----+----+-----+ + // | 1 | 1 | 100 | + // | 2 | 2 | 200 | + // +----+----+-----+ + let left = build_table( + ("a1", &vec![1, 2]), + ("b1", &vec![1, 2]), + ("c1", &vec![100, 200]), + ); + + // +----+----+-----+ + // | a2 | b1 | c2 | + // +----+----+-----+ + // | 10 | 3 | 300 | + // | 20 | 2 | 400 | + // +----+----+-----+ + let right = build_table( + ("a2", &vec![10, 20]), + ("b1", &vec![3, 2]), + ("c2", &vec![300, 400]), + ); + + let on = ( + Arc::new(Column::new_with_schema("b1", &left.schema())?) as _, + Arc::new(Column::new_with_schema("b1", &right.schema())?) as _, + ); + + let (_, batches) = + join_collect(left, right, on, Operator::GtEq, JoinType::Full).await?; + + assert_snapshot!(batches_to_string(&batches), @r#" + +----+----+-----+----+----+-----+ + | a1 | b1 | c1 | a2 | b1 | c2 | + +----+----+-----+----+----+-----+ + | 2 | 2 | 200 | 20 | 2 | 400 | + | | | | 10 | 3 | 300 | + | 1 | 1 | 100 | | | | + +----+----+-----+----+----+-----+ + "#); + + Ok(()) + } + + #[tokio::test] + async fn join_left_greater_than() -> Result<()> { + // +----+----+----+ + // | a1 | b1 | c1 | + // +----+----+----+ + // | 1 | 1 | 7 | + // | 2 | 3 | 8 | + // | 3 | 4 | 9 | + // +----+----+----+ + let left = build_table( + ("a1", &vec![1, 2, 3]), + ("b1", &vec![1, 3, 4]), + ("c1", &vec![7, 8, 9]), + ); + + // +----+----+----+ + // | a2 | b1 | c2 | + // +----+----+----+ + // | 10 | 3 | 70 | + // | 20 | 2 | 80 | + // | 30 | 1 | 90 | + // +----+----+----+ + let right = build_table( + ("a2", &vec![10, 20, 30]), + ("b1", &vec![3, 2, 1]), + ("c2", &vec![70, 80, 90]), + ); + + let on = ( + Arc::new(Column::new_with_schema("b1", &left.schema())?) as _, + Arc::new(Column::new_with_schema("b1", &right.schema())?) 
as _, + ); + + let (_, batches) = + join_collect(left, right, on, Operator::Gt, JoinType::Left).await?; + + assert_snapshot!(batches_to_string(&batches), @r#" + +----+----+----+----+----+----+ + | a1 | b1 | c1 | a2 | b1 | c2 | + +----+----+----+----+----+----+ + | 2 | 3 | 8 | 30 | 1 | 90 | + | 3 | 4 | 9 | 30 | 1 | 90 | + | 2 | 3 | 8 | 20 | 2 | 80 | + | 3 | 4 | 9 | 20 | 2 | 80 | + | 3 | 4 | 9 | 10 | 3 | 70 | + | 1 | 1 | 7 | | | | + +----+----+----+----+----+----+ + "#); + Ok(()) + } + + #[tokio::test] + async fn join_right_greater_than() -> Result<()> { + // +----+----+----+ + // | a1 | b1 | c1 | + // +----+----+----+ + // | 1 | 1 | 7 | + // | 2 | 3 | 8 | + // | 3 | 4 | 9 | + // +----+----+----+ + let left = build_table( + ("a1", &vec![1, 2, 3]), + ("b1", &vec![1, 3, 4]), + ("c1", &vec![7, 8, 9]), + ); + + // +----+----+----+ + // | a2 | b1 | c2 | + // +----+----+----+ + // | 10 | 5 | 70 | + // | 20 | 3 | 80 | + // | 30 | 2 | 90 | + // +----+----+----+ + let right = build_table( + ("a2", &vec![10, 20, 30]), + ("b1", &vec![5, 3, 2]), + ("c2", &vec![70, 80, 90]), + ); + + let on = ( + Arc::new(Column::new_with_schema("b1", &left.schema())?) as _, + Arc::new(Column::new_with_schema("b1", &right.schema())?) as _, + ); + + let (_, batches) = + join_collect(left, right, on, Operator::Gt, JoinType::Right).await?; + + assert_snapshot!(batches_to_string(&batches), @r#" + +----+----+----+----+----+----+ + | a1 | b1 | c1 | a2 | b1 | c2 | + +----+----+----+----+----+----+ + | 2 | 3 | 8 | 30 | 2 | 90 | + | 3 | 4 | 9 | 30 | 2 | 90 | + | 3 | 4 | 9 | 20 | 3 | 80 | + | | | | 10 | 5 | 70 | + +----+----+----+----+----+----+ + "#); + Ok(()) + } + + #[tokio::test] + async fn join_right_less_than() -> Result<()> { + // +----+----+----+ + // | a1 | b1 | c1 | + // +----+----+----+ + // | 1 | 4 | 7 | + // | 2 | 3 | 8 | + // | 3 | 1 | 9 | + // +----+----+----+ + let left = build_table( + ("a1", &vec![1, 2, 3]), + ("b1", &vec![4, 3, 1]), + ("c1", &vec![7, 8, 9]), + ); + + // +----+----+----+ + // | a2 | b1 | c2 | + // +----+----+----+ + // | 10 | 2 | 70 | + // | 20 | 3 | 80 | + // | 30 | 5 | 90 | + // +----+----+----+ + let right = build_table( + ("a2", &vec![10, 20, 30]), + ("b1", &vec![2, 3, 5]), + ("c2", &vec![70, 80, 90]), + ); + + let on = ( + Arc::new(Column::new_with_schema("b1", &left.schema())?) as _, + Arc::new(Column::new_with_schema("b1", &right.schema())?) as _, + ); + + let (_, batches) = + join_collect(left, right, on, Operator::Lt, JoinType::Right).await?; + + assert_snapshot!(batches_to_string(&batches), @r#" + +----+----+----+----+----+----+ + | a1 | b1 | c1 | a2 | b1 | c2 | + +----+----+----+----+----+----+ + | 1 | 4 | 7 | 30 | 5 | 90 | + | 2 | 3 | 8 | 30 | 5 | 90 | + | 3 | 1 | 9 | 30 | 5 | 90 | + | 3 | 1 | 9 | 20 | 3 | 80 | + | 3 | 1 | 9 | 10 | 2 | 70 | + +----+----+----+----+----+----+ + "#); + Ok(()) + } + + #[tokio::test] + async fn join_inner_less_than_equal_with_dups() -> Result<()> { + // +----+----+----+ + // | a1 | b1 | c1 | + // +----+----+----+ + // | 1 | 4 | 7 | + // | 2 | 4 | 8 | + // | 3 | 2 | 9 | + // +----+----+----+ + let left = build_table( + ("a1", &vec![1, 2, 3]), + ("b1", &vec![4, 4, 2]), + ("c1", &vec![7, 8, 9]), + ); + + // +----+----+----+ + // | a2 | b1 | c2 | + // +----+----+----+ + // | 10 | 4 | 70 | + // | 20 | 3 | 80 | + // | 30 | 2 | 90 | + // +----+----+----+ + let right = build_table( + ("a2", &vec![10, 20, 30]), + ("b1", &vec![4, 3, 2]), + ("c2", &vec![70, 80, 90]), + ); + + let on = ( + Arc::new(Column::new_with_schema("b1", &left.schema())?) 
as _, + Arc::new(Column::new_with_schema("b1", &right.schema())?) as _, + ); + + let (_, batches) = + join_collect(left, right, on, Operator::LtEq, JoinType::Inner).await?; + + // Expected grouping follows right.b1 descending (4, 3, 2) + assert_snapshot!(batches_to_string(&batches), @r#" + +----+----+----+----+----+----+ + | a1 | b1 | c1 | a2 | b1 | c2 | + +----+----+----+----+----+----+ + | 1 | 4 | 7 | 10 | 4 | 70 | + | 2 | 4 | 8 | 10 | 4 | 70 | + | 3 | 2 | 9 | 10 | 4 | 70 | + | 3 | 2 | 9 | 20 | 3 | 80 | + | 3 | 2 | 9 | 30 | 2 | 90 | + +----+----+----+----+----+----+ + "#); + Ok(()) + } + + #[tokio::test] + async fn join_inner_greater_than_unsorted_right() -> Result<()> { + // +----+----+----+ + // | a1 | b1 | c1 | + // +----+----+----+ + // | 1 | 1 | 7 | + // | 2 | 2 | 8 | + // | 3 | 4 | 9 | + // +----+----+----+ + let left = build_table( + ("a1", &vec![1, 2, 3]), + ("b1", &vec![1, 2, 4]), + ("c1", &vec![7, 8, 9]), + ); + + // +----+----+----+ + // | a2 | b1 | c2 | + // +----+----+----+ + // | 10 | 3 | 70 | + // | 20 | 1 | 80 | + // | 30 | 2 | 90 | + // +----+----+----+ + let right = build_table( + ("a2", &vec![10, 20, 30]), + ("b1", &vec![3, 1, 2]), + ("c2", &vec![70, 80, 90]), + ); + + let on = ( + Arc::new(Column::new_with_schema("b1", &left.schema())?) as _, + Arc::new(Column::new_with_schema("b1", &right.schema())?) as _, + ); + + let (_, batches) = + join_collect(left, right, on, Operator::Gt, JoinType::Inner).await?; + + // Grouped by right in ascending evaluation for > (1,2,3) + assert_snapshot!(batches_to_string(&batches), @r#" + +----+----+----+----+----+----+ + | a1 | b1 | c1 | a2 | b1 | c2 | + +----+----+----+----+----+----+ + | 2 | 2 | 8 | 20 | 1 | 80 | + | 3 | 4 | 9 | 20 | 1 | 80 | + | 3 | 4 | 9 | 30 | 2 | 90 | + | 3 | 4 | 9 | 10 | 3 | 70 | + +----+----+----+----+----+----+ + "#); + Ok(()) + } + + #[tokio::test] + async fn join_left_less_than_equal_with_left_nulls_on_no_match() -> Result<()> { + // +----+----+----+ + // | a1 | b1 | c1 | + // +----+----+----+ + // | 1 | 5 | 7 | + // | 2 | 4 | 8 | + // | 3 | 1 | 9 | + // +----+----+----+ + let left = build_table( + ("a1", &vec![1, 2, 3]), + ("b1", &vec![5, 4, 1]), + ("c1", &vec![7, 8, 9]), + ); + + // +----+----+----+ + // | a2 | b1 | c2 | + // +----+----+----+ + // | 10 | 3 | 70 | + // +----+----+----+ + let right = build_table(("a2", &vec![10]), ("b1", &vec![3]), ("c2", &vec![70])); + + let on = ( + Arc::new(Column::new_with_schema("b1", &left.schema())?) as _, + Arc::new(Column::new_with_schema("b1", &right.schema())?) 
as _, + ); + + let (_, batches) = + join_collect(left, right, on, Operator::LtEq, JoinType::Left).await?; + + assert_snapshot!(batches_to_string(&batches), @r#" + +----+----+----+----+----+----+ + | a1 | b1 | c1 | a2 | b1 | c2 | + +----+----+----+----+----+----+ + | 3 | 1 | 9 | 10 | 3 | 70 | + | 1 | 5 | 7 | | | | + | 2 | 4 | 8 | | | | + +----+----+----+----+----+----+ + "#); + Ok(()) + } + + #[tokio::test] + async fn join_right_greater_than_equal_with_right_nulls_on_no_match() -> Result<()> { + // +----+----+----+ + // | a1 | b1 | c1 | + // +----+----+----+ + // | 1 | 1 | 7 | + // | 2 | 2 | 8 | + // +----+----+----+ + let left = build_table( + ("a1", &vec![1, 2]), + ("b1", &vec![1, 2]), + ("c1", &vec![7, 8]), + ); + + // +----+----+----+ + // | a2 | b1 | c2 | + // +----+----+----+ + // | 10 | 3 | 70 | + // | 20 | 5 | 80 | + // +----+----+----+ + let right = build_table( + ("a2", &vec![10, 20]), + ("b1", &vec![3, 5]), + ("c2", &vec![70, 80]), + ); + + let on = ( + Arc::new(Column::new_with_schema("b1", &left.schema())?) as _, + Arc::new(Column::new_with_schema("b1", &right.schema())?) as _, + ); + + let (_, batches) = + join_collect(left, right, on, Operator::GtEq, JoinType::Right).await?; + + assert_snapshot!(batches_to_string(&batches), @r#" + +----+----+----+----+----+----+ + | a1 | b1 | c1 | a2 | b1 | c2 | + +----+----+----+----+----+----+ + | | | | 10 | 3 | 70 | + | | | | 20 | 5 | 80 | + +----+----+----+----+----+----+ + "#); + Ok(()) + } + + #[tokio::test] + async fn join_inner_single_row_left_less_than() -> Result<()> { + let left = build_table(("a1", &vec![42]), ("b1", &vec![5]), ("c1", &vec![999])); + + let right = build_table( + ("a2", &vec![10, 20, 30]), + ("b1", &vec![1, 5, 7]), + ("c2", &vec![70, 80, 90]), + ); + + let on = ( + Arc::new(Column::new_with_schema("b1", &left.schema())?) as _, + Arc::new(Column::new_with_schema("b1", &right.schema())?) as _, + ); + + let (_, batches) = + join_collect(left, right, on, Operator::Lt, JoinType::Inner).await?; + + assert_snapshot!(batches_to_string(&batches), @r#" + +----+----+-----+----+----+----+ + | a1 | b1 | c1 | a2 | b1 | c2 | + +----+----+-----+----+----+----+ + | 42 | 5 | 999 | 30 | 7 | 90 | + +----+----+-----+----+----+----+ + "#); + Ok(()) + } + + #[tokio::test] + async fn join_inner_empty_right() -> Result<()> { + let left = build_table( + ("a1", &vec![1, 2, 3]), + ("b1", &vec![1, 2, 3]), + ("c1", &vec![7, 8, 9]), + ); + + let right = build_table( + ("a2", &Vec::::new()), + ("b1", &Vec::::new()), + ("c2", &Vec::::new()), + ); + + let on = ( + Arc::new(Column::new_with_schema("b1", &left.schema())?) as _, + Arc::new(Column::new_with_schema("b1", &right.schema())?) 
as _, + ); + + let (_, batches) = + join_collect(left, right, on, Operator::Gt, JoinType::Inner).await?; + + assert_snapshot!(batches_to_string(&batches), @r#" + +----+----+----+----+----+----+ + | a1 | b1 | c1 | a2 | b1 | c2 | + +----+----+----+----+----+----+ + +----+----+----+----+----+----+ + "#); + Ok(()) + } + + #[tokio::test] + async fn join_date32_inner_less_than() -> Result<()> { + // +----+-------+----+ + // | a1 | b1 | c1 | + // +----+-------+----+ + // | 1 | 19107 | 7 | + // | 2 | 19107 | 8 | + // | 3 | 19105 | 9 | + // +----+-------+----+ + let left = build_date_table( + ("a1", &vec![1, 2, 3]), + ("b1", &vec![19107, 19107, 19105]), + ("c1", &vec![7, 8, 9]), + ); + + // +----+-------+----+ + // | a2 | b1 | c2 | + // +----+-------+----+ + // | 10 | 19105 | 70 | + // | 20 | 19103 | 80 | + // | 30 | 19107 | 90 | + // +----+-------+----+ + let right = build_date_table( + ("a2", &vec![10, 20, 30]), + ("b1", &vec![19105, 19103, 19107]), + ("c2", &vec![70, 80, 90]), + ); + + let on = ( + Arc::new(Column::new_with_schema("b1", &left.schema())?) as _, + Arc::new(Column::new_with_schema("b1", &right.schema())?) as _, + ); + + let (_, batches) = + join_collect(left, right, on, Operator::Lt, JoinType::Inner).await?; + + assert_snapshot!(batches_to_string(&batches), @r#" + +------------+------------+------------+------------+------------+------------+ + | a1 | b1 | c1 | a2 | b1 | c2 | + +------------+------------+------------+------------+------------+------------+ + | 1970-01-04 | 2022-04-23 | 1970-01-10 | 1970-01-31 | 2022-04-25 | 1970-04-01 | + +------------+------------+------------+------------+------------+------------+ + "#); + Ok(()) + } + + #[tokio::test] + async fn join_date64_inner_less_than() -> Result<()> { + // +----+---------------+----+ + // | a1 | b1 | c1 | + // +----+---------------+----+ + // | 1 | 1650903441000 | 7 | + // | 2 | 1650903441000 | 8 | + // | 3 | 1650703441000 | 9 | + // +----+---------------+----+ + let left = build_date64_table( + ("a1", &vec![1, 2, 3]), + ("b1", &vec![1650903441000, 1650903441000, 1650703441000]), + ("c1", &vec![7, 8, 9]), + ); + + // +----+---------------+----+ + // | a2 | b1 | c2 | + // +----+---------------+----+ + // | 10 | 1650703441000 | 70 | + // | 20 | 1650503441000 | 80 | + // | 30 | 1650903441000 | 90 | + // +----+---------------+----+ + let right = build_date64_table( + ("a2", &vec![10, 20, 30]), + ("b1", &vec![1650703441000, 1650503441000, 1650903441000]), + ("c2", &vec![70, 80, 90]), + ); + + let on = ( + Arc::new(Column::new_with_schema("b1", &left.schema())?) as _, + Arc::new(Column::new_with_schema("b1", &right.schema())?) 
as _, + ); + + let (_, batches) = + join_collect(left, right, on, Operator::Lt, JoinType::Inner).await?; + + assert_snapshot!(batches_to_string(&batches), @r#" + +-------------------------+---------------------+-------------------------+-------------------------+---------------------+-------------------------+ + | a1 | b1 | c1 | a2 | b1 | c2 | + +-------------------------+---------------------+-------------------------+-------------------------+---------------------+-------------------------+ + | 1970-01-01T00:00:00.003 | 2022-04-23T08:44:01 | 1970-01-01T00:00:00.009 | 1970-01-01T00:00:00.030 | 2022-04-25T16:17:21 | 1970-01-01T00:00:00.090 | + +-------------------------+---------------------+-------------------------+-------------------------+---------------------+-------------------------+ + "#); + Ok(()) + } + + #[tokio::test] + async fn join_date64_right_less_than() -> Result<()> { + // +----+---------------+----+ + // | a1 | b1 | c1 | + // +----+---------------+----+ + // | 1 | 1650903441000 | 7 | + // | 2 | 1650703441000 | 8 | + // +----+---------------+----+ + let left = build_date64_table( + ("a1", &vec![1, 2]), + ("b1", &vec![1650903441000, 1650703441000]), + ("c1", &vec![7, 8]), + ); + + // +----+---------------+----+ + // | a2 | b1 | c2 | + // +----+---------------+----+ + // | 10 | 1650703441000 | 80 | + // | 20 | 1650903441000 | 90 | + // +----+---------------+----+ + let right = build_date64_table( + ("a2", &vec![10, 20]), + ("b1", &vec![1650703441000, 1650903441000]), + ("c2", &vec![80, 90]), + ); + + let on = ( + Arc::new(Column::new_with_schema("b1", &left.schema())?) as _, + Arc::new(Column::new_with_schema("b1", &right.schema())?) as _, + ); + + let (_, batches) = + join_collect(left, right, on, Operator::Lt, JoinType::Right).await?; + + assert_snapshot!(batches_to_string(&batches), @r#" + +-------------------------+---------------------+-------------------------+-------------------------+---------------------+-------------------------+ + | a1 | b1 | c1 | a2 | b1 | c2 | + +-------------------------+---------------------+-------------------------+-------------------------+---------------------+-------------------------+ + | 1970-01-01T00:00:00.002 | 2022-04-23T08:44:01 | 1970-01-01T00:00:00.008 | 1970-01-01T00:00:00.020 | 2022-04-25T16:17:21 | 1970-01-01T00:00:00.090 | + | | | | 1970-01-01T00:00:00.010 | 2022-04-23T08:44:01 | 1970-01-01T00:00:00.080 | + +-------------------------+---------------------+-------------------------+-------------------------+---------------------+-------------------------+ +"#); + Ok(()) + } +} diff --git a/datafusion/physical-plan/src/joins/piecewise_merge_join/exec.rs b/datafusion/physical-plan/src/joins/piecewise_merge_join/exec.rs new file mode 100644 index 000000000000..987f3e9df45a --- /dev/null +++ b/datafusion/physical-plan/src/joins/piecewise_merge_join/exec.rs @@ -0,0 +1,748 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow::array::Array;
+use arrow::{
+    array::{ArrayRef, BooleanBufferBuilder, RecordBatch},
+    compute::concat_batches,
+    util::bit_util,
+};
+use arrow_schema::{SchemaRef, SortOptions};
+use datafusion_common::not_impl_err;
+use datafusion_common::{internal_err, JoinSide, Result};
+use datafusion_execution::{
+    memory_pool::{MemoryConsumer, MemoryReservation},
+    SendableRecordBatchStream,
+};
+use datafusion_expr::{JoinType, Operator};
+use datafusion_physical_expr::equivalence::join_equivalence_properties;
+use datafusion_physical_expr::{
+    Distribution, LexOrdering, OrderingRequirements, PhysicalExpr, PhysicalExprRef,
+    PhysicalSortExpr,
+};
+use datafusion_physical_expr_common::physical_expr::fmt_sql;
+use futures::TryStreamExt;
+use parking_lot::Mutex;
+use std::fmt::Formatter;
+use std::sync::atomic::AtomicUsize;
+use std::sync::Arc;
+
+use crate::execution_plan::{boundedness_from_children, EmissionType};
+
+use crate::joins::piecewise_merge_join::classic_join::{
+    ClassicPWMJStream, PiecewiseMergeJoinStreamState,
+};
+use crate::joins::piecewise_merge_join::utils::{
+    build_visited_indices_map, is_existence_join, is_right_existence_join,
+};
+use crate::joins::utils::asymmetric_join_output_partitioning;
+use crate::{
+    joins::{
+        utils::{build_join_schema, BuildProbeJoinMetrics, OnceAsync, OnceFut},
+        SharedBitmapBuilder,
+    },
+    metrics::ExecutionPlanMetricsSet,
+    spill::get_record_batch_memory_size,
+    ExecutionPlan, PlanProperties,
+};
+use crate::{DisplayAs, DisplayFormatType, ExecutionPlanProperties};
+
+/// `PiecewiseMergeJoinExec` is a join execution plan that evaluates a single range filter and shows much
+/// better performance for these workloads than `NestedLoopJoin`
+///
+/// The physical planner will choose to evaluate this join when there is only one comparison filter: a
+/// binary expression using [`Operator::Lt`], [`Operator::LtEq`], [`Operator::Gt`], or [`Operator::GtEq`].
+/// Examples:
+/// - `col0` < `colb`, `col0` <= `colb`, `col0` > `colb`, `col0` >= `colb`
+///
+/// # Execution Plan Inputs
+/// For `PiecewiseMergeJoin` we label the right input as the 'streamed' side and the left input as the
+/// 'buffered' side.
+///
+/// `PiecewiseMergeJoin` takes a sorted input for the side to be buffered and is able to sort streamed record
+/// batches during processing. The buffered input must be sorted ascending or descending depending on the operator.
+///
+/// # Algorithms
+/// Classic joins are processed differently compared to existence joins.
+///
+/// ## Classic Joins (Inner, Full, Left, Right)
+/// For classic joins we buffer the build side and stream the probe side.
+/// Both sides are sorted so that we can iterate from index 0 to the end on each side. This ordering ensures
+/// that when we find the first matching pair of rows, we can emit the current stream row joined with all remaining
+/// buffered rows from the match position onward, without rescanning earlier buffered rows.
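As a standalone illustration of the paragraph above, here is a dependency-free sketch of the "first match, then emit the suffix" scan on plain vectors for a `buffered < streamed` predicate with both sides sorted descending; it is not the PR's code, which additionally tracks a visited bitmap, resumable indices, and batch coalescing.

```rust
// Illustrative only: the two-pointer scan behind the classic-join path.
fn classic_scan(streamed_desc: &[i32], buffered_desc: &[i32]) -> Vec<(i32, i32)> {
    let mut out = Vec::new();
    let mut buffer_idx = 0;
    for &s in streamed_desc {
        // Skip buffered rows that are too large to satisfy `buffered < s`;
        // the pointer never moves backwards across stream rows.
        while buffer_idx < buffered_desc.len() && buffered_desc[buffer_idx] >= s {
            buffer_idx += 1;
        }
        // Everything from `buffer_idx` onward satisfies the predicate.
        for &b in &buffered_desc[buffer_idx..] {
            out.push((b, s));
        }
    }
    out
}

fn main() {
    // Mirrors the `join_inner_less_than` test in classic_join.rs:
    // buffered b1 = [3, 2, 1], streamed b1 = [4, 3, 2] (both descending).
    let pairs = classic_scan(&[4, 3, 2], &[3, 2, 1]);
    assert_eq!(pairs.len(), 6);
}
```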
+///
+/// For `<` and `<=` operators, both inputs are sorted in **descending** order, while for `>` and `>=` operators
+/// they are sorted in **ascending** order. This choice ensures that the pointer on the buffered side can advance
+/// monotonically as we stream new batches from the stream side.
+///
+/// The streamed side may arrive unsorted, so this operator sorts each incoming batch in memory before
+/// processing. The buffered side is required to be globally sorted; the plan declares this requirement
+/// in `required_input_ordering`, which allows the optimizer to automatically insert a `SortExec` on that side if needed.
+/// By the time this operator runs, the buffered side is guaranteed to be in the proper order.
+///
+/// The pseudocode for the algorithm looks like this:
+///
+/// ```text
+/// for stream_row in stream_batch:
+///     for buffer_row in buffer_batch:
+///         if compare(stream_row, buffer_row):
+///             output stream_row X buffer_batch[buffer_row:]
+///         else:
+///             continue
+/// ```
+///
+/// The algorithm uses the (larger) streamed side to drive the loop: each stream row scans the buffered side
+/// only until its first match. Because every match then emits a whole run of buffered rows, output handling
+/// can be vectorized more effectively.
+///
+/// Here is an example:
+///
+/// We perform a `JoinType::Left` with these two batches and the operator being `Operator::Lt` (<). For each
+/// row on the streamed side we move a pointer on the buffered side until it matches the condition. Once we
+/// reach the matching row (in this case row 1 on the streamed side has its first match at row 2 on the
+/// buffered side; 100 < 200 is true), we can emit the matching row and every row after it. We can emit the
+/// rows like this because if the batch is sorted in ascending order, every subsequent row will also satisfy
+/// the condition as they will all be larger values.
+///
+/// ```text
+/// SQL statement:
+/// SELECT *
+/// FROM (VALUES (100), (200), (500)) AS streamed(a)
+/// LEFT JOIN (VALUES (100), (200), (200), (300), (400)) AS buffered(b)
+/// ON streamed.a < buffered.b;
+///
+/// Processing Row 1:
+///
+/// Sorted Buffered Side                                        Sorted Streamed Side
+///    ┌──────────────────┐                                        ┌──────────────────┐
+///  1 │ 100              │                                      1 │ 100              │
+///    ├──────────────────┤                                        ├──────────────────┤
+///  2 │ 200              │ ─┐                                   2 │ 200              │
+///    ├──────────────────┤  │ For row 1 on streamed side with      ├──────────────────┤
+///  3 │ 200              │  │ value 100, we emit rows 2 - 5      3 │ 500              │
+///    ├──────────────────┤  │ as matches when the operator is      └──────────────────┘
+///  4 │ 300              │  │ `Operator::Lt` (<), emitting all
+///    ├──────────────────┤  │ rows after the first match (row
+///  5 │ 400              │ ─┘ 2 buffered side; 100 < 200)
+///    └──────────────────┘
+///
+/// Processing Row 2:
+/// By sorting the streamed side we know the buffered pointer never needs to move backwards:
+/// probing for row 2 starts where row 1 found its first match.
+///
+/// Sorted Buffered Side                                        Sorted Streamed Side
+///    ┌──────────────────┐                                        ┌──────────────────┐
+///  1 │ 100              │                                      1 │ 100              │
+///    ├──────────────────┤                                        ├──────────────────┤
+///  2 │ 200              │ <- Start here when probing for the   2 │ 200              │
+///    ├──────────────────┤    streamed side row 2.                 ├──────────────────┤
+///  3 │ 200              │                                      3 │ 500              │
+///    ├──────────────────┤                                        └──────────────────┘
+///  4 │ 300              │
+///    ├──────────────────┤
+///  5 │ 400              │
+///    └──────────────────┘
+///
+/// ```
+///
+/// ## Existence Joins (Semi, Anti, Mark)
+/// Existence joins are dramatically faster with a `PiecewiseMergeJoin` because we only need to find
+/// the min/max value of the streamed side to be able to emit all matches on the buffered side.
By putting
+/// the side we need to mark onto the sorted buffered side, we can emit all of these matches at once.
+///
+/// For less-than operations (`<`) the buffered input is sorted in ascending order, and for greater-than
+/// (`>`) operations it is sorted in descending order. `SortExec` is used to enforce sorting on the buffered
+/// side; the streamed side does not need to be sorted because we only need to find its min/max.
+///
+/// For Left Semi, Anti, and Mark joins we swap the inputs so that the marked side is on the buffered side.
+///
+/// The pseudocode for the algorithm looks like this:
+///
+/// ```text
+/// // Using the example of a less than `<` operation
+/// let max = max_batch(streamed_batch)
+///
+/// for buffer_row in buffer_batch:
+///     if buffer_row < max:
+///         output buffer_batch[buffer_row:]
+/// ```
+///
+/// We only need to find the min/max value and iterate through the buffered side once.
+///
+/// Here is an example:
+/// We perform a `JoinType::LeftSemi` with these two batches and the operator being `Operator::Lt` (<). Because
+/// the operator is `Operator::Lt` we can find the minimum value in the streamed side; in this case it is 200.
+/// We can then advance a pointer from the start of the buffered side until we find the first value that satisfies
+/// the predicate. All rows after that first matched value satisfy the condition 200 < x, so we can mark all of
+/// those rows as matched.
+///
+/// ```text
+/// SQL statement:
+/// SELECT *
+/// FROM (VALUES (500), (200), (300)) AS streamed(a)
+/// LEFT SEMI JOIN (VALUES (100), (200), (200), (300), (400)) AS buffered(b)
+/// ON streamed.a < buffered.b;
+///
+/// Sorted Buffered Side                   Unsorted Streamed Side
+///    ┌──────────────────┐                   ┌──────────────────┐
+///  1 │ 100              │                 1 │ 500              │
+///    ├──────────────────┤                   ├──────────────────┤
+///  2 │ 200              │                 2 │ 200              │
+///    ├──────────────────┤                   ├──────────────────┤
+///  3 │ 200              │                 3 │ 300              │
+///    ├──────────────────┤                   └──────────────────┘
+///  4 │ 300              │ ─┐
+///    ├──────────────────┤  │ We emit matches for rows 4 - 5
+///  5 │ 400              │ ─┘ on the buffered side.
+///    └──────────────────┘
+///    min value: 200
+/// ```
+///
+/// For existence joins the buffered side must be sorted ascending for `Operator::Lt` (<) or
+/// `Operator::LtEq` (<=) and descending for `Operator::Gt` (>) or `Operator::GtEq` (>=); for classic
+/// joins the required order is the reverse, as described above.
+///
+/// # Partitioning Logic
+/// Piecewise Merge Join requires a single buffered-side partition plus a round-robin partitioned stream
+/// side. A counter on the buffered side coordinates when all streamed partitions have finished execution.
+/// This allows for processing the rest of the unmatched rows for Left and Full joins. The last partition
+/// that finishes execution is responsible for outputting the unmatched rows.
+///
+/// # Performance Explanation (cost)
+/// Piecewise Merge Join is used over Nested Loop Join due to its superior performance. Here is the breakdown:
+///
+/// R: Buffered Side
+/// S: Streamed Side
+///
+/// ## Piecewise Merge Join (PWMJ)
+///
+/// # Classic Join:
+/// Requires sorting the probe side and, for each probe row, scanning the buffered side until the first match
+/// is found.
+/// Complexity: `O(sort(S) + num_of_batches(|S|) * scan(R))`.
+///
+/// # Mark Join:
+/// Computes the min/max range of the probe keys and scans the buffered side only within that range.
+/// Complexity: `O(|S| + scan(R[range]))`.
+///
+/// ## Nested Loop Join
+/// Compares every row from `S` with every row from `R`.
+/// Complexity: `O(|S| * |R|)`.
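A dependency-free sketch of the existence-join idea from the worked example above (predicate `streamed.a < buffered.b`): only `min(streamed)` is needed, and the ascending-sorted buffered side is marked from the first row that exceeds it. The function name and the `Vec<bool>` marks are illustrative only, not the operator's actual output representation.

```rust
// Illustrative only: min/max-driven marking for an existence join.
fn mark_matches(streamed_unsorted: &[i32], buffered_asc: &[i32]) -> Vec<bool> {
    let min = streamed_unsorted
        .iter()
        .copied()
        .min()
        .expect("non-empty streamed side");
    // Index of the first buffered row with `min < b`; every later row matches too.
    let first_match = buffered_asc.partition_point(|&b| b <= min);
    (0..buffered_asc.len()).map(|i| i >= first_match).collect()
}

fn main() {
    // Mirrors the doc example: min(streamed) = 200, buffered rows 300 and 400 match.
    let marks = mark_matches(&[500, 200, 300], &[100, 200, 200, 300, 400]);
    assert_eq!(marks, vec![false, false, false, true, true]);
}
```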
+///
+/// # Further Reference Material
+/// DuckDB blog on Range Joins: [Range Joins in DuckDB](https://duckdb.org/2022/05/27/iejoin.html)
+#[derive(Debug)]
+pub struct PiecewiseMergeJoinExec {
+    /// Left buffered execution plan
+    pub buffered: Arc<dyn ExecutionPlan>,
+    /// Right streamed execution plan
+    pub streamed: Arc<dyn ExecutionPlan>,
+    /// The two expressions being compared
+    pub on: (Arc<dyn PhysicalExpr>, Arc<dyn PhysicalExpr>),
+    /// Comparison operator in the range predicate
+    pub operator: Operator,
+    /// How the join is performed
+    pub join_type: JoinType,
+    /// The schema once the join is applied
+    schema: SchemaRef,
+    /// Buffered data
+    buffered_fut: OnceAsync<BufferedSideData>,
+    /// Execution metrics
+    metrics: ExecutionPlanMetricsSet,
+
+    /// Sort expressions - See above for more details [`PiecewiseMergeJoinExec`]
+    ///
+    /// The left sort order, descending for `<`, `<=` operations and ascending for `>`, `>=` operations
+    left_child_plan_required_order: LexOrdering,
+    /// The right sort order, descending for `<`, `<=` operations and ascending for `>`, `>=` operations.
+    /// Unsorted for mark joins
+    #[allow(unused)]
+    right_batch_required_orders: LexOrdering,
+
+    /// This determines the sort order of all join columns used in sorting the stream and buffered execution plans.
+    sort_options: SortOptions,
+    /// Cache holding plan properties like equivalences, output partitioning etc.
+    cache: PlanProperties,
+    /// Number of partitions to process
+    num_partitions: usize,
+}
+
+impl PiecewiseMergeJoinExec {
+    pub fn try_new(
+        buffered: Arc<dyn ExecutionPlan>,
+        streamed: Arc<dyn ExecutionPlan>,
+        on: (Arc<dyn PhysicalExpr>, Arc<dyn PhysicalExpr>),
+        operator: Operator,
+        join_type: JoinType,
+        num_partitions: usize,
+    ) -> Result<Self> {
+        // TODO: Implement existence joins for PiecewiseMergeJoin
+        if is_existence_join(join_type) {
+            return not_impl_err!(
+                "Existence Joins are currently not supported for PiecewiseMergeJoin"
+            );
+        }
+
+        // Take the operator and enforce a sort order on the streamed + buffered side based on
+        // the operator type.
+ let sort_options = match operator { + Operator::Lt | Operator::LtEq => { + // For left existence joins the inputs will be swapped so the sort + // options are switched + if is_right_existence_join(join_type) { + SortOptions::new(false, true) + } else { + SortOptions::new(true, true) + } + } + Operator::Gt | Operator::GtEq => { + if is_right_existence_join(join_type) { + SortOptions::new(true, true) + } else { + SortOptions::new(false, true) + } + } + _ => { + return internal_err!( + "Cannot contain non-range operator in PiecewiseMergeJoinExec" + ) + } + }; + + // Give the same `sort_option for comparison later` + let left_child_plan_required_order = + vec![PhysicalSortExpr::new(Arc::clone(&on.0), sort_options)]; + let right_batch_required_orders = + vec![PhysicalSortExpr::new(Arc::clone(&on.1), sort_options)]; + + let Some(left_child_plan_required_order) = + LexOrdering::new(left_child_plan_required_order) + else { + return internal_err!( + "PiecewiseMergeJoinExec requires valid sort expressions for its left side" + ); + }; + let Some(right_batch_required_orders) = + LexOrdering::new(right_batch_required_orders) + else { + return internal_err!( + "PiecewiseMergeJoinExec requires valid sort expressions for its right side" + ); + }; + + let buffered_schema = buffered.schema(); + let streamed_schema = streamed.schema(); + + // Create output schema for the join + let schema = + Arc::new(build_join_schema(&buffered_schema, &streamed_schema, &join_type).0); + let cache = Self::compute_properties( + &buffered, + &streamed, + Arc::clone(&schema), + join_type, + &on, + )?; + + Ok(Self { + streamed, + buffered, + on, + operator, + join_type, + schema, + buffered_fut: Default::default(), + metrics: ExecutionPlanMetricsSet::new(), + left_child_plan_required_order, + right_batch_required_orders, + sort_options, + cache, + num_partitions, + }) + } + + /// Reference to buffered side execution plan + pub fn buffered(&self) -> &Arc { + &self.buffered + } + + /// Reference to streamed side execution plan + pub fn streamed(&self) -> &Arc { + &self.streamed + } + + /// Join type + pub fn join_type(&self) -> JoinType { + self.join_type + } + + /// Reference to sort options + pub fn sort_options(&self) -> &SortOptions { + &self.sort_options + } + + /// Get probe side (streamed side) for the PiecewiseMergeJoin + /// In current implementation, probe side is determined according to join type. + pub fn probe_side(join_type: &JoinType) -> JoinSide { + match join_type { + JoinType::Right + | JoinType::Inner + | JoinType::Full + | JoinType::RightSemi + | JoinType::RightAnti + | JoinType::RightMark => JoinSide::Right, + JoinType::Left + | JoinType::LeftAnti + | JoinType::LeftSemi + | JoinType::LeftMark => JoinSide::Left, + } + } + + pub fn compute_properties( + buffered: &Arc, + streamed: &Arc, + schema: SchemaRef, + join_type: JoinType, + join_on: &(PhysicalExprRef, PhysicalExprRef), + ) -> Result { + let eq_properties = join_equivalence_properties( + buffered.equivalence_properties().clone(), + streamed.equivalence_properties().clone(), + &join_type, + schema, + &Self::maintains_input_order(join_type), + Some(Self::probe_side(&join_type)), + std::slice::from_ref(join_on), + )?; + + let output_partitioning = + asymmetric_join_output_partitioning(buffered, streamed, &join_type)?; + + Ok(PlanProperties::new( + eq_properties, + output_partitioning, + EmissionType::Incremental, + boundedness_from_children([buffered, streamed]), + )) + } + + // TODO: Add input order. 
Now they're all `false` indicating it will not maintain the input order. + // However, for certain join types the order is maintained. This can be updated in the future after + // more testing. + fn maintains_input_order(join_type: JoinType) -> Vec { + match join_type { + // The existence side is expected to come in sorted + JoinType::LeftSemi | JoinType::LeftAnti | JoinType::LeftMark => { + vec![false, false] + } + JoinType::RightSemi | JoinType::RightAnti | JoinType::RightMark => { + vec![false, false] + } + // Left, Right, Full, Inner Join is not guaranteed to maintain + // input order as the streamed side will be sorted during + // execution for `PiecewiseMergeJoin` + _ => vec![false, false], + } + } + + // TODO + pub fn swap_inputs(&self) -> Result> { + todo!() + } +} + +impl ExecutionPlan for PiecewiseMergeJoinExec { + fn name(&self) -> &str { + "PiecewiseMergeJoinExec" + } + + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn properties(&self) -> &PlanProperties { + &self.cache + } + + fn children(&self) -> Vec<&Arc> { + vec![&self.buffered, &self.streamed] + } + + fn required_input_distribution(&self) -> Vec { + vec![ + Distribution::SinglePartition, + Distribution::UnspecifiedDistribution, + ] + } + + fn required_input_ordering(&self) -> Vec> { + // Existence joins don't need to be sorted on one side. + if is_right_existence_join(self.join_type) { + unimplemented!() + } else { + // Sort the right side in memory, so we do not need to enforce any sorting + vec![ + Some(OrderingRequirements::from( + self.left_child_plan_required_order.clone(), + )), + None, + ] + } + } + + fn with_new_children( + self: Arc, + children: Vec>, + ) -> Result> { + match &children[..] { + [left, right] => Ok(Arc::new(PiecewiseMergeJoinExec::try_new( + Arc::clone(left), + Arc::clone(right), + self.on.clone(), + self.operator, + self.join_type, + self.num_partitions, + )?)), + _ => internal_err!( + "PiecewiseMergeJoin should have 2 children, found {}", + children.len() + ), + } + } + + fn execute( + &self, + partition: usize, + context: Arc, + ) -> Result { + let on_buffered = Arc::clone(&self.on.0); + let on_streamed = Arc::clone(&self.on.1); + + let metrics = BuildProbeJoinMetrics::new(partition, &self.metrics); + let buffered_fut = self.buffered_fut.try_once(|| { + let reservation = MemoryConsumer::new("PiecewiseMergeJoinInput") + .register(context.memory_pool()); + + let buffered_stream = self.buffered.execute(0, Arc::clone(&context))?; + Ok(build_buffered_data( + buffered_stream, + Arc::clone(&on_buffered), + metrics.clone(), + reservation, + build_visited_indices_map(self.join_type), + self.num_partitions, + )) + })?; + + let streamed = self.streamed.execute(partition, Arc::clone(&context))?; + + let batch_size = context.session_config().batch_size(); + + // TODO: Add existence joins + this is guarded at physical planner + if is_existence_join(self.join_type()) { + unreachable!() + } else { + Ok(Box::pin(ClassicPWMJStream::try_new( + Arc::clone(&self.schema), + on_streamed, + self.join_type, + self.operator, + streamed, + BufferedSide::Initial(BufferedSideInitialState { buffered_fut }), + PiecewiseMergeJoinStreamState::WaitBufferedSide, + self.sort_options, + metrics, + batch_size, + ))) + } + } +} + +impl DisplayAs for PiecewiseMergeJoinExec { + fn fmt_as(&self, t: DisplayFormatType, f: &mut Formatter) -> std::fmt::Result { + let on_str = format!( + "({} {} {})", + fmt_sql(self.on.0.as_ref()), + self.operator, + fmt_sql(self.on.1.as_ref()) + ); + + match t { + DisplayFormatType::Default | 
DisplayFormatType::Verbose => { + write!( + f, + "PiecewiseMergeJoin: operator={:?}, join_type={:?}, on={}", + self.operator, self.join_type, on_str + ) + } + + DisplayFormatType::TreeRender => { + writeln!(f, "operator={:?}", self.operator)?; + if self.join_type != JoinType::Inner { + writeln!(f, "join_type={:?}", self.join_type)?; + } + writeln!(f, "on={on_str}") + } + } + } +} + +async fn build_buffered_data( + buffered: SendableRecordBatchStream, + on_buffered: PhysicalExprRef, + metrics: BuildProbeJoinMetrics, + reservation: MemoryReservation, + build_map: bool, + remaining_partitions: usize, +) -> Result { + let schema = buffered.schema(); + + // Combine batches and record number of rows + let initial = (Vec::new(), 0, metrics, reservation); + let (batches, num_rows, metrics, mut reservation) = buffered + .try_fold(initial, |mut acc, batch| async { + let batch_size = get_record_batch_memory_size(&batch); + acc.3.try_grow(batch_size)?; + acc.2.build_mem_used.add(batch_size); + acc.2.build_input_batches.add(1); + acc.2.build_input_rows.add(batch.num_rows()); + // Update row count + acc.1 += batch.num_rows(); + // Push batch to output + acc.0.push(batch); + Ok(acc) + }) + .await?; + + let single_batch = concat_batches(&schema, batches.iter())?; + + // Evaluate physical expression on the buffered side. + let buffered_values = on_buffered + .evaluate(&single_batch)? + .into_array(single_batch.num_rows())?; + + // We add the single batch size + the memory of the join keys + // size of the size estimation + let size_estimation = get_record_batch_memory_size(&single_batch) + + buffered_values.get_array_memory_size(); + reservation.try_grow(size_estimation)?; + metrics.build_mem_used.add(size_estimation); + + // Created visited indices bitmap only if the join type requires it + let visited_indices_bitmap = if build_map { + let bitmap_size = bit_util::ceil(single_batch.num_rows(), 8); + reservation.try_grow(bitmap_size)?; + metrics.build_mem_used.add(bitmap_size); + + let mut bitmap_buffer = BooleanBufferBuilder::new(single_batch.num_rows()); + bitmap_buffer.append_n(num_rows, false); + bitmap_buffer + } else { + BooleanBufferBuilder::new(0) + }; + + let buffered_data = BufferedSideData::new( + single_batch, + buffered_values, + Mutex::new(visited_indices_bitmap), + remaining_partitions, + reservation, + ); + + Ok(buffered_data) +} + +pub(super) struct BufferedSideData { + pub(super) batch: RecordBatch, + values: ArrayRef, + pub(super) visited_indices_bitmap: SharedBitmapBuilder, + pub(super) remaining_partitions: AtomicUsize, + _reservation: MemoryReservation, +} + +impl BufferedSideData { + pub(super) fn new( + batch: RecordBatch, + values: ArrayRef, + visited_indices_bitmap: SharedBitmapBuilder, + remaining_partitions: usize, + reservation: MemoryReservation, + ) -> Self { + Self { + batch, + values, + visited_indices_bitmap, + remaining_partitions: AtomicUsize::new(remaining_partitions), + _reservation: reservation, + } + } + + pub(super) fn batch(&self) -> &RecordBatch { + &self.batch + } + + pub(super) fn values(&self) -> &ArrayRef { + &self.values + } +} + +pub(super) enum BufferedSide { + /// Indicates that build-side not collected yet + Initial(BufferedSideInitialState), + /// Indicates that build-side data has been collected + Ready(BufferedSideReadyState), +} + +impl BufferedSide { + // Takes a mutable state of the buffered row batches + pub(super) fn try_as_initial_mut(&mut self) -> Result<&mut BufferedSideInitialState> { + match self { + BufferedSide::Initial(state) => Ok(state), 
+ _ => internal_err!("Expected build side in initial state"), + } + } + + pub(super) fn try_as_ready(&self) -> Result<&BufferedSideReadyState> { + match self { + BufferedSide::Ready(state) => Ok(state), + _ => { + internal_err!("Expected build side in ready state") + } + } + } + + /// Tries to extract BuildSideReadyState from BuildSide enum. + /// Returns an error if state is not Ready. + pub(super) fn try_as_ready_mut(&mut self) -> Result<&mut BufferedSideReadyState> { + match self { + BufferedSide::Ready(state) => Ok(state), + _ => internal_err!("Expected build side in ready state"), + } + } +} + +pub(super) struct BufferedSideInitialState { + pub(crate) buffered_fut: OnceFut, +} + +pub(super) struct BufferedSideReadyState { + /// Collected build-side data + pub(super) buffered_data: Arc, +} diff --git a/datafusion/physical-plan/src/joins/piecewise_merge_join/mod.rs b/datafusion/physical-plan/src/joins/piecewise_merge_join/mod.rs new file mode 100644 index 000000000000..c85a7cc16f65 --- /dev/null +++ b/datafusion/physical-plan/src/joins/piecewise_merge_join/mod.rs @@ -0,0 +1,24 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! PiecewiseMergeJoin is currently experimental + +pub use exec::PiecewiseMergeJoinExec; + +mod classic_join; +mod exec; +mod utils; diff --git a/datafusion/physical-plan/src/joins/piecewise_merge_join/utils.rs b/datafusion/physical-plan/src/joins/piecewise_merge_join/utils.rs new file mode 100644 index 000000000000..5bbb496322b5 --- /dev/null +++ b/datafusion/physical-plan/src/joins/piecewise_merge_join/utils.rs @@ -0,0 +1,61 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use datafusion_expr::JoinType; + +// Returns boolean for whether the join is a right existence join +pub(super) fn is_right_existence_join(join_type: JoinType) -> bool { + matches!( + join_type, + JoinType::RightAnti | JoinType::RightSemi | JoinType::RightMark + ) +} + +// Returns boolean for whether the join is an existence join +pub(super) fn is_existence_join(join_type: JoinType) -> bool { + matches!( + join_type, + JoinType::LeftAnti + | JoinType::RightAnti + | JoinType::LeftSemi + | JoinType::RightSemi + | JoinType::LeftMark + | JoinType::RightMark + ) +} + +// Returns boolean to check if the join type needs to record +// buffered side matches for classic joins +pub(super) fn need_produce_result_in_final(join_type: JoinType) -> bool { + matches!(join_type, JoinType::Full | JoinType::Left) +} + +// Returns boolean for whether or not we need to build the buffered side +// bitmap for marking matched rows on the buffered side. +pub(super) fn build_visited_indices_map(join_type: JoinType) -> bool { + matches!( + join_type, + JoinType::Full + | JoinType::Left + | JoinType::LeftAnti + | JoinType::RightAnti + | JoinType::LeftSemi + | JoinType::RightSemi + | JoinType::LeftMark + | JoinType::RightMark + ) +} diff --git a/datafusion/physical-plan/src/joins/sort_merge_join/stream.rs b/datafusion/physical-plan/src/joins/sort_merge_join/stream.rs index 879f47638d2c..5a2e3669ab5e 100644 --- a/datafusion/physical-plan/src/joins/sort_merge_join/stream.rs +++ b/datafusion/physical-plan/src/joins/sort_merge_join/stream.rs @@ -34,7 +34,7 @@ use std::sync::Arc; use std::task::{Context, Poll}; use crate::joins::sort_merge_join::metrics::SortMergeJoinMetrics; -use crate::joins::utils::JoinFilter; +use crate::joins::utils::{compare_join_arrays, JoinFilter}; use crate::spill::spill_manager::SpillManager; use crate::{PhysicalExpr, RecordBatchStream, SendableRecordBatchStream}; @@ -1865,101 +1865,6 @@ fn join_arrays(batch: &RecordBatch, on_column: &[PhysicalExprRef]) -> Vec Result { - let mut res = Ordering::Equal; - for ((left_array, right_array), sort_options) in - left_arrays.iter().zip(right_arrays).zip(sort_options) - { - macro_rules! 
compare_value { - ($T:ty) => {{ - let left_array = left_array.as_any().downcast_ref::<$T>().unwrap(); - let right_array = right_array.as_any().downcast_ref::<$T>().unwrap(); - match (left_array.is_null(left), right_array.is_null(right)) { - (false, false) => { - let left_value = &left_array.value(left); - let right_value = &right_array.value(right); - res = left_value.partial_cmp(right_value).unwrap(); - if sort_options.descending { - res = res.reverse(); - } - } - (true, false) => { - res = if sort_options.nulls_first { - Ordering::Less - } else { - Ordering::Greater - }; - } - (false, true) => { - res = if sort_options.nulls_first { - Ordering::Greater - } else { - Ordering::Less - }; - } - _ => { - res = match null_equality { - NullEquality::NullEqualsNothing => Ordering::Less, - NullEquality::NullEqualsNull => Ordering::Equal, - }; - } - } - }}; - } - - match left_array.data_type() { - DataType::Null => {} - DataType::Boolean => compare_value!(BooleanArray), - DataType::Int8 => compare_value!(Int8Array), - DataType::Int16 => compare_value!(Int16Array), - DataType::Int32 => compare_value!(Int32Array), - DataType::Int64 => compare_value!(Int64Array), - DataType::UInt8 => compare_value!(UInt8Array), - DataType::UInt16 => compare_value!(UInt16Array), - DataType::UInt32 => compare_value!(UInt32Array), - DataType::UInt64 => compare_value!(UInt64Array), - DataType::Float32 => compare_value!(Float32Array), - DataType::Float64 => compare_value!(Float64Array), - DataType::Utf8 => compare_value!(StringArray), - DataType::Utf8View => compare_value!(StringViewArray), - DataType::LargeUtf8 => compare_value!(LargeStringArray), - DataType::Binary => compare_value!(BinaryArray), - DataType::BinaryView => compare_value!(BinaryViewArray), - DataType::FixedSizeBinary(_) => compare_value!(FixedSizeBinaryArray), - DataType::LargeBinary => compare_value!(LargeBinaryArray), - DataType::Decimal32(..) => compare_value!(Decimal32Array), - DataType::Decimal64(..) => compare_value!(Decimal64Array), - DataType::Decimal128(..) => compare_value!(Decimal128Array), - DataType::Timestamp(time_unit, None) => match time_unit { - TimeUnit::Second => compare_value!(TimestampSecondArray), - TimeUnit::Millisecond => compare_value!(TimestampMillisecondArray), - TimeUnit::Microsecond => compare_value!(TimestampMicrosecondArray), - TimeUnit::Nanosecond => compare_value!(TimestampNanosecondArray), - }, - DataType::Date32 => compare_value!(Date32Array), - DataType::Date64 => compare_value!(Date64Array), - dt => { - return not_impl_err!( - "Unsupported data type in sort merge join comparator: {}", - dt - ); - } - } - if !res.is_eq() { - break; - } - } - Ok(res) -} - /// A faster version of compare_join_arrays() that only output whether /// the given two rows are equal fn is_join_arrays_equal( diff --git a/datafusion/physical-plan/src/joins/utils.rs b/datafusion/physical-plan/src/joins/utils.rs index c50bfce93a2d..78652d443d3c 100644 --- a/datafusion/physical-plan/src/joins/utils.rs +++ b/datafusion/physical-plan/src/joins/utils.rs @@ -17,7 +17,7 @@ //! 
Join related functionality used both on logical and physical plans -use std::cmp::min; +use std::cmp::{min, Ordering}; use std::collections::HashSet; use std::fmt::{self, Debug}; use std::future::Future; @@ -43,7 +43,13 @@ use arrow::array::{ BooleanBufferBuilder, NativeAdapter, PrimitiveArray, RecordBatch, RecordBatchOptions, UInt32Array, UInt32Builder, UInt64Array, }; -use arrow::array::{ArrayRef, BooleanArray}; +use arrow::array::{ + ArrayRef, BinaryArray, BinaryViewArray, BooleanArray, Date32Array, Date64Array, + Decimal128Array, FixedSizeBinaryArray, Float32Array, Float64Array, Int16Array, + Int32Array, Int64Array, Int8Array, LargeBinaryArray, LargeStringArray, StringArray, + StringViewArray, TimestampMicrosecondArray, TimestampMillisecondArray, + TimestampNanosecondArray, TimestampSecondArray, UInt16Array, UInt8Array, +}; use arrow::buffer::{BooleanBuffer, NullBuffer}; use arrow::compute::kernels::cmp::eq; use arrow::compute::{self, and, take, FilterBuilder}; @@ -51,12 +57,13 @@ use arrow::datatypes::{ ArrowNativeType, Field, Schema, SchemaBuilder, UInt32Type, UInt64Type, }; use arrow_ord::cmp::not_distinct; -use arrow_schema::ArrowError; +use arrow_schema::{ArrowError, DataType, SortOptions, TimeUnit}; use datafusion_common::cast::as_boolean_array; use datafusion_common::hash_utils::create_hashes; use datafusion_common::stats::Precision; use datafusion_common::{ - plan_err, DataFusionError, JoinSide, JoinType, NullEquality, Result, SharedResult, + not_impl_err, plan_err, DataFusionError, JoinSide, JoinType, NullEquality, Result, + SharedResult, }; use datafusion_expr::interval_arithmetic::Interval; use datafusion_expr::Operator; @@ -284,7 +291,7 @@ pub fn build_join_schema( JoinType::LeftSemi | JoinType::LeftAnti => left_fields().unzip(), JoinType::LeftMark => { let right_field = once(( - Field::new("mark", arrow::datatypes::DataType::Boolean, false), + Field::new("mark", DataType::Boolean, false), ColumnIndex { index: 0, side: JoinSide::None, @@ -295,7 +302,7 @@ pub fn build_join_schema( JoinType::RightSemi | JoinType::RightAnti => right_fields().unzip(), JoinType::RightMark => { let left_field = once(( - Field::new("mark", arrow_schema::DataType::Boolean, false), + Field::new("mark", DataType::Boolean, false), ColumnIndex { index: 0, side: JoinSide::None, @@ -812,9 +819,10 @@ pub(crate) fn need_produce_result_in_final(join_type: JoinType) -> bool { pub(crate) fn get_final_indices_from_shared_bitmap( shared_bitmap: &SharedBitmapBuilder, join_type: JoinType, + piecewise: bool, ) -> (UInt64Array, UInt32Array) { let bitmap = shared_bitmap.lock(); - get_final_indices_from_bit_map(&bitmap, join_type) + get_final_indices_from_bit_map(&bitmap, join_type, piecewise) } /// In the end of join execution, need to use bit map of the matched @@ -829,16 +837,22 @@ pub(crate) fn get_final_indices_from_shared_bitmap( pub(crate) fn get_final_indices_from_bit_map( left_bit_map: &BooleanBufferBuilder, join_type: JoinType, + // We add a flag for whether this is being passed from the `PiecewiseMergeJoin` + // because the bitmap can be for left + right `JoinType`s + piecewise: bool, ) -> (UInt64Array, UInt32Array) { let left_size = left_bit_map.len(); - if join_type == JoinType::LeftMark { + if join_type == JoinType::LeftMark || (join_type == JoinType::RightMark && piecewise) + { let left_indices = (0..left_size as u64).collect::(); let right_indices = (0..left_size) .map(|idx| left_bit_map.get_bit(idx).then_some(0)) .collect::(); return (left_indices, right_indices); } - let left_indices = if join_type 
== JoinType::LeftSemi { + let left_indices = if join_type == JoinType::LeftSemi + || (join_type == JoinType::RightSemi && piecewise) + { (0..left_size) .filter_map(|idx| (left_bit_map.get_bit(idx)).then_some(idx as u64)) .collect::() @@ -1749,6 +1763,99 @@ fn eq_dyn_null( } } +/// Get comparison result of two rows of join arrays +pub fn compare_join_arrays( + left_arrays: &[ArrayRef], + left: usize, + right_arrays: &[ArrayRef], + right: usize, + sort_options: &[SortOptions], + null_equality: NullEquality, +) -> Result { + let mut res = Ordering::Equal; + for ((left_array, right_array), sort_options) in + left_arrays.iter().zip(right_arrays).zip(sort_options) + { + macro_rules! compare_value { + ($T:ty) => {{ + let left_array = left_array.as_any().downcast_ref::<$T>().unwrap(); + let right_array = right_array.as_any().downcast_ref::<$T>().unwrap(); + match (left_array.is_null(left), right_array.is_null(right)) { + (false, false) => { + let left_value = &left_array.value(left); + let right_value = &right_array.value(right); + res = left_value.partial_cmp(right_value).unwrap(); + if sort_options.descending { + res = res.reverse(); + } + } + (true, false) => { + res = if sort_options.nulls_first { + Ordering::Less + } else { + Ordering::Greater + }; + } + (false, true) => { + res = if sort_options.nulls_first { + Ordering::Greater + } else { + Ordering::Less + }; + } + _ => { + res = match null_equality { + NullEquality::NullEqualsNothing => Ordering::Less, + NullEquality::NullEqualsNull => Ordering::Equal, + }; + } + } + }}; + } + + match left_array.data_type() { + DataType::Null => {} + DataType::Boolean => compare_value!(BooleanArray), + DataType::Int8 => compare_value!(Int8Array), + DataType::Int16 => compare_value!(Int16Array), + DataType::Int32 => compare_value!(Int32Array), + DataType::Int64 => compare_value!(Int64Array), + DataType::UInt8 => compare_value!(UInt8Array), + DataType::UInt16 => compare_value!(UInt16Array), + DataType::UInt32 => compare_value!(UInt32Array), + DataType::UInt64 => compare_value!(UInt64Array), + DataType::Float32 => compare_value!(Float32Array), + DataType::Float64 => compare_value!(Float64Array), + DataType::Binary => compare_value!(BinaryArray), + DataType::BinaryView => compare_value!(BinaryViewArray), + DataType::FixedSizeBinary(_) => compare_value!(FixedSizeBinaryArray), + DataType::LargeBinary => compare_value!(LargeBinaryArray), + DataType::Utf8 => compare_value!(StringArray), + DataType::Utf8View => compare_value!(StringViewArray), + DataType::LargeUtf8 => compare_value!(LargeStringArray), + DataType::Decimal128(..) 
=> compare_value!(Decimal128Array), + DataType::Timestamp(time_unit, None) => match time_unit { + TimeUnit::Second => compare_value!(TimestampSecondArray), + TimeUnit::Millisecond => compare_value!(TimestampMillisecondArray), + TimeUnit::Microsecond => compare_value!(TimestampMicrosecondArray), + TimeUnit::Nanosecond => compare_value!(TimestampNanosecondArray), + }, + DataType::Date32 => compare_value!(Date32Array), + DataType::Date64 => compare_value!(Date64Array), + dt => { + return not_impl_err!( + "Unsupported data type in sort merge join comparator: {}", + dt + ); + } + } + if !res.is_eq() { + break; + } + } + Ok(res) +} + #[cfg(test)] mod tests { use std::collections::HashMap; diff --git a/datafusion/physical-plan/src/metrics/baseline.rs b/datafusion/physical-plan/src/metrics/baseline.rs index 15efb8f90aa2..858773b94664 100644 --- a/datafusion/physical-plan/src/metrics/baseline.rs +++ b/datafusion/physical-plan/src/metrics/baseline.rs @@ -21,6 +21,8 @@ use std::task::Poll; use arrow::record_batch::RecordBatch; +use crate::spill::get_record_batch_memory_size; + use super::{Count, ExecutionPlanMetricsSet, MetricBuilder, Time, Timestamp}; use datafusion_common::Result; @@ -53,6 +55,16 @@ pub struct BaselineMetrics { /// output rows: the total output rows output_rows: Count, + + /// Memory usage of all output batches. + /// + /// Note: This value may be overestimated. If multiple output `RecordBatch` + /// instances share underlying memory buffers, their sizes will be counted + /// multiple times. + /// Issue: + output_bytes: Count, + // Remember to update `docs/source/user-guide/metrics.md` when updating comments + // or adding new metrics } impl BaselineMetrics { @@ -62,9 +74,18 @@ impl BaselineMetrics { start_time.record(); Self { - end_time: MetricBuilder::new(metrics).end_timestamp(partition), - elapsed_compute: MetricBuilder::new(metrics).elapsed_compute(partition), - output_rows: MetricBuilder::new(metrics).output_rows(partition), + end_time: MetricBuilder::new(metrics) + .with_type(super::MetricType::SUMMARY) + .end_timestamp(partition), + elapsed_compute: MetricBuilder::new(metrics) + .with_type(super::MetricType::SUMMARY) + .elapsed_compute(partition), + output_rows: MetricBuilder::new(metrics) + .with_type(super::MetricType::SUMMARY) + .output_rows(partition), + output_bytes: MetricBuilder::new(metrics) + .with_type(super::MetricType::SUMMARY) + .output_bytes(partition), } } @@ -78,6 +99,7 @@ impl BaselineMetrics { end_time: Default::default(), elapsed_compute: self.elapsed_compute.clone(), output_rows: Default::default(), + output_bytes: Default::default(), } } @@ -205,6 +227,8 @@ impl RecordOutput for usize { impl RecordOutput for RecordBatch { fn record_output(self, bm: &BaselineMetrics) -> Self { bm.record_output(self.num_rows()); + let n_bytes = get_record_batch_memory_size(&self); + bm.output_bytes.add(n_bytes); self } } @@ -212,6 +236,8 @@ impl RecordOutput for RecordBatch { impl RecordOutput for &RecordBatch { fn record_output(self, bm: &BaselineMetrics) -> Self { bm.record_output(self.num_rows()); + let n_bytes = get_record_batch_memory_size(self); + bm.output_bytes.add(n_bytes); self } } diff --git a/datafusion/physical-plan/src/metrics/builder.rs b/datafusion/physical-plan/src/metrics/builder.rs index dbda0a310ce5..88ec1a3f67d1 100644 --- a/datafusion/physical-plan/src/metrics/builder.rs +++ b/datafusion/physical-plan/src/metrics/builder.rs @@ -19,6 +19,8 @@ use std::{borrow::Cow, sync::Arc}; +use crate::metrics::MetricType; + use super::{ Count, 
ExecutionPlanMetricsSet, Gauge, Label, Metric, MetricValue, Time, Timestamp, }; @@ -52,15 +54,23 @@ pub struct MetricBuilder<'a> { /// arbitrary name=value pairs identifying this metric labels: Vec

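Reviewer note (not part of the diff above): the operator-to-sort-direction mapping in `PiecewiseMergeJoinExec::try_new` is easy to misread, because right-existence joins swap the inputs and therefore flip the required direction while `nulls_first` stays fixed. The sketch below restates that decision table as a standalone helper; `required_sort_options` and `is_right_existence` are hypothetical names used only for illustration, and the snippet assumes the `arrow-schema` and `datafusion-expr` crates are available as dependencies.

```rust
// Hypothetical, self-contained restatement of the sort-direction table used by
// `PiecewiseMergeJoinExec::try_new` (names below are illustrative only).
use arrow_schema::SortOptions;
use datafusion_expr::{JoinType, Operator};

/// Mirrors the PR's `is_right_existence_join` helper.
fn is_right_existence(join_type: JoinType) -> bool {
    matches!(
        join_type,
        JoinType::RightAnti | JoinType::RightSemi | JoinType::RightMark
    )
}

/// `SortOptions::new(descending, nulls_first)`: the buffered side is always
/// sorted with `nulls_first = true`; only the direction depends on the range
/// operator, and it flips when the inputs will be swapped for a
/// right-existence join.
fn required_sort_options(
    operator: Operator,
    join_type: JoinType,
) -> Option<SortOptions> {
    match operator {
        Operator::Lt | Operator::LtEq => Some(if is_right_existence(join_type) {
            SortOptions::new(false, true) // ascending
        } else {
            SortOptions::new(true, true) // descending
        }),
        Operator::Gt | Operator::GtEq => Some(if is_right_existence(join_type) {
            SortOptions::new(true, true)
        } else {
            SortOptions::new(false, true)
        }),
        // `try_new` rejects every other operator with an internal error;
        // the sketch models that case as `None`.
        _ => None,
    }
}

fn main() {
    // `<` with an Inner join sorts descending ...
    assert_eq!(
        required_sort_options(Operator::Lt, JoinType::Inner),
        Some(SortOptions::new(true, true))
    );
    // ... while the same operator under a RightSemi join flips to ascending,
    // because the inputs will be swapped before execution.
    assert_eq!(
        required_sort_options(Operator::Lt, JoinType::RightSemi),
        Some(SortOptions::new(false, true))
    );
    println!("sort-direction table matches the match expression in try_new");
}
```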
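Reviewer note (not part of the diff above): the new `output_bytes` baseline metric sums `get_record_batch_memory_size` over every output batch, and its doc comment warns that shared buffers are counted once per batch. The arrow-only sketch below, written under the assumption that only the `arrow` crate is on hand, shows the underlying effect: a zero-copy slice reports the capacity of the buffer it shares with its parent, so naively adding per-batch sizes double counts that memory.

```rust
// Arrow-only illustration of the over-counting caveat; it does not call
// DataFusion's internal `get_record_batch_memory_size`, it only demonstrates
// the buffer-sharing effect that function is subject to.
use std::sync::Arc;

use arrow::array::{Array, ArrayRef, Int32Array};

fn main() {
    // One 1M-row Int32 column ...
    let parent: ArrayRef = Arc::new(Int32Array::from_iter_values(0..1_000_000));
    // ... and a zero-copy slice of its first half: both views share the same
    // underlying data buffer.
    let child = parent.slice(0, 500_000);

    let parent_bytes = parent.get_array_memory_size();
    let child_bytes = child.get_array_memory_size();

    // Summing per-batch sizes counts the shared buffer once per batch, which
    // is why the new `output_bytes` doc comment calls the value a possible
    // overestimate.
    println!(
        "parent = {parent_bytes} B, slice = {child_bytes} B, naive sum = {} B",
        parent_bytes + child_bytes
    );
}
```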