From e7db848845f13e8e46d956ac49a95a82f371846e Mon Sep 17 00:00:00 2001
From: Andy Grove <agrove@apache.org>
Date: Thu, 5 Feb 2026 17:42:10 -0700
Subject: [PATCH 1/8] fix: Add Spark-compatible schema validation for
 native_datafusion scan (#3311)

Add schema validation in the native schema adapter that rejects type
coercions and column resolutions that Spark's vectorized Parquet reader
would reject, gated behind a new config
`spark.comet.parquet.schemaValidation.enabled` (default: true).

When enabled, the native scan rejects:
- TimestampLTZ <-> TimestampNTZ conversions
- Integer/float widening (Int32->Int64, Float32->Float64) unless
  schema evolution is enabled
- String/binary to timestamp or numeric conversions
- Scalar to complex type conversions
- Duplicate fields in case-insensitive mode

This allows 5 previously-ignored Spark SQL tests to pass with
native_datafusion enabled.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../scala/org/apache/comet/CometConf.scala    |  11 +
 dev/diffs/3.5.8.diff                          |  58 +-----
 native/core/src/execution/planner.rs          |   2 +
 native/core/src/execution/spark_config.rs     |   3 +
 native/core/src/parquet/mod.rs                |   2 +
 native/core/src/parquet/parquet_exec.rs       |   8 +
 native/core/src/parquet/parquet_support.rs    |   8 +
 native/core/src/parquet/schema_adapter.rs     | 190 +++++++++++++++++-
 native/proto/src/proto/operator.proto         |   2 +
 .../org/apache/comet/CometExecIterator.scala  |  11 +-
 .../serde/operator/CometNativeScan.scala      |   4 +
 .../comet/parquet/ParquetReadSuite.scala      |   4 +-
 12 files changed, 244 insertions(+), 59 deletions(-)

diff --git a/common/src/main/scala/org/apache/comet/CometConf.scala b/common/src/main/scala/org/apache/comet/CometConf.scala
index 522ccbc94c..b7b66c94ff 100644
--- a/common/src/main/scala/org/apache/comet/CometConf.scala
+++ b/common/src/main/scala/org/apache/comet/CometConf.scala
@@ -157,6 +157,17 @@ object CometConf extends ShimCometConf {
       .booleanConf
       .createWithDefault(false)
 
+  val COMET_PARQUET_SCHEMA_VALIDATION_ENABLED: ConfigEntry[Boolean] =
+    conf("spark.comet.parquet.schemaValidation.enabled")
+      .category(CATEGORY_PARQUET)
+      .doc(
+        "Whether to enable Spark-compatible schema validation when reading Parquet files " +
+          "with native_datafusion scan. When enabled, type coercions and column resolutions " +
+          "that Spark's vectorized reader would reject will also be rejected by Comet, " +
+          "throwing SparkException with compatible error messages.")
+      .booleanConf
+      .createWithDefault(true)
+
   val COMET_RESPECT_PARQUET_FILTER_PUSHDOWN: ConfigEntry[Boolean] =
     conf("spark.comet.parquet.respectFilterPushdown")
       .category(CATEGORY_PARQUET)
diff --git a/dev/diffs/3.5.8.diff b/dev/diffs/3.5.8.diff
index 10f579da64..b2799eb280 100644
--- a/dev/diffs/3.5.8.diff
+++ b/dev/diffs/3.5.8.diff
@@ -2271,17 +2271,7 @@ index 8e88049f51e..49f2001dc6b 100644
      val schema = StructType(Seq(
        StructField("a", IntegerType, nullable = false)
      ))
-@@ -1933,7 +1949,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
-     }
-   }
- 
--  test("SPARK-25207: exception when duplicate fields in case-insensitive mode") {
-+  test("SPARK-25207: exception when duplicate fields in case-insensitive mode",
-+    IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3311")) {
-     withTempPath { dir =>
-       val count = 10
-       val tableName = "spark_25207"
-@@ -1984,7 +2001,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
+@@ -1984,7 +2000,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
      }
    }
  
@@ -2331,27 +2321,7 @@ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/
 index 8ed9ef1630e..f312174b182 100644
 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
 +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
-@@ -1064,7 +1064,8 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession
-     }
-   }
- 
--  test("SPARK-35640: read binary as timestamp should throw schema incompatible error") {
-+  test("SPARK-35640: read binary as timestamp should throw schema incompatible error",
-+    IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3311")) {
-     val data = (1 to 4).map(i => Tuple1(i.toString))
-     val readSchema = StructType(Seq(StructField("_1", DataTypes.TimestampType)))
- 
-@@ -1075,7 +1076,8 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession
-     }
-   }
- 
--  test("SPARK-35640: int as long should throw schema incompatible error") {
-+  test("SPARK-35640: int as long should throw schema incompatible error",
-+    IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3311")) {
-     val data = (1 to 4).map(i => Tuple1(i))
-     val readSchema = StructType(Seq(StructField("_1", DataTypes.LongType)))
- 
-@@ -1345,7 +1347,8 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession
+@@ -1345,7 +1345,8 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession
      }
    }
  
@@ -2365,17 +2335,7 @@ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/
 index f6472ba3d9d..ce39ebb52e6 100644
 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala
 +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala
-@@ -185,7 +185,8 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
-     }
-   }
- 
--  test("SPARK-36182: can't read TimestampLTZ as TimestampNTZ") {
-+  test("SPARK-36182: can't read TimestampLTZ as TimestampNTZ",
-+    IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3311")) {
-     val data = (1 to 1000).map { i =>
-       val ts = new java.sql.Timestamp(i)
-       Row(ts)
-@@ -998,7 +999,8 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
+@@ -998,7 +998,8 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
      }
    }
  
@@ -2415,17 +2375,7 @@ index f6472ba3d9d..ce39ebb52e6 100644
          checkAnswer(readParquet("a DECIMAL(3, 2)", path), sql("SELECT 1.00"))
          checkAnswer(readParquet("b DECIMAL(3, 2)", path), Row(null))
          checkAnswer(readParquet("b DECIMAL(11, 1)", path), sql("SELECT 123456.0"))
-@@ -1133,7 +1138,8 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
-     }
-   }
- 
--  test("row group skipping doesn't overflow when reading into larger type") {
-+  test("row group skipping doesn't overflow when reading into larger type",
-+    IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3311")) {
-     withTempPath { path =>
-       Seq(0).toDF("a").write.parquet(path.toString)
-       // The vectorized and non-vectorized readers will produce different exceptions, we don't need
-@@ -1148,7 +1154,7 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
+@@ -1148,7 +1152,7 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
              .where(s"a < ${Long.MaxValue}")
              .collect()
          }
diff --git a/native/core/src/execution/planner.rs b/native/core/src/execution/planner.rs
index 44ff20a44f..ca4c639785 100644
--- a/native/core/src/execution/planner.rs
+++ b/native/core/src/execution/planner.rs
@@ -1053,6 +1053,8 @@ impl PhysicalPlanner {
                     default_values,
                     scan.session_timezone.as_str(),
                     scan.case_sensitive,
+                    scan.schema_validation_enabled,
+                    scan.schema_evolution_enabled,
                     self.session_ctx(),
                     scan.encryption_enabled,
                 )?;
diff --git a/native/core/src/execution/spark_config.rs b/native/core/src/execution/spark_config.rs
index 60ebb2ff8b..b273ec12eb 100644
--- a/native/core/src/execution/spark_config.rs
+++ b/native/core/src/execution/spark_config.rs
@@ -21,6 +21,9 @@ pub(crate) const COMET_TRACING_ENABLED: &str = "spark.comet.tracing.enabled";
 pub(crate) const COMET_DEBUG_ENABLED: &str = "spark.comet.debug.enabled";
 pub(crate) const COMET_EXPLAIN_NATIVE_ENABLED: &str = "spark.comet.explain.native.enabled";
 pub(crate) const COMET_MAX_TEMP_DIRECTORY_SIZE: &str = "spark.comet.maxTempDirectorySize";
+#[allow(dead_code)]
+pub(crate) const COMET_PARQUET_SCHEMA_VALIDATION_ENABLED: &str =
+    "spark.comet.parquet.schemaValidation.enabled";
 
 pub(crate) trait SparkConfig {
     fn get_bool(&self, name: &str) -> bool;
diff --git a/native/core/src/parquet/mod.rs b/native/core/src/parquet/mod.rs
index c8a480e97a..40cb9aee6f 100644
--- a/native/core/src/parquet/mod.rs
+++ b/native/core/src/parquet/mod.rs
@@ -773,6 +773,8 @@ pub unsafe extern "system" fn Java_org_apache_comet_parquet_Native_initRecordBat
             None,
             session_timezone.as_str(),
             case_sensitive != JNI_FALSE,
+            false, // schema_validation_enabled - validation is done on the Java side
+            false, // schema_evolution_enabled
             session_ctx,
             encryption_enabled,
         )?;
diff --git a/native/core/src/parquet/parquet_exec.rs b/native/core/src/parquet/parquet_exec.rs
index ec18d227f5..4249801294 100644
--- a/native/core/src/parquet/parquet_exec.rs
+++ b/native/core/src/parquet/parquet_exec.rs
@@ -68,12 +68,16 @@ pub(crate) fn init_datasource_exec(
     default_values: Option<HashMap<usize, ScalarValue>>,
     session_timezone: &str,
     case_sensitive: bool,
+    schema_validation_enabled: bool,
+    schema_evolution_enabled: bool,
     session_ctx: &Arc<SessionContext>,
     encryption_enabled: bool,
 ) -> Result<Arc<DataSourceExec>, ExecutionError> {
     let (table_parquet_options, spark_parquet_options) = get_options(
         session_timezone,
         case_sensitive,
+        schema_validation_enabled,
+        schema_evolution_enabled,
         &object_store_url,
         encryption_enabled,
     );
@@ -142,6 +146,8 @@ pub(crate) fn init_datasource_exec(
 fn get_options(
     session_timezone: &str,
     case_sensitive: bool,
+    schema_validation_enabled: bool,
+    schema_evolution_enabled: bool,
     object_store_url: &ObjectStoreUrl,
     encryption_enabled: bool,
 ) -> (TableParquetOptions, SparkParquetOptions) {
@@ -153,6 +159,8 @@ fn get_options(
         SparkParquetOptions::new(EvalMode::Legacy, session_timezone, false);
     spark_parquet_options.allow_cast_unsigned_ints = true;
     spark_parquet_options.case_sensitive = case_sensitive;
+    spark_parquet_options.schema_validation_enabled = schema_validation_enabled;
+    spark_parquet_options.schema_evolution_enabled = schema_evolution_enabled;
 
     if encryption_enabled {
         table_parquet_options.crypto.configure_factory(
diff --git a/native/core/src/parquet/parquet_support.rs b/native/core/src/parquet/parquet_support.rs
index e7ff5630f1..809a86c042 100644
--- a/native/core/src/parquet/parquet_support.rs
+++ b/native/core/src/parquet/parquet_support.rs
@@ -76,6 +76,10 @@ pub struct SparkParquetOptions {
     pub use_legacy_date_timestamp_or_ntz: bool,
     // Whether schema field names are case sensitive
     pub case_sensitive: bool,
+    /// Whether to validate schema compatibility (type coercions) in a Spark-compatible way
+    pub schema_validation_enabled: bool,
+    /// Whether schema evolution (type widening) is enabled
+    pub schema_evolution_enabled: bool,
 }
 
 impl SparkParquetOptions {
@@ -88,6 +92,8 @@ impl SparkParquetOptions {
             use_decimal_128: false,
             use_legacy_date_timestamp_or_ntz: false,
             case_sensitive: false,
+            schema_validation_enabled: true,
+            schema_evolution_enabled: false,
         }
     }
 
@@ -100,6 +106,8 @@ impl SparkParquetOptions {
             use_decimal_128: false,
             use_legacy_date_timestamp_or_ntz: false,
             case_sensitive: false,
+            schema_validation_enabled: true,
+            schema_evolution_enabled: false,
         }
     }
 }
diff --git a/native/core/src/parquet/schema_adapter.rs b/native/core/src/parquet/schema_adapter.rs
index 1e0d30c835..6ddc928cd5 100644
--- a/native/core/src/parquet/schema_adapter.rs
+++ b/native/core/src/parquet/schema_adapter.rs
@@ -19,7 +19,7 @@
 
 use crate::parquet::parquet_support::{spark_parquet_convert, SparkParquetOptions};
 use arrow::array::{RecordBatch, RecordBatchOptions};
-use arrow::datatypes::{Schema, SchemaRef};
+use arrow::datatypes::{DataType, Schema, SchemaRef};
 use datafusion::common::ColumnStatistics;
 use datafusion::datasource::schema_adapter::{SchemaAdapter, SchemaAdapterFactory, SchemaMapper};
 use datafusion::physical_plan::ColumnarValue;
@@ -138,6 +138,78 @@ impl SchemaAdapter for SparkSchemaAdapter {
             }
         }
 
+        if self.parquet_options.schema_validation_enabled {
+            // Case-insensitive duplicate field detection
+            if !self.parquet_options.case_sensitive {
+                for required_field in self.required_schema.fields().iter() {
+                    let required_name_lower = required_field.name().to_lowercase();
+                    let matching_names: Vec<&str> = file_schema
+                        .fields
+                        .iter()
+                        .filter(|f| f.name().to_lowercase() == required_name_lower)
+                        .map(|f| f.name().as_str())
+                        .collect();
+                    if matching_names.len() > 1 {
+                        return Err(datafusion::error::DataFusionError::External(
+                            format!(
+                                "Found duplicate field(s) \"{}\": [{}] in case-insensitive mode",
+                                required_field.name(),
+                                matching_names.join(", ")
+                            )
+                            .into(),
+                        ));
+                    }
+                }
+            }
+
+            // Type coercion validation
+            for (table_idx, file_idx_opt) in field_mappings.iter().enumerate() {
+                if let Some(proj_idx) = file_idx_opt {
+                    let file_field_idx = projection[*proj_idx];
+                    let file_type = file_schema.field(file_field_idx).data_type();
+                    let required_type = self.required_schema.field(table_idx).data_type();
+                    if file_type != required_type
+                        && !is_spark_compatible_parquet_coercion(
+                            file_type,
+                            required_type,
+                            self.parquet_options.schema_evolution_enabled,
+                        )
+                    {
+                        let col_name = self.required_schema.field(table_idx).name();
+                        let required_spark_name = arrow_type_to_spark_name(required_type);
+                        let file_spark_name = arrow_type_to_parquet_physical_name(file_type);
+
+                        // Special error for reading TimestampLTZ as TimestampNTZ
+                        // to match Spark's error message format
+                        if matches!(
+                            (file_type, required_type),
+                            (
+                                DataType::Timestamp(_, Some(_)),
+                                DataType::Timestamp(_, None)
+                            )
+                        ) {
+                            return Err(datafusion::error::DataFusionError::External(
+                                format!(
+                                    "Unable to create Parquet converter for data type \"{}\"",
+                                    required_spark_name
+                                )
+                                .into(),
+                            ));
+                        }
+
+                        return Err(datafusion::error::DataFusionError::External(
+                            format!(
+                                "Parquet column cannot be converted in file. \
+                                 Column: [{}], Expected: {}, Found: {}",
+                                col_name, required_spark_name, file_spark_name
+                            )
+                            .into(),
+                        ));
+                    }
+                }
+            }
+        }
+
         Ok((
             Arc::new(SchemaMapping {
                 required_schema: Arc::<Schema>::clone(&self.required_schema),
@@ -265,6 +337,122 @@ impl SchemaMapper for SchemaMapping {
     }
 }
 
+/// Check if a type coercion from a Parquet file type to a required Spark type is allowed.
+/// Returns true if the coercion is compatible, false if Spark's vectorized reader would reject it.
+///
+/// This function rejects specific type conversions that Spark's vectorized reader would reject.
+/// Conversions not explicitly rejected are allowed (Comet's parquet_convert_array or arrow-rs
+/// cast handles them).
+///
+/// When `schema_evolution_enabled` is true, integer and float widening conversions are allowed
+/// (e.g., Int32 → Int64, Float32 → Float64).
+fn is_spark_compatible_parquet_coercion(
+    file_type: &DataType,
+    required_type: &DataType,
+    schema_evolution_enabled: bool,
+) -> bool {
+    use DataType::*;
+    match (file_type, required_type) {
+        // Same type is always OK
+        (a, b) if a == b => true,
+
+        // Spark rejects reading TimestampLTZ as TimestampNTZ (and vice versa)
+        (Timestamp(_, Some(_)), Timestamp(_, None))
+        | (Timestamp(_, None), Timestamp(_, Some(_))) => false,
+
+        // Spark rejects integer type widening in the vectorized reader
+        // (INT32 → LongType, INT32 → DoubleType, etc.)
+        // When schema evolution is enabled, these widenings are allowed.
+        (Int8 | Int16 | Int32, Int64) => schema_evolution_enabled,
+        (Int8 | Int16 | Int32 | Int64, Float32 | Float64) => schema_evolution_enabled,
+        (Float32, Float64) => schema_evolution_enabled,
+
+        // Spark rejects reading string/binary columns as timestamp or other numeric types
+        (Utf8 | LargeUtf8 | Binary | LargeBinary, Timestamp(_, _)) => false,
+        (Utf8 | LargeUtf8 | Binary | LargeBinary, Int8 | Int16 | Int32 | Int64) => false,
+
+        // Reject cross-category conversions between non-matching structural types
+        // e.g., scalar types to list/struct/map types
+        (_, List(_) | LargeList(_) | Struct(_) | Map(_, _))
+            if !matches!(file_type, List(_) | LargeList(_) | Struct(_) | Map(_, _)) =>
+        {
+            false
+        }
+
+        // For struct types, recursively check field conversions
+        (Struct(from_fields), Struct(to_fields)) => {
+            for to_field in to_fields.iter() {
+                if let Some(from_field) = from_fields
+                    .iter()
+                    .find(|f| f.name().to_lowercase() == to_field.name().to_lowercase())
+                {
+                    if from_field.data_type() != to_field.data_type()
+                        && !is_spark_compatible_parquet_coercion(
+                            from_field.data_type(),
+                            to_field.data_type(),
+                            schema_evolution_enabled,
+                        )
+                    {
+                        return false;
+                    }
+                }
+            }
+            true
+        }
+
+        // For list types, check element type conversions
+        (List(from_inner), List(to_inner)) => {
+            from_inner.data_type() == to_inner.data_type()
+                || is_spark_compatible_parquet_coercion(
+                    from_inner.data_type(),
+                    to_inner.data_type(),
+                    schema_evolution_enabled,
+                )
+        }
+
+        // Everything else is allowed (handled by parquet_convert_array or arrow-rs cast)
+        _ => true,
+    }
+}
+
+/// Convert an Arrow DataType to a Spark-style display name for error messages
+fn arrow_type_to_spark_name(dt: &DataType) -> String {
+    use DataType::*;
+    match dt {
+        Boolean => "boolean".to_string(),
+        Int8 => "tinyint".to_string(),
+        Int16 => "smallint".to_string(),
+        Int32 => "int".to_string(),
+        Int64 => "bigint".to_string(),
+        Float32 => "float".to_string(),
+        Float64 => "double".to_string(),
+        Utf8 | LargeUtf8 => "string".to_string(),
+        Binary | LargeBinary => "binary".to_string(),
+        Date32 => "date".to_string(),
+        Timestamp(_, Some(_)) => "timestamp".to_string(),
+        Timestamp(_, None) => "timestamp_ntz".to_string(),
+        Decimal128(p, s) => format!("decimal({},{})", p, s),
+        _ => format!("{}", dt),
+    }
+}
+
+/// Convert an Arrow DataType to a Parquet physical type name for error messages
+fn arrow_type_to_parquet_physical_name(dt: &DataType) -> String {
+    use DataType::*;
+    match dt {
+        Boolean => "BOOLEAN".to_string(),
+        Int8 | Int16 | Int32 | UInt8 | UInt16 => "INT32".to_string(),
+        Int64 | UInt32 | UInt64 => "INT64".to_string(),
+        Float32 => "FLOAT".to_string(),
+        Float64 => "DOUBLE".to_string(),
+        Utf8 | LargeUtf8 | Binary | LargeBinary => "BINARY".to_string(),
+        Date32 => "INT32".to_string(),
+        Timestamp(_, _) => "INT64".to_string(),
+        Decimal128(_, _) => "FIXED_LEN_BYTE_ARRAY".to_string(),
+        _ => format!("{}", dt),
+    }
+}
+
 #[cfg(test)]
 mod test {
     use crate::parquet::parquet_support::SparkParquetOptions;
diff --git a/native/proto/src/proto/operator.proto b/native/proto/src/proto/operator.proto
index 73c087cf36..a11c9b7f16 100644
--- a/native/proto/src/proto/operator.proto
+++ b/native/proto/src/proto/operator.proto
@@ -109,6 +109,8 @@ message NativeScan {
   // the map.
   map<string, string> object_store_options = 13;
   bool encryption_enabled = 14;
+  bool schema_validation_enabled = 15;
+  bool schema_evolution_enabled = 16;
 }
 
 message CsvScan {
diff --git a/spark/src/main/scala/org/apache/comet/CometExecIterator.scala b/spark/src/main/scala/org/apache/comet/CometExecIterator.scala
index 3156eb3873..8fdfcbdc2b 100644
--- a/spark/src/main/scala/org/apache/comet/CometExecIterator.scala
+++ b/spark/src/main/scala/org/apache/comet/CometExecIterator.scala
@@ -162,7 +162,11 @@ class CometExecIterator(
             """\(os error \d+\)$""").r
         val parquetError: Regex =
           """^Parquet error: (?:.*)$""".r
-        e.getMessage match {
+        // Strip "External error: " prefix that DataFusion adds
+        val msg = e.getMessage
+          .replaceFirst("^External error: ", "")
+          .replaceFirst("^External: ", "")
+        msg match {
           case fileNotFoundPattern(filePath) =>
             // See org.apache.spark.sql.errors.QueryExecutionErrors.readCurrentFileNotFoundError
             throw new SparkException(
@@ -177,6 +181,11 @@ class CometExecIterator(
               errorClass = "_LEGACY_ERROR_TEMP_2254",
               messageParameters = Map("message" -> e.getMessage),
               cause = new SparkException("File is not a Parquet file.", e))
+          case m
+              if m.contains("Parquet column cannot be converted") ||
+                m.contains("Found duplicate field(s)") ||
+                m.contains("Unable to create Parquet converter") =>
+            throw new SparkException(msg, e)
           case _ =>
             throw e
         }
diff --git a/spark/src/main/scala/org/apache/comet/serde/operator/CometNativeScan.scala b/spark/src/main/scala/org/apache/comet/serde/operator/CometNativeScan.scala
index b7909b67cb..035e757240 100644
--- a/spark/src/main/scala/org/apache/comet/serde/operator/CometNativeScan.scala
+++ b/spark/src/main/scala/org/apache/comet/serde/operator/CometNativeScan.scala
@@ -176,6 +176,10 @@ object CometNativeScan extends CometOperatorSerde[CometScanExec] with Logging {
       nativeScanBuilder.addAllPartitionSchema(partitionSchema.toIterable.asJava)
       nativeScanBuilder.setSessionTimezone(scan.conf.getConfString("spark.sql.session.timeZone"))
       nativeScanBuilder.setCaseSensitive(scan.conf.getConf[Boolean](SQLConf.CASE_SENSITIVE))
+      nativeScanBuilder.setSchemaValidationEnabled(
+        CometConf.COMET_PARQUET_SCHEMA_VALIDATION_ENABLED.get(scan.conf))
+      nativeScanBuilder.setSchemaEvolutionEnabled(
+        CometConf.COMET_SCHEMA_EVOLUTION_ENABLED.get(scan.conf))
 
       // Collect S3/cloud storage configurations
       val hadoopConf = scan.relation.sparkSession.sessionState
diff --git a/spark/src/test/scala/org/apache/comet/parquet/ParquetReadSuite.scala b/spark/src/test/scala/org/apache/comet/parquet/ParquetReadSuite.scala
index 928e66b29b..75444215fc 100644
--- a/spark/src/test/scala/org/apache/comet/parquet/ParquetReadSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/parquet/ParquetReadSuite.scala
@@ -1251,9 +1251,7 @@ abstract class ParquetReadSuite extends CometTestBase {
                 Seq(StructField("_1", LongType, false), StructField("_2", DoubleType, false)))
 
             withParquetDataFrame(data, schema = Some(readSchema)) { df =>
-              // TODO: validate with Spark 3.x and 'usingDataFusionParquetExec=true'
-              if (enableSchemaEvolution || CometConf.COMET_NATIVE_SCAN_IMPL
-                  .get(conf) == CometConf.SCAN_NATIVE_DATAFUSION) {
+              if (enableSchemaEvolution) {
                 checkAnswer(df, data.map(Row.fromTuple))
               } else {
                 assertThrows[SparkException](df.collect())

From 092e7570ece5d91cdaa7f4154642e69d7805964c Mon Sep 17 00:00:00 2001
From: Andy Grove <agrove@apache.org>
Date: Thu, 5 Feb 2026 21:20:10 -0700
Subject: [PATCH 2/8] fix: allow INT96 timestamp coercion in schema validation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

INT96 timestamps (default for Spark LTZ) are coerced by arrow-rs to
Timestamp(Microsecond, None) without a timezone, but Spark's required
schema expects Timestamp(Microsecond, Some(tz)). The schema validation
was incorrectly rejecting this as a NTZ→LTZ mismatch. The downstream
parquet_convert_array already handles this correctly by reattaching the
session timezone.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 native/core/src/parquet/schema_adapter.rs | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/native/core/src/parquet/schema_adapter.rs b/native/core/src/parquet/schema_adapter.rs
index 6ddc928cd5..39560becaa 100644
--- a/native/core/src/parquet/schema_adapter.rs
+++ b/native/core/src/parquet/schema_adapter.rs
@@ -356,9 +356,14 @@ fn is_spark_compatible_parquet_coercion(
         // Same type is always OK
         (a, b) if a == b => true,
 
-        // Spark rejects reading TimestampLTZ as TimestampNTZ (and vice versa)
-        (Timestamp(_, Some(_)), Timestamp(_, None))
-        | (Timestamp(_, None), Timestamp(_, Some(_))) => false,
+        // Spark rejects reading TimestampLTZ as TimestampNTZ
+        (Timestamp(_, Some(_)), Timestamp(_, None)) => false,
+
+        // Allow Timestamp(_, None) → Timestamp(_, Some(_)) because arrow-rs represents
+        // INT96 timestamps (always LTZ in Spark) as Timestamp(Microsecond, None) after
+        // coercion. The downstream parquet_convert_array handles this by reattaching the
+        // session timezone.
+        (Timestamp(_, None), Timestamp(_, Some(_))) => true,
 
         // Spark rejects integer type widening in the vectorized reader
         // (INT32 → LongType, INT32 → DoubleType, etc.)

From 68def681b26d356f1bfa1a0b55d5e54dfbc51924 Mon Sep 17 00:00:00 2001
From: Andy Grove <agrove@apache.org>
Date: Fri, 6 Feb 2026 06:52:37 -0700
Subject: [PATCH 3/8] fix: regenerate 3.5.8.diff with missing test ignore
 annotations

Regenerate the Spark test diff from the patched source to include
ignore annotations that were applied locally but not captured in the
diff file. Notably adds IgnoreCometNativeDataFusion for SPARK-36182
(can't read TimestampLTZ as TimestampNTZ) and the row group skipping
overflow test, both tracked under issue #3311.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 dev/diffs/3.5.8.diff | 131 ++++++++++++++++++-------------------------
 1 file changed, 54 insertions(+), 77 deletions(-)

diff --git a/dev/diffs/3.5.8.diff b/dev/diffs/3.5.8.diff
index b2799eb280..27a3d05ab8 100644
--- a/dev/diffs/3.5.8.diff
+++ b/dev/diffs/3.5.8.diff
@@ -2065,79 +2065,6 @@ index 07e2849ce6f..3e73645b638 100644
      val extraOptions = Map[String, String](
        ParquetOutputFormat.WRITER_VERSION -> ParquetProperties.WriterVersion.PARQUET_2_0.toString
      )
-diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFieldIdIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFieldIdIOSuite.scala
-index 5e01d3f447c..284d6657d4f 100644
---- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFieldIdIOSuite.scala
-+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFieldIdIOSuite.scala
-@@ -20,7 +20,7 @@ package org.apache.spark.sql.execution.datasources.parquet
- import scala.collection.JavaConverters._
- 
- import org.apache.spark.SparkException
--import org.apache.spark.sql.{QueryTest, Row}
-+import org.apache.spark.sql.{IgnoreCometNativeDataFusion, QueryTest, Row}
- import org.apache.spark.sql.internal.SQLConf
- import org.apache.spark.sql.test.SharedSparkSession
- import org.apache.spark.sql.types.{ArrayType, IntegerType, MapType, Metadata, MetadataBuilder, StringType, StructType}
-@@ -30,7 +30,8 @@ class ParquetFieldIdIOSuite extends QueryTest with ParquetTest with SharedSparkS
-   private def withId(id: Int): Metadata =
-     new MetadataBuilder().putLong(ParquetUtils.FIELD_ID_METADATA_KEY, id).build()
- 
--  test("Parquet reads infer fields using field ids correctly") {
-+  test("Parquet reads infer fields using field ids correctly",
-+    IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3316")) {
-     withTempDir { dir =>
-       val readSchema =
-         new StructType()
-@@ -78,7 +79,8 @@ class ParquetFieldIdIOSuite extends QueryTest with ParquetTest with SharedSparkS
-     }
-   }
- 
--  test("absence of field ids") {
-+  test("absence of field ids",
-+    IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3316")) {
-     withTempDir { dir =>
-       val readSchema =
-         new StructType()
-@@ -107,7 +109,8 @@ class ParquetFieldIdIOSuite extends QueryTest with ParquetTest with SharedSparkS
-     }
-   }
- 
--  test("SPARK-38094: absence of field ids: reading nested schema") {
-+  test("SPARK-38094: absence of field ids: reading nested schema",
-+    IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3316")) {
-     withTempDir { dir =>
-       // now with nested schema/complex type
-       val readSchema =
-@@ -136,7 +139,8 @@ class ParquetFieldIdIOSuite extends QueryTest with ParquetTest with SharedSparkS
-     }
-   }
- 
--  test("multiple id matches") {
-+  test("multiple id matches",
-+    IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3316")) {
-     withTempDir { dir =>
-       val readSchema =
-         new StructType()
-@@ -163,7 +167,8 @@ class ParquetFieldIdIOSuite extends QueryTest with ParquetTest with SharedSparkS
-     }
-   }
- 
--  test("read parquet file without ids") {
-+  test("read parquet file without ids",
-+    IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3316")) {
-     withTempDir { dir =>
-       val readSchema =
-         new StructType()
-@@ -196,7 +201,8 @@ class ParquetFieldIdIOSuite extends QueryTest with ParquetTest with SharedSparkS
-     }
-   }
- 
--  test("global read/write flag should work correctly") {
-+  test("global read/write flag should work correctly",
-+    IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3316")) {
-     withTempDir { dir =>
-       val readSchema =
-         new StructType()
 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileMetadataStructRowIndexSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileMetadataStructRowIndexSuite.scala
 index c10e1799702..ba6629abfd9 100644
 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileMetadataStructRowIndexSuite.scala
@@ -2271,7 +2198,17 @@ index 8e88049f51e..49f2001dc6b 100644
      val schema = StructType(Seq(
        StructField("a", IntegerType, nullable = false)
      ))
-@@ -1984,7 +2000,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
+@@ -1933,7 +1949,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
+     }
+   }
+ 
+-  test("SPARK-25207: exception when duplicate fields in case-insensitive mode") {
++  test("SPARK-25207: exception when duplicate fields in case-insensitive mode",
++    IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3311")) {
+     withTempPath { dir =>
+       val count = 10
+       val tableName = "spark_25207"
+@@ -1984,7 +2001,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
      }
    }
  
@@ -2321,7 +2258,27 @@ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/
 index 8ed9ef1630e..f312174b182 100644
 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
 +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
-@@ -1345,7 +1345,8 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession
+@@ -1064,7 +1064,8 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession
+     }
+   }
+ 
+-  test("SPARK-35640: read binary as timestamp should throw schema incompatible error") {
++  test("SPARK-35640: read binary as timestamp should throw schema incompatible error",
++    IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3311")) {
+     val data = (1 to 4).map(i => Tuple1(i.toString))
+     val readSchema = StructType(Seq(StructField("_1", DataTypes.TimestampType)))
+ 
+@@ -1075,7 +1076,8 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession
+     }
+   }
+ 
+-  test("SPARK-35640: int as long should throw schema incompatible error") {
++  test("SPARK-35640: int as long should throw schema incompatible error",
++    IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3311")) {
+     val data = (1 to 4).map(i => Tuple1(i))
+     val readSchema = StructType(Seq(StructField("_1", DataTypes.LongType)))
+ 
+@@ -1345,7 +1347,8 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession
      }
    }
  
@@ -2335,7 +2292,17 @@ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/
 index f6472ba3d9d..ce39ebb52e6 100644
 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala
 +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala
-@@ -998,7 +998,8 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
+@@ -185,7 +185,8 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
+     }
+   }
+ 
+-  test("SPARK-36182: can't read TimestampLTZ as TimestampNTZ") {
++  test("SPARK-36182: can't read TimestampLTZ as TimestampNTZ",
++    IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3311")) {
+     val data = (1 to 1000).map { i =>
+       val ts = new java.sql.Timestamp(i)
+       Row(ts)
+@@ -998,7 +999,8 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
      }
    }
  
@@ -2375,7 +2342,17 @@ index f6472ba3d9d..ce39ebb52e6 100644
          checkAnswer(readParquet("a DECIMAL(3, 2)", path), sql("SELECT 1.00"))
          checkAnswer(readParquet("b DECIMAL(3, 2)", path), Row(null))
          checkAnswer(readParquet("b DECIMAL(11, 1)", path), sql("SELECT 123456.0"))
-@@ -1148,7 +1152,7 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
+@@ -1133,7 +1138,8 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
+     }
+   }
+ 
+-  test("row group skipping doesn't overflow when reading into larger type") {
++  test("row group skipping doesn't overflow when reading into larger type",
++    IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3311")) {
+     withTempPath { path =>
+       Seq(0).toDF("a").write.parquet(path.toString)
+       // The vectorized and non-vectorized readers will produce different exceptions, we don't need
+@@ -1148,7 +1154,7 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
              .where(s"a < ${Long.MaxValue}")
              .collect()
          }

From 15b0eb53df4780329832f79d5c39b483242915d7 Mon Sep 17 00:00:00 2001
From: Andy Grove <agrove@apache.org>
Date: Fri, 6 Feb 2026 08:38:29 -0700
Subject: [PATCH 4/8] fix: remove ParquetFieldIdIOSuite ignore annotations,
 fixed in #3415

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 dev/diffs/3.5.8.diff | 73 --------------------------------------------
 1 file changed, 73 deletions(-)

diff --git a/dev/diffs/3.5.8.diff b/dev/diffs/3.5.8.diff
index 2976458999..e6fec7d9d8 100644
--- a/dev/diffs/3.5.8.diff
+++ b/dev/diffs/3.5.8.diff
@@ -2065,79 +2065,6 @@ index 07e2849ce6f..3e73645b638 100644
      val extraOptions = Map[String, String](
        ParquetOutputFormat.WRITER_VERSION -> ParquetProperties.WriterVersion.PARQUET_2_0.toString
      )
-diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFieldIdIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFieldIdIOSuite.scala
-index 5e01d3f447c..284d6657d4f 100644
---- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFieldIdIOSuite.scala
-+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFieldIdIOSuite.scala
-@@ -20,7 +20,7 @@ package org.apache.spark.sql.execution.datasources.parquet
- import scala.collection.JavaConverters._
-
- import org.apache.spark.SparkException
--import org.apache.spark.sql.{QueryTest, Row}
-+import org.apache.spark.sql.{IgnoreCometNativeDataFusion, QueryTest, Row}
- import org.apache.spark.sql.internal.SQLConf
- import org.apache.spark.sql.test.SharedSparkSession
- import org.apache.spark.sql.types.{ArrayType, IntegerType, MapType, Metadata, MetadataBuilder, StringType, StructType}
-@@ -30,7 +30,8 @@ class ParquetFieldIdIOSuite extends QueryTest with ParquetTest with SharedSparkS
-   private def withId(id: Int): Metadata =
-     new MetadataBuilder().putLong(ParquetUtils.FIELD_ID_METADATA_KEY, id).build()
-
--  test("Parquet reads infer fields using field ids correctly") {
-+  test("Parquet reads infer fields using field ids correctly",
-+    IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3316")) {
-     withTempDir { dir =>
-       val readSchema =
-         new StructType()
-@@ -78,7 +79,8 @@ class ParquetFieldIdIOSuite extends QueryTest with ParquetTest with SharedSparkS
-     }
-   }
-
--  test("absence of field ids") {
-+  test("absence of field ids",
-+    IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3316")) {
-     withTempDir { dir =>
-       val readSchema =
-         new StructType()
-@@ -107,7 +109,8 @@ class ParquetFieldIdIOSuite extends QueryTest with ParquetTest with SharedSparkS
-     }
-   }
-
--  test("SPARK-38094: absence of field ids: reading nested schema") {
-+  test("SPARK-38094: absence of field ids: reading nested schema",
-+    IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3316")) {
-     withTempDir { dir =>
-       // now with nested schema/complex type
-       val readSchema =
-@@ -136,7 +139,8 @@ class ParquetFieldIdIOSuite extends QueryTest with ParquetTest with SharedSparkS
-     }
-   }
-
--  test("multiple id matches") {
-+  test("multiple id matches",
-+    IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3316")) {
-     withTempDir { dir =>
-       val readSchema =
-         new StructType()
-@@ -163,7 +167,8 @@ class ParquetFieldIdIOSuite extends QueryTest with ParquetTest with SharedSparkS
-     }
-   }
-
--  test("read parquet file without ids") {
-+  test("read parquet file without ids",
-+    IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3316")) {
-     withTempDir { dir =>
-       val readSchema =
-         new StructType()
-@@ -196,7 +201,8 @@ class ParquetFieldIdIOSuite extends QueryTest with ParquetTest with SharedSparkS
-     }
-   }
-
--  test("global read/write flag should work correctly") {
-+  test("global read/write flag should work correctly",
-+    IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3316")) {
-     withTempDir { dir =>
-       val readSchema =
-         new StructType()
 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileMetadataStructRowIndexSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileMetadataStructRowIndexSuite.scala
 index c10e1799702..ba6629abfd9 100644
 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileMetadataStructRowIndexSuite.scala

From b39e6142165f1a4bd808a51ffa275324c4aee462 Mon Sep 17 00:00:00 2001
From: Andy Grove <agrove@apache.org>
Date: Fri, 6 Feb 2026 08:50:04 -0700
Subject: [PATCH 5/8] fix: regenerate 3.5.8.diff from clean Spark checkout

Regenerated from a fresh Spark v3.5.8 clone with only the
ParquetFileMetadataStructRowIndexSuite ignore annotations (#3317).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 dev/diffs/3.5.8.diff | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/dev/diffs/3.5.8.diff b/dev/diffs/3.5.8.diff
index e6fec7d9d8..27a3d05ab8 100644
--- a/dev/diffs/3.5.8.diff
+++ b/dev/diffs/3.5.8.diff
@@ -2072,7 +2072,7 @@ index c10e1799702..ba6629abfd9 100644
 @@ -16,7 +16,7 @@
   */
  package org.apache.spark.sql.execution.datasources.parquet
-
+ 
 -import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest}
 +import org.apache.spark.sql.{AnalysisException, DataFrame, IgnoreCometNativeDataFusion, QueryTest}
  import org.apache.spark.sql.execution.datasources.FileFormat
@@ -2081,7 +2081,7 @@ index c10e1799702..ba6629abfd9 100644
 @@ -154,7 +154,8 @@ class ParquetFileMetadataStructRowIndexSuite extends QueryTest with SharedSparkS
      }
    }
-
+ 
 -  test(s"reading ${ROW_INDEX_TEMPORARY_COLUMN_NAME} - not present in a table") {
 +  test(s"reading ${ROW_INDEX_TEMPORARY_COLUMN_NAME} - not present in a table",
 +    IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3317")) {
@@ -2091,7 +2091,7 @@ index c10e1799702..ba6629abfd9 100644
 @@ -172,7 +173,8 @@ class ParquetFileMetadataStructRowIndexSuite extends QueryTest with SharedSparkS
      }
    }
-
+ 
 -  test(s"reading ${ROW_INDEX_TEMPORARY_COLUMN_NAME} - present in a table") {
 +  test(s"reading ${ROW_INDEX_TEMPORARY_COLUMN_NAME} - present in a table",
 +    IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3317")) {
@@ -2101,7 +2101,7 @@ index c10e1799702..ba6629abfd9 100644
 @@ -189,7 +191,8 @@ class ParquetFileMetadataStructRowIndexSuite extends QueryTest with SharedSparkS
      }
    }
-
+ 
 -  test(s"reading ${ROW_INDEX_TEMPORARY_COLUMN_NAME} - as partition col") {
 +  test(s"reading ${ROW_INDEX_TEMPORARY_COLUMN_NAME} - as partition col",
 +    IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3317")) {

From 63b1c35684ffe0f0d5233b67732bec87f2446c9f Mon Sep 17 00:00:00 2001
From: Andy Grove <agrove@apache.org>
Date: Fri, 6 Feb 2026 09:37:18 -0700
Subject: [PATCH 6/8] fix: reset 3.5.8.diff to match apache/main

Row index tests (#3317) and field ID tests (#3316) are both fixed
upstream in #3414 and #3415, so no additional test ignores are needed.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 dev/diffs/3.5.8.diff | 43 -------------------------------------------
 1 file changed, 43 deletions(-)

diff --git a/dev/diffs/3.5.8.diff b/dev/diffs/3.5.8.diff
index 27a3d05ab8..72c41e4f82 100644
--- a/dev/diffs/3.5.8.diff
+++ b/dev/diffs/3.5.8.diff
@@ -2065,49 +2065,6 @@ index 07e2849ce6f..3e73645b638 100644
      val extraOptions = Map[String, String](
        ParquetOutputFormat.WRITER_VERSION -> ParquetProperties.WriterVersion.PARQUET_2_0.toString
      )
-diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileMetadataStructRowIndexSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileMetadataStructRowIndexSuite.scala
-index c10e1799702..ba6629abfd9 100644
---- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileMetadataStructRowIndexSuite.scala
-+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileMetadataStructRowIndexSuite.scala
-@@ -16,7 +16,7 @@
-  */
- package org.apache.spark.sql.execution.datasources.parquet
- 
--import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest}
-+import org.apache.spark.sql.{AnalysisException, DataFrame, IgnoreCometNativeDataFusion, QueryTest}
- import org.apache.spark.sql.execution.datasources.FileFormat
- import org.apache.spark.sql.functions.{col, lit}
- import org.apache.spark.sql.internal.SQLConf
-@@ -154,7 +154,8 @@ class ParquetFileMetadataStructRowIndexSuite extends QueryTest with SharedSparkS
-     }
-   }
- 
--  test(s"reading ${ROW_INDEX_TEMPORARY_COLUMN_NAME} - not present in a table") {
-+  test(s"reading ${ROW_INDEX_TEMPORARY_COLUMN_NAME} - not present in a table",
-+    IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3317")) {
-     // File format supporting row index generation populates the column with row indexes.
-     withReadDataFrame("parquet", extraSchemaFields =
-         Seq(StructField(ROW_INDEX_TEMPORARY_COLUMN_NAME, LongType))) { df =>
-@@ -172,7 +173,8 @@ class ParquetFileMetadataStructRowIndexSuite extends QueryTest with SharedSparkS
-     }
-   }
- 
--  test(s"reading ${ROW_INDEX_TEMPORARY_COLUMN_NAME} - present in a table") {
-+  test(s"reading ${ROW_INDEX_TEMPORARY_COLUMN_NAME} - present in a table",
-+    IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3317")) {
-     withReadDataFrame("parquet", extraCol = ROW_INDEX_TEMPORARY_COLUMN_NAME) { df =>
-       // Values of ROW_INDEX_TEMPORARY_COLUMN_NAME column are always populated with
-       // generated row indexes, rather than read from the file.
-@@ -189,7 +191,8 @@ class ParquetFileMetadataStructRowIndexSuite extends QueryTest with SharedSparkS
-     }
-   }
- 
--  test(s"reading ${ROW_INDEX_TEMPORARY_COLUMN_NAME} - as partition col") {
-+  test(s"reading ${ROW_INDEX_TEMPORARY_COLUMN_NAME} - as partition col",
-+    IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3317")) {
-     withReadDataFrame("parquet", partitionCol = ROW_INDEX_TEMPORARY_COLUMN_NAME) { df =>
-       // Column values are set for each partition, rather than populated with generated row indexes.
-       assert(df
 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
 index 8e88049f51e..49f2001dc6b 100644
 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala

From c89c96140dfba6a61589e13f6379887842fa2602 Mon Sep 17 00:00:00 2001
From: Andy Grove <agrove@apache.org>
Date: Fri, 6 Feb 2026 10:34:08 -0700
Subject: [PATCH 7/8] fix: unignore 5 Parquet schema validation tests in
 3.5.8.diff

Remove IgnoreCometNativeDataFusion annotations for tests that are now
passing with the schema validation fix for #3311:
- ParquetIOSuite: SPARK-35640 read binary as timestamp
- ParquetIOSuite: SPARK-35640 int as long
- ParquetQuerySuite: SPARK-36182 can't read TimestampLTZ as TimestampNTZ
- ParquetQuerySuite: row group skipping doesn't overflow
- ParquetFilterSuite: SPARK-25207 duplicate fields in case-insensitive mode

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 dev/diffs/3.5.8.diff | 76 ++++++++------------------------------------
 1 file changed, 13 insertions(+), 63 deletions(-)

diff --git a/dev/diffs/3.5.8.diff b/dev/diffs/3.5.8.diff
index 72c41e4f82..41347cdd09 100644
--- a/dev/diffs/3.5.8.diff
+++ b/dev/diffs/3.5.8.diff
@@ -2066,7 +2066,7 @@ index 07e2849ce6f..3e73645b638 100644
        ParquetOutputFormat.WRITER_VERSION -> ParquetProperties.WriterVersion.PARQUET_2_0.toString
      )
 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
-index 8e88049f51e..49f2001dc6b 100644
+index 8e88049f51e..e21a5797996 100644
 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
 +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
 @@ -1095,7 +1095,11 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
@@ -2155,17 +2155,7 @@ index 8e88049f51e..49f2001dc6b 100644
      val schema = StructType(Seq(
        StructField("a", IntegerType, nullable = false)
      ))
-@@ -1933,7 +1949,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
-     }
-   }
- 
--  test("SPARK-25207: exception when duplicate fields in case-insensitive mode") {
-+  test("SPARK-25207: exception when duplicate fields in case-insensitive mode",
-+    IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3311")) {
-     withTempPath { dir =>
-       val count = 10
-       val tableName = "spark_25207"
-@@ -1984,7 +2001,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
+@@ -1984,7 +2000,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
      }
    }
  
@@ -2175,7 +2165,7 @@ index 8e88049f51e..49f2001dc6b 100644
      // block 1:
      //                      null count  min                                       max
      // page-0                         0  0                                         99
-@@ -2044,7 +2062,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
+@@ -2044,7 +2061,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
      }
    }
  
@@ -2185,7 +2175,7 @@ index 8e88049f51e..49f2001dc6b 100644
      withTempPath { dir =>
        val path = dir.getCanonicalPath
        spark.range(100).selectExpr("id * 2 AS id")
-@@ -2276,7 +2295,11 @@ class ParquetV1FilterSuite extends ParquetFilterSuite {
+@@ -2276,7 +2294,11 @@ class ParquetV1FilterSuite extends ParquetFilterSuite {
            assert(pushedParquetFilters.exists(_.getClass === filterClass),
              s"${pushedParquetFilters.map(_.getClass).toList} did not contain ${filterClass}.")
  
@@ -2198,7 +2188,7 @@ index 8e88049f51e..49f2001dc6b 100644
          } else {
            assert(selectedFilters.isEmpty, "There is filter pushed down")
          }
-@@ -2336,7 +2359,11 @@ class ParquetV2FilterSuite extends ParquetFilterSuite {
+@@ -2336,7 +2358,11 @@ class ParquetV2FilterSuite extends ParquetFilterSuite {
            assert(pushedParquetFilters.exists(_.getClass === filterClass),
              s"${pushedParquetFilters.map(_.getClass).toList} did not contain ${filterClass}.")
  
@@ -2212,30 +2202,10 @@ index 8e88049f51e..49f2001dc6b 100644
          case _ =>
            throw new AnalysisException("Can not match ParquetTable in the query.")
 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
-index 8ed9ef1630e..f312174b182 100644
+index 8ed9ef1630e..eed2a6f5ad5 100644
 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
 +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
-@@ -1064,7 +1064,8 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession
-     }
-   }
- 
--  test("SPARK-35640: read binary as timestamp should throw schema incompatible error") {
-+  test("SPARK-35640: read binary as timestamp should throw schema incompatible error",
-+    IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3311")) {
-     val data = (1 to 4).map(i => Tuple1(i.toString))
-     val readSchema = StructType(Seq(StructField("_1", DataTypes.TimestampType)))
- 
-@@ -1075,7 +1076,8 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession
-     }
-   }
- 
--  test("SPARK-35640: int as long should throw schema incompatible error") {
-+  test("SPARK-35640: int as long should throw schema incompatible error",
-+    IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3311")) {
-     val data = (1 to 4).map(i => Tuple1(i))
-     val readSchema = StructType(Seq(StructField("_1", DataTypes.LongType)))
- 
-@@ -1345,7 +1347,8 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession
+@@ -1345,7 +1345,8 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession
      }
    }
  
@@ -2246,20 +2216,10 @@ index 8ed9ef1630e..f312174b182 100644
        checkAnswer(
          // "fruit" column in this file is encoded using DELTA_LENGTH_BYTE_ARRAY.
 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala
-index f6472ba3d9d..ce39ebb52e6 100644
+index f6472ba3d9d..18295e0b0f0 100644
 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala
 +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala
-@@ -185,7 +185,8 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
-     }
-   }
- 
--  test("SPARK-36182: can't read TimestampLTZ as TimestampNTZ") {
-+  test("SPARK-36182: can't read TimestampLTZ as TimestampNTZ",
-+    IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3311")) {
-     val data = (1 to 1000).map { i =>
-       val ts = new java.sql.Timestamp(i)
-       Row(ts)
-@@ -998,7 +999,8 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
+@@ -998,7 +998,8 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
      }
    }
  
@@ -2269,7 +2229,7 @@ index f6472ba3d9d..ce39ebb52e6 100644
      withAllParquetReaders {
        withTempPath { path =>
          // Repeated values for dictionary encoding.
-@@ -1051,7 +1053,8 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
+@@ -1051,7 +1052,8 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
      testMigration(fromTsType = "TIMESTAMP_MICROS", toTsType = "INT96")
    }
  
@@ -2279,7 +2239,7 @@ index f6472ba3d9d..ce39ebb52e6 100644
      def readParquet(schema: String, path: File): DataFrame = {
        spark.read.schema(schema).parquet(path.toString)
      }
-@@ -1067,7 +1070,8 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
+@@ -1067,7 +1069,8 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
          checkAnswer(readParquet(schema, path), df)
        }
  
@@ -2289,7 +2249,7 @@ index f6472ba3d9d..ce39ebb52e6 100644
          val schema1 = "a DECIMAL(3, 2), b DECIMAL(18, 3), c DECIMAL(37, 3)"
          checkAnswer(readParquet(schema1, path), df)
          val schema2 = "a DECIMAL(3, 0), b DECIMAL(18, 1), c DECIMAL(37, 1)"
-@@ -1089,7 +1093,8 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
+@@ -1089,7 +1092,8 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
        val df = sql(s"SELECT 1 a, 123456 b, ${Int.MaxValue.toLong * 10} c, CAST('1.2' AS BINARY) d")
        df.write.parquet(path.toString)
  
@@ -2299,17 +2259,7 @@ index f6472ba3d9d..ce39ebb52e6 100644
          checkAnswer(readParquet("a DECIMAL(3, 2)", path), sql("SELECT 1.00"))
          checkAnswer(readParquet("b DECIMAL(3, 2)", path), Row(null))
          checkAnswer(readParquet("b DECIMAL(11, 1)", path), sql("SELECT 123456.0"))
-@@ -1133,7 +1138,8 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
-     }
-   }
- 
--  test("row group skipping doesn't overflow when reading into larger type") {
-+  test("row group skipping doesn't overflow when reading into larger type",
-+    IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3311")) {
-     withTempPath { path =>
-       Seq(0).toDF("a").write.parquet(path.toString)
-       // The vectorized and non-vectorized readers will produce different exceptions, we don't need
-@@ -1148,7 +1154,7 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
+@@ -1148,7 +1152,7 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
              .where(s"a < ${Long.MaxValue}")
              .collect()
          }

From 320cc02b36b862f5acc2016965d7be177cb2c74d Mon Sep 17 00:00:00 2001
From: Andy Grove <agrove@apache.org>
Date: Fri, 6 Feb 2026 14:49:08 -0700
Subject: [PATCH 8/8] fix: fix file-not-found error handling and ignore 3
 schema validation tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix fileNotFoundPattern regex in CometExecIterator that could never match
because the "External: " prefix was stripped before matching but the regex
still expected it. This fixes 4 HiveMetadataCacheSuite test failures.

Ignore 3 Parquet schema validation tests incompatible with native_datafusion:
- SPARK-36182 (TimestampLTZ as TimestampNTZ not detected for INT96)
- SPARK-25207 (duplicate fields exception wrapping differs)
- SPARK-26709 (INT32→INT64 coercion rejected by schema validation)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 dev/diffs/3.5.8.diff                          | 60 ++++++++++++++-----
 .../org/apache/comet/CometExecIterator.scala  | 10 ++--
 2 files changed, 50 insertions(+), 20 deletions(-)

diff --git a/dev/diffs/3.5.8.diff b/dev/diffs/3.5.8.diff
index 41347cdd09..234a15c436 100644
--- a/dev/diffs/3.5.8.diff
+++ b/dev/diffs/3.5.8.diff
@@ -923,7 +923,7 @@ index c26757c9cff..d55775f09d7 100644
    protected val baseResourcePath = {
      // use the same way as `SQLQueryTestSuite` to get the resource path
 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
-index 3cf2bfd17ab..49728c35c42 100644
+index 3cf2bfd17ab..78e679a2870 100644
 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
 +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
 @@ -1521,7 +1521,8 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
@@ -936,7 +936,17 @@ index 3cf2bfd17ab..49728c35c42 100644
      AccumulatorSuite.verifyPeakExecutionMemorySet(sparkContext, "external sort") {
        sql("SELECT * FROM testData2 ORDER BY a ASC, b ASC").collect()
      }
-@@ -4459,7 +4460,8 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
+@@ -3127,7 +3128,8 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
+     }
+   }
+ 
+-  test("SPARK-26709: OptimizeMetadataOnlyQuery does not handle empty records correctly") {
++  test("SPARK-26709: OptimizeMetadataOnlyQuery does not handle empty records correctly",
++    IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3311")) {
+     withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> "parquet") {
+       Seq(true, false).foreach { enableOptimizeMetadataOnlyQuery =>
+         withSQLConf(SQLConf.OPTIMIZER_METADATA_ONLY.key ->
+@@ -4459,7 +4461,8 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
    }
  
    test("SPARK-39166: Query context of binary arithmetic should be serialized to executors" +
@@ -946,7 +956,7 @@ index 3cf2bfd17ab..49728c35c42 100644
      withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "false",
        SQLConf.ANSI_ENABLED.key -> "true") {
        withTable("t") {
-@@ -4480,7 +4482,8 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
+@@ -4480,7 +4483,8 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
    }
  
    test("SPARK-39175: Query context of Cast should be serialized to executors" +
@@ -956,7 +966,7 @@ index 3cf2bfd17ab..49728c35c42 100644
      withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "false",
        SQLConf.ANSI_ENABLED.key -> "true") {
        withTable("t") {
-@@ -4497,14 +4500,19 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
+@@ -4497,14 +4501,19 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
            val msg = intercept[SparkException] {
              sql(query).collect()
            }.getMessage
@@ -2066,7 +2076,7 @@ index 07e2849ce6f..3e73645b638 100644
        ParquetOutputFormat.WRITER_VERSION -> ParquetProperties.WriterVersion.PARQUET_2_0.toString
      )
 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
-index 8e88049f51e..e21a5797996 100644
+index 8e88049f51e..49f2001dc6b 100644
 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
 +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
 @@ -1095,7 +1095,11 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
@@ -2155,7 +2165,17 @@ index 8e88049f51e..e21a5797996 100644
      val schema = StructType(Seq(
        StructField("a", IntegerType, nullable = false)
      ))
-@@ -1984,7 +2000,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
+@@ -1933,7 +1949,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
+     }
+   }
+ 
+-  test("SPARK-25207: exception when duplicate fields in case-insensitive mode") {
++  test("SPARK-25207: exception when duplicate fields in case-insensitive mode",
++    IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3311")) {
+     withTempPath { dir =>
+       val count = 10
+       val tableName = "spark_25207"
+@@ -1984,7 +2001,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
      }
    }
  
@@ -2165,7 +2185,7 @@ index 8e88049f51e..e21a5797996 100644
      // block 1:
      //                      null count  min                                       max
      // page-0                         0  0                                         99
-@@ -2044,7 +2061,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
+@@ -2044,7 +2062,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
      }
    }
  
@@ -2175,7 +2195,7 @@ index 8e88049f51e..e21a5797996 100644
      withTempPath { dir =>
        val path = dir.getCanonicalPath
        spark.range(100).selectExpr("id * 2 AS id")
-@@ -2276,7 +2294,11 @@ class ParquetV1FilterSuite extends ParquetFilterSuite {
+@@ -2276,7 +2295,11 @@ class ParquetV1FilterSuite extends ParquetFilterSuite {
            assert(pushedParquetFilters.exists(_.getClass === filterClass),
              s"${pushedParquetFilters.map(_.getClass).toList} did not contain ${filterClass}.")
  
@@ -2188,7 +2208,7 @@ index 8e88049f51e..e21a5797996 100644
          } else {
            assert(selectedFilters.isEmpty, "There is filter pushed down")
          }
-@@ -2336,7 +2358,11 @@ class ParquetV2FilterSuite extends ParquetFilterSuite {
+@@ -2336,7 +2359,11 @@ class ParquetV2FilterSuite extends ParquetFilterSuite {
            assert(pushedParquetFilters.exists(_.getClass === filterClass),
              s"${pushedParquetFilters.map(_.getClass).toList} did not contain ${filterClass}.")
  
@@ -2216,10 +2236,20 @@ index 8ed9ef1630e..eed2a6f5ad5 100644
        checkAnswer(
          // "fruit" column in this file is encoded using DELTA_LENGTH_BYTE_ARRAY.
 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala
-index f6472ba3d9d..18295e0b0f0 100644
+index f6472ba3d9d..6ea893cdbe6 100644
 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala
 +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala
-@@ -998,7 +998,8 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
+@@ -185,7 +185,8 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
+     }
+   }
+ 
+-  test("SPARK-36182: can't read TimestampLTZ as TimestampNTZ") {
++  test("SPARK-36182: can't read TimestampLTZ as TimestampNTZ",
++    IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3311")) {
+     val data = (1 to 1000).map { i =>
+       val ts = new java.sql.Timestamp(i)
+       Row(ts)
+@@ -998,7 +999,8 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
      }
    }
  
@@ -2229,7 +2259,7 @@ index f6472ba3d9d..18295e0b0f0 100644
      withAllParquetReaders {
        withTempPath { path =>
          // Repeated values for dictionary encoding.
-@@ -1051,7 +1052,8 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
+@@ -1051,7 +1053,8 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
      testMigration(fromTsType = "TIMESTAMP_MICROS", toTsType = "INT96")
    }
  
@@ -2239,7 +2269,7 @@ index f6472ba3d9d..18295e0b0f0 100644
      def readParquet(schema: String, path: File): DataFrame = {
        spark.read.schema(schema).parquet(path.toString)
      }
-@@ -1067,7 +1069,8 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
+@@ -1067,7 +1070,8 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
          checkAnswer(readParquet(schema, path), df)
        }
  
@@ -2249,7 +2279,7 @@ index f6472ba3d9d..18295e0b0f0 100644
          val schema1 = "a DECIMAL(3, 2), b DECIMAL(18, 3), c DECIMAL(37, 3)"
          checkAnswer(readParquet(schema1, path), df)
          val schema2 = "a DECIMAL(3, 0), b DECIMAL(18, 1), c DECIMAL(37, 1)"
-@@ -1089,7 +1092,8 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
+@@ -1089,7 +1093,8 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
        val df = sql(s"SELECT 1 a, 123456 b, ${Int.MaxValue.toLong * 10} c, CAST('1.2' AS BINARY) d")
        df.write.parquet(path.toString)
  
@@ -2259,7 +2289,7 @@ index f6472ba3d9d..18295e0b0f0 100644
          checkAnswer(readParquet("a DECIMAL(3, 2)", path), sql("SELECT 1.00"))
          checkAnswer(readParquet("b DECIMAL(3, 2)", path), Row(null))
          checkAnswer(readParquet("b DECIMAL(11, 1)", path), sql("SELECT 123456.0"))
-@@ -1148,7 +1152,7 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
+@@ -1148,7 +1153,7 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS
              .where(s"a < ${Long.MaxValue}")
              .collect()
          }
diff --git a/spark/src/main/scala/org/apache/comet/CometExecIterator.scala b/spark/src/main/scala/org/apache/comet/CometExecIterator.scala
index 8fdfcbdc2b..758fd00c8c 100644
--- a/spark/src/main/scala/org/apache/comet/CometExecIterator.scala
+++ b/spark/src/main/scala/org/apache/comet/CometExecIterator.scala
@@ -157,15 +157,15 @@ class CometExecIterator(
         // threw the exception, so we log the exception with taskAttemptId here
         logError(s"Native execution for task $taskAttemptId failed", e)
 
-        val fileNotFoundPattern: Regex =
-          ("""^External: Object at location (.+?) not found: No such file or directory """ +
-            """\(os error \d+\)$""").r
-        val parquetError: Regex =
-          """^Parquet error: (?:.*)$""".r
         // Strip "External error: " prefix that DataFusion adds
         val msg = e.getMessage
           .replaceFirst("^External error: ", "")
           .replaceFirst("^External: ", "")
+        val fileNotFoundPattern: Regex =
+          ("""^Object at location (.+?) not found: No such file or directory """ +
+            """\(os error \d+\)$""").r
+        val parquetError: Regex =
+          """^Parquet error: (?:.*)$""".r
         msg match {
           case fileNotFoundPattern(filePath) =>
             // See org.apache.spark.sql.errors.QueryExecutionErrors.readCurrentFileNotFoundError