diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InMemoryFileIndex.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InMemoryFileIndex.scala
index 44d31131e9c6d..8920ff88be519 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InMemoryFileIndex.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InMemoryFileIndex.scala
@@ -101,7 +101,7 @@ class InMemoryFileIndex(
   }
 
   override def equals(other: Any): Boolean = other match {
-    case hdfs: InMemoryFileIndex => rootPaths.toSet == hdfs.rootPaths.toSet
+    case hdfs: InMemoryFileIndex => rootPaths.sorted == hdfs.rootPaths.sorted
     case _ => false
   }
 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileIndexSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileIndexSuite.scala
index 9ac61f0cee5fc..54403ea99c813 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileIndexSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileIndexSuite.scala
@@ -554,6 +554,30 @@ class FileIndexSuite extends SharedSparkSession {
     assert(FileIndexOptions.isValidOption("modifiedafter"))
     assert(FileIndexOptions.isValidOption("pathglobfilter"))
   }
+
+  test("SPARK-52339: Correctly compare root paths") {
+    withTempDir { dir =>
+      val file1 = new File(dir, "text1.txt")
+      stringToFile(file1, "text1")
+      val file2 = new File(dir, "text2.txt")
+      stringToFile(file2, "text2")
+      val path1 = new Path(file1.getCanonicalPath)
+      val path2 = new Path(file2.getCanonicalPath)
+
+      val schema = StructType(Seq(StructField("a", StringType, false)))
+
+      // Verify that the order of paths doesn't matter
+      val fileIndex1a = new InMemoryFileIndex(spark, Seq(path1, path2), Map.empty, Some(schema))
+      val fileIndex1b = new InMemoryFileIndex(spark, Seq(path2, path1), Map.empty, Some(schema))
+      assert(fileIndex1a == fileIndex1b)
+
+      // Verify that a different number of paths does matter
+      val fileIndex2a = new InMemoryFileIndex(spark, Seq(path1, path1), Map.empty, Some(schema))
+      val fileIndex2b = new InMemoryFileIndex(spark, Seq(path1, path1, path1),
+        Map.empty, Some(schema))
+      assert(fileIndex2a != fileIndex2b)
+    }
+  }
 }
 
 object DeletionRaceFileSystem {