Skip to content

Commit 6be7648

Browse files
authored
Updated file path handling to use toURI().toString() (#1381)
This commit replaces manual `"file:$path"` string concatenation for file paths with the more robust `toURI().toString()` (`toUri().toString()` for `Path` values) across various methods and tests. This improves consistency and ensures proper URI formatting.
1 parent b549691 commit 6be7648

File tree

3 files changed

+34
-17
lines changed

3 files changed

+34
-17
lines changed

dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReading.kt

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,15 @@ public fun DataFrame.Companion.readParquet(
223223
vararg paths: Path,
224224
nullability: NullabilityOptions = NullabilityOptions.Infer,
225225
batchSize: Long = ARROW_PARQUET_DEFAULT_BATCH_SIZE,
226-
): AnyFrame = readArrowDatasetImpl(paths.map { "file:$it" }.toTypedArray(), FileFormat.PARQUET, nullability, batchSize)
226+
): AnyFrame =
227+
readArrowDatasetImpl(
228+
paths.map {
229+
it.toUri().toString()
230+
}.toTypedArray(),
231+
FileFormat.PARQUET,
232+
nullability,
233+
batchSize,
234+
)
227235

228236
/**
229237
* Read [Parquet](https://parquet.apache.org/) data from existing [files] by using [Arrow Dataset](https://arrow.apache.org/docs/java/dataset.html)
@@ -235,7 +243,7 @@ public fun DataFrame.Companion.readParquet(
235243
): AnyFrame =
236244
readArrowDatasetImpl(
237245
files.map {
238-
"file:${it.toPath()}"
246+
it.toURI().toString()
239247
}.toTypedArray(),
240248
FileFormat.PARQUET,
241249
nullability,

dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReadingImpl.kt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -432,12 +432,12 @@ private fun resolveArrowDatasetUris(fileUris: Array<String>): Array<String> =
432432
tempFile.deleteOnExit()
433433
url.openStream().use { input ->
434434
Files.copy(input, tempFile.toPath())
435-
"file:${tempFile.toPath()}"
435+
tempFile.toURI().toString()
436436
}
437437
}
438438

439439
!it.startsWith("file:", true) && File(it).exists() -> {
440-
"file:$it"
440+
File(it).toURI().toString()
441441
}
442442

443443
else -> it

dataframe-arrow/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ArrowKtTest.kt

Lines changed: 22 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -48,12 +48,11 @@ import org.junit.Test
4848
import java.io.ByteArrayInputStream
4949
import java.io.ByteArrayOutputStream
5050
import java.io.File
51-
import java.net.URI
5251
import java.net.URL
5352
import java.nio.channels.Channels
54-
import java.nio.file.FileSystems
5553
import java.sql.DriverManager
5654
import java.util.Locale
55+
import kotlin.io.path.toPath
5756
import kotlin.reflect.typeOf
5857

5958
internal class ArrowKtTest {
@@ -658,9 +657,11 @@ internal class ArrowKtTest {
658657

659658
@Test
660659
fun testReadParquetPath() {
661-
val resourceLocation = testResource("test.arrow.parquet").path
662-
val resourcePath = FileSystems.getDefault().getPath(resourceLocation)
660+
val resourceUrl = testResource("test.arrow.parquet")
661+
val resourcePath = resourceUrl.toURI().toPath()
662+
663663
val dataFrame = DataFrame.readParquet(resourcePath)
664+
664665
dataFrame.rowsCount() shouldBe 300
665666
assertEstimations(
666667
exampleFrame = dataFrame,
@@ -672,9 +673,11 @@ internal class ArrowKtTest {
672673

673674
@Test
674675
fun testReadParquetFile() {
675-
val resourceLocation = testResource("test.arrow.parquet").path
676-
val resourcePath = FileSystems.getDefault().getPath(resourceLocation)
676+
val resourceUrl = testResource("test.arrow.parquet")
677+
val resourcePath = resourceUrl.toURI().toPath()
678+
677679
val dataFrame = DataFrame.readParquet(resourcePath.toFile())
680+
678681
dataFrame.rowsCount() shouldBe 300
679682
assertEstimations(
680683
exampleFrame = dataFrame,
@@ -686,9 +689,11 @@ internal class ArrowKtTest {
686689

687690
@Test
688691
fun testReadParquetStringPath() {
689-
val resourceLocation = testResource("test.arrow.parquet").path
690-
val resourcePath = FileSystems.getDefault().getPath(resourceLocation)
692+
val resourceUrl = testResource("test.arrow.parquet")
693+
val resourcePath = resourceUrl.toURI().toPath()
694+
691695
val dataFrame = DataFrame.readParquet("$resourcePath")
696+
692697
dataFrame.rowsCount() shouldBe 300
693698
assertEstimations(
694699
exampleFrame = dataFrame,
@@ -700,10 +705,12 @@ internal class ArrowKtTest {
700705

701706
@Test
702707
fun testReadParquetUrl() {
703-
val resourceLocation = testResource("test.arrow.parquet").path
704-
val resourcePath = FileSystems.getDefault().getPath(resourceLocation)
705-
val fileUrl = URI.create("file:$resourcePath").toURL()
708+
val resourceUrl = testResource("test.arrow.parquet")
709+
val resourcePath = resourceUrl.toURI().toPath()
710+
val fileUrl = resourcePath.toUri().toURL()
711+
706712
val dataFrame = DataFrame.readParquet(fileUrl)
713+
707714
dataFrame.rowsCount() shouldBe 300
708715
assertEstimations(
709716
exampleFrame = dataFrame,
@@ -715,9 +722,11 @@ internal class ArrowKtTest {
715722

716723
@Test
717724
fun testReadMultipleParquetFiles() {
718-
val resourceLocation = testResource("test.arrow.parquet").path
719-
val resourcePath = FileSystems.getDefault().getPath(resourceLocation)
725+
val resourceUrl = testResource("test.arrow.parquet")
726+
val resourcePath = resourceUrl.toURI().toPath()
727+
720728
val dataFrame = DataFrame.readParquet(resourcePath, resourcePath, resourcePath)
729+
721730
dataFrame.rowsCount() shouldBe 900
722731
}
723732
}

0 commit comments

Comments
 (0)