Skip to content

Commit

Permalink
Merge pull request #949 from Kotlin/compiler-plugin-read-improvements
Browse files Browse the repository at this point in the history
Compiler plugin read improvements
  • Loading branch information
koperagen authored Nov 20, 2024
2 parents 7a895e2 + 58e91d3 commit 741854c
Show file tree
Hide file tree
Showing 9 changed files with 94 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import org.jetbrains.kotlinx.dataframe.plugin.impl.toPluginDataFrameSchema
class FlattenDefault : AbstractSchemaModificationInterpreter() {
val Arguments.receiver by dataFrame()
val Arguments.keepParentNameForColumns: Boolean by arg(defaultValue = Present(false))
val Arguments.separator: String by arg(defaultValue = Present("."))
val Arguments.separator: String by arg(defaultValue = Present("_"))

override fun Arguments.interpret(): PluginDataFrameSchema {
return receiver.asDataFrame().flatten(keepParentNameForColumns, separator).toPluginDataFrameSchema()
Expand All @@ -24,7 +24,7 @@ class FlattenDefault : AbstractSchemaModificationInterpreter() {
class Flatten0 : AbstractSchemaModificationInterpreter() {
val Arguments.receiver by dataFrame()
val Arguments.keepParentNameForColumns: Boolean by arg(defaultValue = Present(false))
val Arguments.separator: String by arg(defaultValue = Present("."))
val Arguments.separator: String by arg(defaultValue = Present("_"))
val Arguments.columns: ColumnsResolver by arg()

override fun Arguments.interpret(): PluginDataFrameSchema {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package org.jetbrains.kotlinx.dataframe.plugin.impl.api

import kotlinx.serialization.decodeFromString
import kotlinx.serialization.json.Json
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.DataRow
Expand Down Expand Up @@ -31,7 +30,11 @@ internal class Read0 : AbstractInterpreter<PluginDataFrameSchema>() {
val Arguments.header: List<String> by arg(defaultValue = Present(listOf()))

override fun Arguments.interpret(): PluginDataFrameSchema {
return DataFrame.read(path).schema().toPluginDataFrameSchema()
val df = when (val source = tryResolveFile(resolutionPath, path)) {
is ResolutionDirFile -> DataFrame.read(source.file)
is UrlOrAbsolutePath -> DataFrame.read(source.path)
}
return df.schema().toPluginDataFrameSchema()
}
}

Expand All @@ -43,11 +46,13 @@ internal class ReadCSV0 : AbstractInterpreter<PluginDataFrameSchema>() {
val Arguments.duplicate: Boolean by arg(defaultValue = Present(true))

override fun Arguments.interpret(): PluginDataFrameSchema {
val file = resolveFile(resolutionPath, fileOrUrl)
val df = if (file != null && file.exists()) {
DataFrame.readCSV(file, delimiter, skipLines = skipLines, readLines = readLines, duplicate = duplicate)
} else {
DataFrame.readCSV(fileOrUrl, delimiter, skipLines = skipLines, readLines = readLines, duplicate = duplicate)
val df = when (val source = tryResolveFile(resolutionPath, fileOrUrl)) {
is ResolutionDirFile -> {
DataFrame.readCSV(source.file, delimiter, skipLines = skipLines, readLines = readLines, duplicate = duplicate)
}
is UrlOrAbsolutePath -> {
DataFrame.readCSV(source.path, delimiter, skipLines = skipLines, readLines = readLines, duplicate = duplicate)
}
}
return df.schema().toPluginDataFrameSchema()
}
Expand Down Expand Up @@ -75,17 +80,14 @@ internal class ReadJson0 : AbstractInterpreter<PluginDataFrameSchema>() {
}

fun readJson(resolutionPath: String?, path: String): DataFrame<Any?> {
val file = resolveFile(resolutionPath, path)
val df = if (file != null && file.exists()) {
DataFrame.readJson(file)
} else {
DataFrame.readJson(path)
return when (val source = tryResolveFile(resolutionPath, path)) {
is ResolutionDirFile -> DataFrame.readJson(source.file)
is UrlOrAbsolutePath -> DataFrame.readJson(source.path)
}
return df
}

private fun resolveFile(resolutionPath: String?, path: String): File? {
return resolutionPath?.let {
private fun tryResolveFile(resolutionPath: String?, path: String): DataSource {
val file = resolutionPath?.let {
try {
val file = File(it)
if (file.exists() && file.isDirectory) {
Expand All @@ -97,8 +99,17 @@ private fun resolveFile(resolutionPath: String?, path: String): File? {
null
}
}
return if (file != null && file.exists()) {
ResolutionDirFile(file)
} else {
UrlOrAbsolutePath(path)
}
}

/**
 * Result of [tryResolveFile]: tells a read interpreter which input it should load.
 *
 * The hierarchy is sealed so that `when` expressions over it can be exhaustive
 * without an `else` branch.
 */
private sealed interface DataSource
/**
 * Fallback produced by [tryResolveFile] when no existing file was resolved.
 * Carries the caller-supplied [path] string unchanged; callers pass it to the
 * String-accepting `DataFrame.read*` overloads.
 */
private class UrlOrAbsolutePath(val path: String) : DataSource
/**
 * Produced by [tryResolveFile] when resolution yielded a [file] that exists.
 * NOTE(review): presumably the file is looked up relative to the resolution
 * directory — the resolving code is not fully visible here; confirm.
 */
private class ResolutionDirFile(val file: File) : DataSource

internal class ReadDelimStr : AbstractInterpreter<PluginDataFrameSchema>() {
val Arguments.text: String by arg()
val Arguments.delimiter: Char by arg(defaultValue = Present(','))
Expand Down Expand Up @@ -138,7 +149,12 @@ internal class ReadExcel : AbstractSchemaModificationInterpreter() {
val Arguments.nameRepairStrategy: NameRepairStrategy by arg(defaultValue = Present(NameRepairStrategy.CHECK_UNIQUE))

override fun Arguments.interpret(): PluginDataFrameSchema {
val df = DataFrame.readExcel(fileOrUrl, sheetName, skipRows, columns, stringColumns, rowsCount, nameRepairStrategy)
val df = when (val source = tryResolveFile(resolutionPath, fileOrUrl)) {
is ResolutionDirFile ->
DataFrame.readExcel(source.file, sheetName, skipRows, columns, stringColumns, rowsCount, nameRepairStrategy)
is UrlOrAbsolutePath ->
DataFrame.readExcel(source.path, sheetName, skipRows, columns, stringColumns, rowsCount, nameRepairStrategy)
}
return df.schema().toPluginDataFrameSchema()
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ import org.jetbrains.kotlinx.dataframe.plugin.impl.api.Move0
import org.jetbrains.kotlinx.dataframe.plugin.impl.api.PairConstructor
import org.jetbrains.kotlinx.dataframe.plugin.impl.api.PairToConstructor
import org.jetbrains.kotlinx.dataframe.plugin.impl.api.ReadExcel
import org.jetbrains.kotlinx.dataframe.plugin.impl.api.StringColumnsConstructor
import org.jetbrains.kotlinx.dataframe.plugin.impl.api.ToDataFrame
import org.jetbrains.kotlinx.dataframe.plugin.impl.api.ToDataFrameColumn
import org.jetbrains.kotlinx.dataframe.plugin.impl.api.ToDataFrameDefault
Expand Down Expand Up @@ -234,7 +235,7 @@ internal inline fun <reified T> String.load(): T {
"DataFrameOf0" -> DataFrameOf0()
"DataFrameBuilderInvoke0" -> DataFrameBuilderInvoke0()
"ToDataFrameColumn" -> ToDataFrameColumn()
"StringColumns" -> ToDataFrameColumn()
"StringColumns" -> StringColumnsConstructor()
"ReadExcel" -> ReadExcel()
"FillNulls0" -> FillNulls0()
"UpdateWith0" -> UpdateWith0()
Expand Down
2 changes: 2 additions & 0 deletions plugins/kotlin-dataframe/testData/box/flatten.kt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ fun box(): String {
flattened.compareSchemas(strict = true)
flattened.ungroup { f }.compareSchemas(strict = true)

grouped.flatten(keepParentNameForColumns = true) { f.e }.compareSchemas(strict = true)

grouped.flatten { f.e and f }.compareSchemas(strict = true)
return "OK"
}
Expand Down
15 changes: 15 additions & 0 deletions plugins/kotlin-dataframe/testData/box/readExcel.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import org.jetbrains.kotlinx.dataframe.*
import org.jetbrains.kotlinx.dataframe.annotations.*
import org.jetbrains.kotlinx.dataframe.api.*
import org.jetbrains.kotlinx.dataframe.io.*

/**
 * Box test: loads the same relative `.xls` path through both `DataFrame.read`
 * and `DataFrame.readExcel` and accesses `col1` / `col2` on each result.
 *
 * The explicit `Double` annotations act as compile-time assertions: the file
 * only compiles if the accessed values are assignable to `Double`.
 * NOTE(review): `col1` / `col2` are presumably accessors generated by the
 * compiler plugin from the spreadsheet's schema — confirm.
 *
 * @return `"OK"` when the body runs to completion.
 */
fun box(): String {
// Generic entry point, given a relative path.
val df = @Import DataFrame.read("testResources/sample.xls")
val d1: Double = df.col1[0]
val d2: Double = df.col2[0]

// Excel-specific entry point on the same file; the same column types are expected.
val df1 = @Import DataFrame.readExcel("testResources/sample.xls")
val d11: Double = df1.col1[0]
val d12: Double = df1.col2[0]
return "OK"
}
11 changes: 11 additions & 0 deletions plugins/kotlin-dataframe/testData/box/readExcel_stringColumns.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import org.jetbrains.kotlinx.dataframe.*
import org.jetbrains.kotlinx.dataframe.annotations.*
import org.jetbrains.kotlinx.dataframe.api.*
import org.jetbrains.kotlinx.dataframe.io.*

/**
 * Box test: calls `DataFrame.readExcel` with `stringColumns = StringColumns("A")`
 * and checks, via explicit type annotations, that `col1` is a `String` while
 * `col2` is still a `Double`.
 * NOTE(review): `"A"` presumably selects the first spreadsheet column by its
 * Excel letter — confirm against the `StringColumns` documentation.
 *
 * @return `"OK"` when the body runs to completion.
 */
fun box(): String {
val df = @Import DataFrame.readExcel("testResources/sample.xls", stringColumns = StringColumns("A"))
// Declared types are the assertion: compilation fails if they do not match.
val d1: String = df.col1[0]
val d2: Double = df.col2[0]
return "OK"
}
12 changes: 12 additions & 0 deletions plugins/kotlin-dataframe/testData/box/read_localFile.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import org.jetbrains.kotlinx.dataframe.*
import org.jetbrains.kotlinx.dataframe.annotations.*
import org.jetbrains.kotlinx.dataframe.api.*
import org.jetbrains.kotlinx.dataframe.io.*

/**
 * Box test: reads a JSON file through `DataFrame.read` using a relative path,
 * explodes the `achievements` column and accesses `order` on the result.
 * NOTE(review): `achievements` and `order` are presumably accessors derived
 * from the JSON file's schema by the compiler plugin — confirm.
 *
 * @return `"OK"` when the body runs to completion.
 */
fun box(): String {
val df = @Import DataFrame.read("testResources/achievements_all.json")

val df1 = df.explode { achievements }
// The value is discarded; the expression only needs to resolve.
df1.achievements.order
return "OK"
}
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,18 @@ public void testReadDelimStr_delimiter() {
runTest("testData/box/readDelimStr_delimiter.kt");
}

/** Runs the box test defined in {@code testData/box/readExcel.kt}. */
@Test
@TestMetadata("readExcel.kt")
public void testReadExcel() {
runTest("testData/box/readExcel.kt");
}

/** Runs the box test defined in {@code testData/box/readExcel_stringColumns.kt}. */
@Test
@TestMetadata("readExcel_stringColumns.kt")
public void testReadExcel_stringColumns() {
runTest("testData/box/readExcel_stringColumns.kt");
}

@Test
@TestMetadata("readJson.kt")
public void testReadJson() {
Expand Down Expand Up @@ -376,6 +388,12 @@ public void testReadJsonStr_memberProperty() {
runTest("testData/box/readJsonStr_memberProperty.kt");
}

/** Runs the box test defined in {@code testData/box/read_localFile.kt}. */
@Test
@TestMetadata("read_localFile.kt")
public void testRead_localFile() {
runTest("testData/box/read_localFile.kt");
}

@Test
@TestMetadata("remove.kt")
public void testRemove() {
Expand Down

0 comments on commit 741854c

Please sign in to comment.