diff --git a/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/api/flatten.kt b/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/api/flatten.kt index 139f2e3c20..2567ba783b 100644 --- a/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/api/flatten.kt +++ b/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/api/flatten.kt @@ -14,7 +14,7 @@ import org.jetbrains.kotlinx.dataframe.plugin.impl.toPluginDataFrameSchema class FlattenDefault : AbstractSchemaModificationInterpreter() { val Arguments.receiver by dataFrame() val Arguments.keepParentNameForColumns: Boolean by arg(defaultValue = Present(false)) - val Arguments.separator: String by arg(defaultValue = Present(".")) + val Arguments.separator: String by arg(defaultValue = Present("_")) override fun Arguments.interpret(): PluginDataFrameSchema { return receiver.asDataFrame().flatten(keepParentNameForColumns, separator).toPluginDataFrameSchema() @@ -24,7 +24,7 @@ class FlattenDefault : AbstractSchemaModificationInterpreter() { class Flatten0 : AbstractSchemaModificationInterpreter() { val Arguments.receiver by dataFrame() val Arguments.keepParentNameForColumns: Boolean by arg(defaultValue = Present(false)) - val Arguments.separator: String by arg(defaultValue = Present(".")) + val Arguments.separator: String by arg(defaultValue = Present("_")) val Arguments.columns: ColumnsResolver by arg() override fun Arguments.interpret(): PluginDataFrameSchema { diff --git a/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/api/read.kt b/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/api/read.kt index 282fe2a5fd..aa5cbf7777 100644 --- a/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/api/read.kt +++ b/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/api/read.kt @@ -1,6 +1,5 @@ package org.jetbrains.kotlinx.dataframe.plugin.impl.api -import kotlinx.serialization.decodeFromString import kotlinx.serialization.json.Json import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.plugin.impl.AbstractInterpreter @@ -30,7 +29,11 @@ internal class Read0 : AbstractInterpreter() { val Arguments.header: List by arg(defaultValue = Present(listOf())) override fun Arguments.interpret(): PluginDataFrameSchema { - return DataFrame.read(path).schema().toPluginDataFrameSchema() + val df = when (val source = tryResolveFile(resolutionPath, path)) { + is ResolutionDirFile -> DataFrame.read(source.file) + is UrlOrAbsolutePath -> DataFrame.read(source.path) + } + return df.schema().toPluginDataFrameSchema() } } @@ -42,11 +45,13 @@ internal class ReadCSV0 : AbstractInterpreter() { val Arguments.duplicate: Boolean by arg(defaultValue = Present(true)) override fun Arguments.interpret(): PluginDataFrameSchema { - val file = resolveFile(resolutionPath, fileOrUrl) - val df = if (file != null && file.exists()) { - DataFrame.readCSV(file, delimiter, skipLines = skipLines, readLines = readLines, duplicate = duplicate) - } else { - DataFrame.readCSV(fileOrUrl, delimiter, skipLines = skipLines, readLines = readLines, duplicate = duplicate) + val df = when (val source = tryResolveFile(resolutionPath, fileOrUrl)) { + is ResolutionDirFile -> { + DataFrame.readCSV(source.file, delimiter, skipLines = skipLines, readLines = readLines, duplicate = duplicate) + } + is UrlOrAbsolutePath -> { + DataFrame.readCSV(source.path, delimiter, skipLines = skipLines, readLines = readLines, duplicate = duplicate) + } } return df.schema().toPluginDataFrameSchema() } @@ -74,17 +79,14 @@ internal class ReadJson0 : AbstractInterpreter() { } fun readJson(resolutionPath: String?, path: String): DataFrame { - val file = resolveFile(resolutionPath, path) - val df = if (file != null && file.exists()) { - DataFrame.readJson(file) - } else { - DataFrame.readJson(path) + return when (val source = tryResolveFile(resolutionPath, path)) { + is ResolutionDirFile -> DataFrame.readJson(source.file) + is UrlOrAbsolutePath -> DataFrame.readJson(source.path) } - return df } -private fun resolveFile(resolutionPath: String?, path: String): File? { - return resolutionPath?.let { +private fun tryResolveFile(resolutionPath: String?, path: String): DataSource { + val file = resolutionPath?.let { try { val file = File(it) if (file.exists() && file.isDirectory) { @@ -96,8 +98,17 @@ private fun resolveFile(resolutionPath: String?, path: String): File? { null } } + return if (file != null && file.exists()) { + ResolutionDirFile(file) + } else { + UrlOrAbsolutePath(path) + } } +private sealed interface DataSource +private class UrlOrAbsolutePath(val path: String) : DataSource +private class ResolutionDirFile(val file: File) : DataSource + internal class ReadDelimStr : AbstractInterpreter() { val Arguments.text: String by arg() val Arguments.delimiter: Char by arg(defaultValue = Present(',')) @@ -128,7 +139,12 @@ internal class ReadExcel : AbstractSchemaModificationInterpreter() { val Arguments.nameRepairStrategy: NameRepairStrategy by arg(defaultValue = Present(NameRepairStrategy.CHECK_UNIQUE)) override fun Arguments.interpret(): PluginDataFrameSchema { - val df = DataFrame.readExcel(fileOrUrl, sheetName, skipRows, columns, stringColumns, rowsCount, nameRepairStrategy) + val df = when (val source = tryResolveFile(resolutionPath, fileOrUrl)) { + is ResolutionDirFile -> + DataFrame.readExcel(source.file, sheetName, skipRows, columns, stringColumns, rowsCount, nameRepairStrategy) + is UrlOrAbsolutePath -> + DataFrame.readExcel(source.path, sheetName, skipRows, columns, stringColumns, rowsCount, nameRepairStrategy) + } return df.schema().toPluginDataFrameSchema() } } diff --git a/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/loadInterpreter.kt b/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/loadInterpreter.kt index becc8fa92e..7ea78868be 100644 --- a/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/loadInterpreter.kt +++ b/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/loadInterpreter.kt @@ -90,6 +90,7 @@ import org.jetbrains.kotlinx.dataframe.plugin.impl.api.Move0 import org.jetbrains.kotlinx.dataframe.plugin.impl.api.PairConstructor import org.jetbrains.kotlinx.dataframe.plugin.impl.api.PairToConstructor import org.jetbrains.kotlinx.dataframe.plugin.impl.api.ReadExcel +import org.jetbrains.kotlinx.dataframe.plugin.impl.api.StringColumnsConstructor import org.jetbrains.kotlinx.dataframe.plugin.impl.api.ToDataFrame import org.jetbrains.kotlinx.dataframe.plugin.impl.api.ToDataFrameColumn import org.jetbrains.kotlinx.dataframe.plugin.impl.api.ToDataFrameDefault @@ -232,7 +233,7 @@ internal inline fun String.load(): T { "DataFrameOf0" -> DataFrameOf0() "DataFrameBuilderInvoke0" -> DataFrameBuilderInvoke0() "ToDataFrameColumn" -> ToDataFrameColumn() - "StringColumns" -> ToDataFrameColumn() + "StringColumns" -> StringColumnsConstructor() "ReadExcel" -> ReadExcel() "FillNulls0" -> FillNulls0() "UpdateWith0" -> UpdateWith0() diff --git a/plugins/kotlin-dataframe/testData/box/flatten.kt b/plugins/kotlin-dataframe/testData/box/flatten.kt index f6eaf17eaf..cc57f83ee1 100644 --- a/plugins/kotlin-dataframe/testData/box/flatten.kt +++ b/plugins/kotlin-dataframe/testData/box/flatten.kt @@ -14,6 +14,8 @@ fun box(): String { flattened.compareSchemas(strict = true) flattened.ungroup { f }.compareSchemas(strict = true) + grouped.flatten(keepParentNameForColumns = true) { f.e }.compareSchemas(strict = true) + grouped.flatten { f.e and f }.compareSchemas(strict = true) return "OK" } diff --git a/plugins/kotlin-dataframe/testData/box/readExcel.kt b/plugins/kotlin-dataframe/testData/box/readExcel.kt new file mode 100644 index 0000000000..fef28eda78 --- /dev/null +++ b/plugins/kotlin-dataframe/testData/box/readExcel.kt @@ -0,0 +1,15 @@ +import org.jetbrains.kotlinx.dataframe.* +import org.jetbrains.kotlinx.dataframe.annotations.* +import org.jetbrains.kotlinx.dataframe.api.* +import org.jetbrains.kotlinx.dataframe.io.* + +fun box(): String { + val df = @Import DataFrame.read("testResources/sample.xls") + val d1: Double = df.col1[0] + val d2: Double = df.col2[0] + + val df1 = @Import DataFrame.readExcel("testResources/sample.xls") + val d11: Double = df1.col1[0] + val d12: Double = df1.col2[0] + return "OK" +} diff --git a/plugins/kotlin-dataframe/testData/box/readExcel_stringColumns.kt b/plugins/kotlin-dataframe/testData/box/readExcel_stringColumns.kt new file mode 100644 index 0000000000..9c26865792 --- /dev/null +++ b/plugins/kotlin-dataframe/testData/box/readExcel_stringColumns.kt @@ -0,0 +1,11 @@ +import org.jetbrains.kotlinx.dataframe.* +import org.jetbrains.kotlinx.dataframe.annotations.* +import org.jetbrains.kotlinx.dataframe.api.* +import org.jetbrains.kotlinx.dataframe.io.* + +fun box(): String { + val df = @Import DataFrame.readExcel("testResources/sample.xls", stringColumns = StringColumns("A")) + val d1: String = df.col1[0] + val d2: Double = df.col2[0] + return "OK" +} diff --git a/plugins/kotlin-dataframe/testData/box/read_localFile.kt b/plugins/kotlin-dataframe/testData/box/read_localFile.kt new file mode 100644 index 0000000000..9c18f54d66 --- /dev/null +++ b/plugins/kotlin-dataframe/testData/box/read_localFile.kt @@ -0,0 +1,12 @@ +import org.jetbrains.kotlinx.dataframe.* +import org.jetbrains.kotlinx.dataframe.annotations.* +import org.jetbrains.kotlinx.dataframe.api.* +import org.jetbrains.kotlinx.dataframe.io.* + +fun box(): String { + val df = @Import DataFrame.read("testResources/achievements_all.json") + + val df1 = df.explode { achievements } + df1.achievements.order + return "OK" +} diff --git a/plugins/kotlin-dataframe/testResources/sample.xls b/plugins/kotlin-dataframe/testResources/sample.xls new file mode 100644 index 0000000000..fcdced4e34 Binary files /dev/null and b/plugins/kotlin-dataframe/testResources/sample.xls differ diff --git a/plugins/kotlin-dataframe/tests-gen/org/jetbrains/kotlin/fir/dataframe/DataFrameBlackBoxCodegenTestGenerated.java b/plugins/kotlin-dataframe/tests-gen/org/jetbrains/kotlin/fir/dataframe/DataFrameBlackBoxCodegenTestGenerated.java index 9d35da9f28..6785f5e762 100644 --- a/plugins/kotlin-dataframe/tests-gen/org/jetbrains/kotlin/fir/dataframe/DataFrameBlackBoxCodegenTestGenerated.java +++ b/plugins/kotlin-dataframe/tests-gen/org/jetbrains/kotlin/fir/dataframe/DataFrameBlackBoxCodegenTestGenerated.java @@ -340,6 +340,18 @@ public void testReadDelimStr_delimiter() { runTest("testData/box/readDelimStr_delimiter.kt"); } + @Test + @TestMetadata("readExcel.kt") + public void testReadExcel() { + runTest("testData/box/readExcel.kt"); + } + + @Test + @TestMetadata("readExcel_stringColumns.kt") + public void testReadExcel_stringColumns() { + runTest("testData/box/readExcel_stringColumns.kt"); + } + @Test @TestMetadata("readJson.kt") public void testReadJson() { @@ -364,6 +376,12 @@ public void testReadJsonStr_memberProperty() { runTest("testData/box/readJsonStr_memberProperty.kt"); } + @Test + @TestMetadata("read_localFile.kt") + public void testRead_localFile() { + runTest("testData/box/read_localFile.kt"); + } + @Test @TestMetadata("remove.kt") public void testRemove() {