diff --git a/core/api/core.api b/core/api/core.api index 98a4dce3ae..390d8cbb20 100644 --- a/core/api/core.api +++ b/core/api/core.api @@ -1434,10 +1434,13 @@ public final class org/jetbrains/kotlinx/dataframe/api/ConstructorsKt { public final class org/jetbrains/kotlinx/dataframe/api/Convert { public fun (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Lkotlin/jvm/functions/Function2;)V public final fun cast ()Lorg/jetbrains/kotlinx/dataframe/api/Convert; + public final fun getColumns ()Lkotlin/jvm/functions/Function2; + public final fun getDf ()Lorg/jetbrains/kotlinx/dataframe/DataFrame; public fun toString ()Ljava/lang/String; } public final class org/jetbrains/kotlinx/dataframe/api/ConvertKt { + public static final fun asColumn (Lorg/jetbrains/kotlinx/dataframe/api/Convert;Lkotlin/jvm/functions/Function2;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; public static final fun asFrame (Lorg/jetbrains/kotlinx/dataframe/api/Convert;Lkotlin/jvm/functions/Function2;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; public static final fun convert (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Lkotlin/jvm/functions/Function2;)Lorg/jetbrains/kotlinx/dataframe/api/Convert; public static final fun convert (Lorg/jetbrains/kotlinx/dataframe/DataFrame;[Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/api/Convert; diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/convert.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/convert.kt index 3b7c08f3aa..e7fdddd479 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/convert.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/convert.kt @@ -21,6 +21,7 @@ import org.jetbrains.kotlinx.dataframe.annotations.Converter import org.jetbrains.kotlinx.dataframe.annotations.HasSchema import org.jetbrains.kotlinx.dataframe.annotations.Interpretable import org.jetbrains.kotlinx.dataframe.annotations.Refine +import org.jetbrains.kotlinx.dataframe.columns.BaseColumn import 
org.jetbrains.kotlinx.dataframe.columns.ColumnGroup import org.jetbrains.kotlinx.dataframe.columns.ColumnReference import org.jetbrains.kotlinx.dataframe.columns.toColumnSet @@ -97,7 +98,10 @@ public inline fun Convert.notNull( } @HasSchema(schemaArg = 0) -public class Convert(internal val df: DataFrame, internal val columns: ColumnsSelector) { +public class Convert( + @PublishedApi internal val df: DataFrame, + @PublishedApi internal val columns: ColumnsSelector, +) { public fun cast(): Convert = Convert(df, columns as ColumnsSelector) @Refine @@ -129,6 +133,21 @@ public fun Convert>.asFrame( body: ColumnsContainer.(ColumnGroup) -> DataFrame, ): DataFrame = to { body(this, it.asColumnGroup()).asColumnGroup(it.name()) } +/** + * Compiler plugin-friendly variant of [ReplaceClause.with]. + * [ReplaceClause.with] allows changing both column types and names. + * Tracking column name changes in an arbitrary lambda expression is unreliable and generally impossible + * to do statically. + * This function renames each converted column back to its original name, so only the column type changes to [R]. + * Example: + * `df.convert { colsOf() }.asColumn { it.asList().parallelStream().map { heavyIO(it) }.toList().toColumn() }` + */ +@Refine +@Interpretable("ConvertAsColumn") +public inline fun Convert.asColumn( + crossinline columnConverter: DataFrame.(DataColumn) -> BaseColumn, +): DataFrame = df.replace(columns).with { columnConverter(df, it).rename(it.name()) } + @Refine @Interpretable("PerRowCol") public inline fun Convert.perRowCol( diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/replace.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/replace.kt index 22ff9785a9..fe1e7a4743 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/replace.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/replace.kt @@ -53,6 +53,10 @@ public fun ReplaceClause.with(newColumns: List): DataFrame< } // TODO: Issue #418: breaks if running on
ColumnGroup and its child + +/** + * For an alternative supported in the compiler plugin use [Convert.asColumn] + */ public fun ReplaceClause.with(transform: ColumnsContainer.(DataColumn) -> AnyBaseCol): DataFrame { val removeResult = df.removeImpl(columns = columns) val toInsert = removeResult.removedColumns.map { diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt index 503fde4164..2a4fd812b9 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt @@ -10,6 +10,7 @@ import org.jetbrains.kotlinx.dataframe.annotations.DataSchema import org.jetbrains.kotlinx.dataframe.api.ParserOptions import org.jetbrains.kotlinx.dataframe.api.add import org.jetbrains.kotlinx.dataframe.api.after +import org.jetbrains.kotlinx.dataframe.api.asColumn import org.jetbrains.kotlinx.dataframe.api.asFrame import org.jetbrains.kotlinx.dataframe.api.asGroupBy import org.jetbrains.kotlinx.dataframe.api.at @@ -87,6 +88,7 @@ import org.jetbrains.kotlinx.dataframe.api.sortWith import org.jetbrains.kotlinx.dataframe.api.split import org.jetbrains.kotlinx.dataframe.api.sum import org.jetbrains.kotlinx.dataframe.api.to +import org.jetbrains.kotlinx.dataframe.api.toColumn import org.jetbrains.kotlinx.dataframe.api.toFloat import org.jetbrains.kotlinx.dataframe.api.toStart import org.jetbrains.kotlinx.dataframe.api.toMap @@ -114,6 +116,7 @@ import org.junit.Test import java.net.URL import java.time.format.DateTimeFormatter import java.util.* +import java.util.stream.Collectors @Suppress("ktlint:standard:chain-method-continuation", "ktlint:standard:argument-list-wrapping") class Modify : TestBase() { @@ -235,6 +238,16 @@ class Modify : TestBase() { // SampleEnd } + @Test + @TransformDataFrameExpressions + fun convertAsColumn() { + // SampleStart + df.convert { name }.asColumn { 
col -> + col.toList().parallelStream().map { it.toString() }.collect(Collectors.toList()).toColumn() + } + // SampleEnd + } + @Test @TransformDataFrameExpressions fun parseAll() { diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTreeTests.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTreeTests.kt index 7bd9af1fd0..2c3a067717 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTreeTests.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTreeTests.kt @@ -19,6 +19,7 @@ import org.jetbrains.kotlinx.dataframe.api.addId import org.jetbrains.kotlinx.dataframe.api.after import org.jetbrains.kotlinx.dataframe.api.aggregate import org.jetbrains.kotlinx.dataframe.api.append +import org.jetbrains.kotlinx.dataframe.api.asColumn import org.jetbrains.kotlinx.dataframe.api.asColumnGroup import org.jetbrains.kotlinx.dataframe.api.asDataFrame import org.jetbrains.kotlinx.dataframe.api.asFrame @@ -82,7 +83,9 @@ import org.jetbrains.kotlinx.dataframe.api.single import org.jetbrains.kotlinx.dataframe.api.sortBy import org.jetbrains.kotlinx.dataframe.api.split import org.jetbrains.kotlinx.dataframe.api.sumOf +import org.jetbrains.kotlinx.dataframe.api.toColumn import org.jetbrains.kotlinx.dataframe.api.toColumnAccessor +import org.jetbrains.kotlinx.dataframe.api.toStr import org.jetbrains.kotlinx.dataframe.api.toTop import org.jetbrains.kotlinx.dataframe.api.under import org.jetbrains.kotlinx.dataframe.api.ungroup @@ -100,7 +103,9 @@ import org.jetbrains.kotlinx.dataframe.columns.FrameColumn import org.jetbrains.kotlinx.dataframe.columns.depth import org.jetbrains.kotlinx.dataframe.hasNulls import org.junit.Test +import java.util.stream.Collectors import kotlin.reflect.typeOf +import kotlin.streams.toList class DataFrameTreeTests : BaseTest() { @@ -485,6 +490,15 @@ class DataFrameTreeTests : BaseTest() { .sorted() } + @Test + fun 
`convert column expression ignoring name changes`() { + val res = df.convert { colsOf() }.asColumn { + it.toList().parallelStream().map { it.toString() }.collect(Collectors.toList()).toColumn("123") + } + + res shouldBe df.convert { colsOf() }.toStr() + } + @Test fun extensionPropertiesTest() { val code = CodeGenerator.create() diff --git a/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Modify.convertAsColumn.html b/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Modify.convertAsColumn.html new file mode 100644 index 0000000000..2c85cffef9 --- /dev/null +++ b/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Modify.convertAsColumn.html @@ -0,0 +1,497 @@ + + + + + +
+ Input DataFrame: rowsCount = 7, columnsCount = 5 +
+ +

+
+
+Step 1: Convert +

class org.jetbrains.kotlinx.dataframe.api.Convert

+
+
+ Output DataFrame: rowsCount = 7, columnsCount = 5 +
+ +

+
+ + + diff --git a/docs/StardustDocs/topics/convert.md b/docs/StardustDocs/topics/convert.md index 6cbe4e8e5f..8cf95bf609 100644 --- a/docs/StardustDocs/topics/convert.md +++ b/docs/StardustDocs/topics/convert.md @@ -36,6 +36,23 @@ df.convert { name }.asFrame { it.add("fullName") { "$firstName $lastName" } } +Similar to the `replace with` operation, +columns can be converted in a compiler plugin-friendly fashion +whenever you need to perform an operation on the entire column without changing its name, +for example, to process its values in parallel. + + + +```kotlin +df.convert { name }.asColumn { col -> + col.toList().parallelStream().map { it.toString() }.collect(Collectors.toList()).toColumn() +} +``` + + + + + `convert` supports automatic type conversions between the following types: * `String` (uses [`parse`](parse.md) to convert from `String` to other types) * `Boolean` diff --git a/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/api/convert.kt b/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/api/convert.kt index b5f3c2f9fe..c746810a67 100644 --- a/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/api/convert.kt +++ b/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/api/convert.kt @@ -9,6 +9,10 @@ import org.jetbrains.kotlin.fir.types.ConeNullability import org.jetbrains.kotlin.fir.types.typeContext import org.jetbrains.kotlin.fir.types.withNullability import org.jetbrains.kotlin.name.Name +import org.jetbrains.kotlinx.dataframe.api.asColumn +import org.jetbrains.kotlinx.dataframe.api.convert +import org.jetbrains.kotlinx.dataframe.api.toPath +import org.jetbrains.kotlinx.dataframe.columns.toColumnSet import org.jetbrains.kotlinx.dataframe.plugin.extensions.KotlinTypeFacade import org.jetbrains.kotlinx.dataframe.plugin.extensions.wrap import org.jetbrains.kotlinx.dataframe.plugin.impl.Absent @@ -22,9 +26,12 @@ import org.jetbrains.kotlinx.dataframe.plugin.impl.SimpleCol import
org.jetbrains.kotlinx.dataframe.plugin.impl.SimpleColumnGroup import org.jetbrains.kotlinx.dataframe.plugin.impl.SimpleDataColumn import org.jetbrains.kotlinx.dataframe.plugin.impl.SimpleFrameColumn +import org.jetbrains.kotlinx.dataframe.plugin.impl.asDataColumn +import org.jetbrains.kotlinx.dataframe.plugin.impl.asDataFrame import org.jetbrains.kotlinx.dataframe.plugin.impl.dataFrame import org.jetbrains.kotlinx.dataframe.plugin.impl.ignore import org.jetbrains.kotlinx.dataframe.plugin.impl.simpleColumnOf +import org.jetbrains.kotlinx.dataframe.plugin.impl.toPluginDataFrameSchema import org.jetbrains.kotlinx.dataframe.plugin.impl.type import org.jetbrains.kotlinx.dataframe.plugin.utils.Names @@ -151,6 +158,19 @@ internal class To0 : AbstractInterpreter() { } } +internal class ConvertAsColumn : AbstractSchemaModificationInterpreter() { /* Schema interpreter registered under the ConvertAsColumn key: applies convert/asColumn to the receiver's schema to compute the result schema. */ + val Arguments.receiver: ConvertApproximation by arg() + val Arguments.typeArg2: TypeApproximation by arg() /* presumably the type argument describing the converted column type; TODO confirm */ + val Arguments.type: TypeApproximation by type(name("columnConverter")) /* NOTE(review): not read by interpret() below; confirm it is needed */ + + override fun Arguments.interpret(): PluginDataFrameSchema { + return receiver.schema.asDataFrame() + .convert { receiver.columns.map { it.toPath() }.toColumnSet() } + .asColumn { simpleColumnOf("", typeArg2.type).asDataColumn() } /* each selected column is replaced by a column of typeArg2's type; the empty name is a placeholder, presumably overwritten with the original column name inside asColumn; verify */ + .toPluginDataFrameSchema() + } +} + internal abstract class AbstractToSpecificType : AbstractInterpreter() { val Arguments.functionCall: FirFunctionCall by arg(lens = Interpreter.Id) val Arguments.receiver: ConvertApproximation by arg() diff --git a/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/loadInterpreter.kt b/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/loadInterpreter.kt index 2d914d3e5e..b6deee1393 100644 --- a/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/loadInterpreter.kt +++ b/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/loadInterpreter.kt @@ -98,6 +98,7 @@ import org.jetbrains.kotlinx.dataframe.plugin.impl.api.ColsOf1
import org.jetbrains.kotlinx.dataframe.plugin.impl.api.ColsOf2 import org.jetbrains.kotlinx.dataframe.plugin.impl.api.ColumnRange import org.jetbrains.kotlinx.dataframe.plugin.impl.api.ConcatWithKeys +import org.jetbrains.kotlinx.dataframe.plugin.impl.api.ConvertAsColumn import org.jetbrains.kotlinx.dataframe.plugin.impl.api.DataFrameBuilderInvoke0 import org.jetbrains.kotlinx.dataframe.plugin.impl.api.DataFrameOf0 import org.jetbrains.kotlinx.dataframe.plugin.impl.api.DataFrameOf3 @@ -301,6 +302,7 @@ internal inline fun String.load(): T { "ToSpecificTypeZone" -> ToSpecificTypeZone() "ToSpecificTypePattern" -> ToSpecificTypePattern() "With0" -> With0() + "ConvertAsColumn" -> ConvertAsColumn() "PerRowCol" -> PerRowCol() "Explode0" -> Explode0() "Insert0" -> Insert0() diff --git a/plugins/kotlin-dataframe/testData/box/convertAsColumn.kt b/plugins/kotlin-dataframe/testData/box/convertAsColumn.kt new file mode 100644 index 0000000000..1a81fa98cf --- /dev/null +++ b/plugins/kotlin-dataframe/testData/box/convertAsColumn.kt @@ -0,0 +1,14 @@ +import org.jetbrains.kotlinx.dataframe.* +import org.jetbrains.kotlinx.dataframe.annotations.* +import org.jetbrains.kotlinx.dataframe.api.* +import org.jetbrains.kotlinx.dataframe.io.* + +fun box(): String { + val res = dataFrameOf("a")(1, 2, 3).convert { a }.asColumn { it.convertToString() } + val str: DataColumn = res.a + + val res1 = dataFrameOf("a")(1).convert { a }.asColumn { dataFrameOf("b", "c")(2, 3.0).asColumnGroup() } + val i: DataColumn = res1.a.b + val d: DataColumn = res1.a.c + return "OK" +} diff --git a/plugins/kotlin-dataframe/tests-gen/org/jetbrains/kotlin/fir/dataframe/DataFrameBlackBoxCodegenTestGenerated.java b/plugins/kotlin-dataframe/tests-gen/org/jetbrains/kotlin/fir/dataframe/DataFrameBlackBoxCodegenTestGenerated.java index 77a97df70b..5ddee255b2 100644 --- a/plugins/kotlin-dataframe/tests-gen/org/jetbrains/kotlin/fir/dataframe/DataFrameBlackBoxCodegenTestGenerated.java +++ 
b/plugins/kotlin-dataframe/tests-gen/org/jetbrains/kotlin/fir/dataframe/DataFrameBlackBoxCodegenTestGenerated.java @@ -94,6 +94,12 @@ public void testConflictingJvmDeclarations() { runTest("testData/box/conflictingJvmDeclarations.kt"); } + @Test + @TestMetadata("convertAsColumn.kt") + public void testConvertAsColumn() { + runTest("testData/box/convertAsColumn.kt"); + } + @Test @TestMetadata("convertToDataFrame.kt") public void testConvertToDataFrame() { @@ -157,8 +163,8 @@ public void testDiff() { @Test @TestMetadata("distinct.kt") public void testDistinct() { - runTest("testData/box/distinct.kt"); - } + runTest("testData/box/distinct.kt"); + } @Test @TestMetadata("dropNA.kt")