diff --git a/core/api/core.api b/core/api/core.api index 2839a3b16f..98a4dce3ae 100644 --- a/core/api/core.api +++ b/core/api/core.api @@ -4328,10 +4328,11 @@ public final class org/jetbrains/kotlinx/dataframe/api/TypeConversionsKt { } public final class org/jetbrains/kotlinx/dataframe/api/UnfoldKt { - public static final fun unfold (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Lkotlin/jvm/functions/Function2;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; public static final fun unfold (Lorg/jetbrains/kotlinx/dataframe/DataFrame;[Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static final fun unfold (Lorg/jetbrains/kotlinx/dataframe/DataFrame;[Lkotlin/reflect/KCallable;ILkotlin/jvm/functions/Function2;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; public static final fun unfold (Lorg/jetbrains/kotlinx/dataframe/DataFrame;[Lkotlin/reflect/KProperty;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; public static final fun unfold (Lorg/jetbrains/kotlinx/dataframe/DataFrame;[Lorg/jetbrains/kotlinx/dataframe/columns/ColumnReference;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static synthetic fun unfold$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;[Lkotlin/reflect/KCallable;ILkotlin/jvm/functions/Function2;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; } public final class org/jetbrains/kotlinx/dataframe/api/UngroupKt { @@ -5611,6 +5612,10 @@ public final class org/jetbrains/kotlinx/dataframe/impl/api/ToSequenceKt { public static final fun toSequenceImpl (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Lkotlin/reflect/KType;)Lkotlin/sequences/Sequence; } +public final class org/jetbrains/kotlinx/dataframe/impl/api/UnfoldKt { + public static final fun unfoldImpl (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lkotlin/jvm/functions/Function1;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; +} + public final class org/jetbrains/kotlinx/dataframe/impl/api/UpdateKt { public static final fun updateImpl (Lorg/jetbrains/kotlinx/dataframe/api/Update;Lkotlin/jvm/functions/Function3;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/unfold.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/unfold.kt index c4598b2995..a03f57f1a8 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/unfold.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/unfold.kt @@ -6,28 +6,23 @@ import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.annotations.AccessApiOverload -import org.jetbrains.kotlinx.dataframe.columns.ColumnKind +import org.jetbrains.kotlinx.dataframe.annotations.Interpretable +import org.jetbrains.kotlinx.dataframe.annotations.Refine import org.jetbrains.kotlinx.dataframe.columns.toColumnSet -import org.jetbrains.kotlinx.dataframe.impl.api.canBeUnfolded -import org.jetbrains.kotlinx.dataframe.impl.api.createDataFrameImpl -import org.jetbrains.kotlinx.dataframe.typeClass +import org.jetbrains.kotlinx.dataframe.impl.api.unfoldImpl +import kotlin.reflect.KCallable import kotlin.reflect.KProperty -public inline fun DataColumn.unfold(): AnyCol = - when (kind()) { - ColumnKind.Group, ColumnKind.Frame -> this +public inline fun DataColumn.unfold(vararg roots: KCallable<*>, maxDepth: Int = 0): AnyCol = + unfoldImpl { properties(roots = roots, maxDepth) } - else -> when { - !typeClass.canBeUnfolded -> this - - else -> values() - .createDataFrameImpl(typeClass) { (this as CreateDataFrameDsl).properties() } - .asColumnGroup(name()) - .asDataColumn() - } - } - -public fun DataFrame.unfold(columns: ColumnsSelector): DataFrame = replace(columns).with { it.unfold() } +@Refine +@Interpretable("DataFrameUnfold") +public fun DataFrame.unfold( + vararg roots: KCallable<*>, + maxDepth: Int = 0, + columns: ColumnsSelector, +): DataFrame = replace(columns).with { it.unfoldImpl { properties(roots = roots, maxDepth) } } public fun DataFrame.unfold(vararg columns: String): DataFrame = unfold { columns.toColumnSet() } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/unfold.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/unfold.kt new file mode 100644 index 0000000000..24503f7e6c --- /dev/null +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/unfold.kt @@ -0,0 +1,24 @@ +package org.jetbrains.kotlinx.dataframe.impl.api + +import org.jetbrains.kotlinx.dataframe.AnyCol +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.api.CreateDataFrameDsl +import org.jetbrains.kotlinx.dataframe.api.asColumnGroup +import org.jetbrains.kotlinx.dataframe.api.asDataColumn +import org.jetbrains.kotlinx.dataframe.columns.ColumnKind +import org.jetbrains.kotlinx.dataframe.typeClass + +@PublishedApi +internal fun DataColumn.unfoldImpl(body: CreateDataFrameDsl.() -> Unit): AnyCol = + when (kind()) { + ColumnKind.Group, ColumnKind.Frame -> this + + else -> when { + !typeClass.canBeUnfolded -> this + + else -> values() + .createDataFrameImpl(typeClass) { (this as CreateDataFrameDsl).body() } + .asColumnGroup(name()) + .asDataColumn() + } + } diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/unfold.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/unfold.kt new file mode 100644 index 0000000000..a5683326bb --- /dev/null +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/unfold.kt @@ -0,0 +1,68 @@ +package org.jetbrains.kotlinx.dataframe.api + +import io.kotest.matchers.shouldBe +import io.kotest.matchers.types.shouldBeInstanceOf +import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.junit.Test +import kotlin.reflect.typeOf + +class UnfoldTests { + @Test + fun unfold() { + val df = dataFrameOf( + "col" to listOf(A("123", 321)), + ) + + val res = df.unfold { col("col") } + res[pathOf("col", "str")][0] shouldBe "123" + res[pathOf("col", "i")][0] shouldBe 321 + } + + @Test + fun `unfold deep`() { + val df1 = dataFrameOf( + "col" to listOf( + Group( + "1", + listOf( + Person("Alice", "Cooper", 15, "London"), + Person("Bob", "Dylan", 45, "Dubai"), + ), + ), + Group( + "2", + listOf( + Person("Charlie", "Daniels", 20, "Moscow"), + Person("Charlie", "Chaplin", 40, "Milan"), + ), + ), + ), + ) + + df1.unfold { col("col") }[pathOf("col", "participants")].type() shouldBe typeOf>() + + df1.unfold(maxDepth = 2) { col("col") }[pathOf("col", "participants")][0].shouldBeInstanceOf { + it["firstName"][0] shouldBe "Alice" + } + } + + @Test + fun `keep value type`() { + val values = listOf(1, 2, 3, 4) + val df2 = dataFrameOf("int" to values) + val column = df2.unfold { col("int") }["int"] + column.type() shouldBe typeOf() + column.values() shouldBe values + } + + data class A(val str: String, val i: Int) + + data class Person( + val firstName: String, + val lastName: String, + val age: Int, + val city: String?, + ) + + data class Group(val id: String, val participants: List) +} diff --git a/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/api/toDataFrame.kt b/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/api/toDataFrame.kt index d299db62a6..8c84d5d6d9 100644 --- a/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/api/toDataFrame.kt +++ b/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/api/toDataFrame.kt @@ -197,61 +197,6 @@ internal fun KotlinTypeFacade.toDataFrame( arg: ConeTypeProjection, traverseConfiguration: TraverseConfiguration, ): PluginDataFrameSchema { - - val anyType = session.builtinTypes.nullableAnyType.type - - fun ConeKotlinType.isValueType() = - this.isArrayTypeOrNullableArrayType || - this.classId == StandardClassIds.Unit || - this.classId == StandardClassIds.Any || - this.classId == StandardClassIds.Map || - this.classId == StandardClassIds.MutableMap || - this.classId == StandardClassIds.String || - this.classId in StandardClassIds.primitiveTypes || - this.classId in StandardClassIds.unsignedTypes || - classId in setOf( - Names.DURATION_CLASS_ID, - Names.LOCAL_DATE_CLASS_ID, - Names.LOCAL_DATE_TIME_CLASS_ID, - Names.INSTANT_CLASS_ID, - Names.DATE_TIME_PERIOD_CLASS_ID, - Names.DATE_TIME_UNIT_CLASS_ID, - Names.TIME_ZONE_CLASS_ID - ) || - this.isSubtypeOf( - StandardClassIds.Number.constructClassLikeType(emptyArray(), isNullable = true), - session - ) || - this.toRegularClassSymbol(session)?.isEnumClass ?: false || - this.isSubtypeOf( - Names.TEMPORAL_ACCESSOR_CLASS_ID.constructClassLikeType(emptyArray(), isNullable = true), session - ) || - this.isSubtypeOf( - Names.TEMPORAL_AMOUNT_CLASS_ID.constructClassLikeType(emptyArray(), isNullable = true), session - ) - - - fun FirNamedFunctionSymbol.isGetterLike(): Boolean { - val functionName = this.name.asString() - return (functionName.startsWith("get") || functionName.startsWith("is")) && - this.valueParameterSymbols.isEmpty() && - this.typeParameterSymbols.isEmpty() - } - - fun ConeKotlinType.hasProperties(): Boolean { - val symbol = this.toRegularClassSymbol(session) as? FirClassSymbol<*> ?: return false - val scope = symbol.unsubstitutedScope( - session, - ScopeSession(), - withForcedTypeCalculator = false, - memberRequiredPhase = null - ) - - return scope.collectAllProperties().any { it.visibility == Visibilities.Public } || - scope.collectAllFunctions().any { it.visibility == Visibilities.Public && it.isGetterLike() } - } - - val excludes = traverseConfiguration.excludeProperties.mapNotNullTo(mutableSetOf()) { it.calleeReference.toResolvedPropertySymbol() } val excludedClasses = traverseConfiguration.excludeClasses.mapTo(mutableSetOf()) { it.argument.resolvedType } @@ -322,7 +267,7 @@ internal fun KotlinTypeFacade.toDataFrame( val keepSubtree = depth >= maxDepth && !fieldKind.shouldBeConvertedToColumnGroup && !fieldKind.shouldBeConvertedToFrameColumn - if (keepSubtree || returnType.isValueType() || returnType.classId in preserveClasses || it in preserveProperties) { + if (keepSubtree || returnType.isValueType(session) || returnType.classId in preserveClasses || it in preserveProperties) { SimpleDataColumn( name, TypeApproximation( @@ -349,7 +294,7 @@ internal fun KotlinTypeFacade.toDataFrame( ConeStarProjection -> session.builtinTypes.nullableAnyType.type else -> session.builtinTypes.nullableAnyType.type } - if (type.isValueType()) { + if (type.isValueType(session)) { val columnType = List.constructClassLikeType(arrayOf(type), returnType.isNullable) .withNullability(ConeNullability.create(makeNullable), session.typeContext) .wrap() @@ -364,7 +309,7 @@ internal fun KotlinTypeFacade.toDataFrame( } arg.type?.let { type -> - if (type.isValueType() || !type.hasProperties()) { + if (!type.canBeUnfolded(session)) { return PluginDataFrameSchema(listOf(simpleColumnOf("value", type))) } } @@ -383,6 +328,60 @@ internal fun KotlinTypeFacade.toDataFrame( } } +fun ConeKotlinType.canBeUnfolded(session: FirSession): Boolean = + !isValueType(session) && hasProperties(session) + +private fun ConeKotlinType.isValueType(session: FirSession) = + this.isArrayTypeOrNullableArrayType || + this.classId == StandardClassIds.Unit || + this.classId == StandardClassIds.Any || + this.classId == StandardClassIds.Map || + this.classId == StandardClassIds.MutableMap || + this.classId == StandardClassIds.String || + this.classId in StandardClassIds.primitiveTypes || + this.classId in StandardClassIds.unsignedTypes || + classId in setOf( + Names.DURATION_CLASS_ID, + Names.LOCAL_DATE_CLASS_ID, + Names.LOCAL_DATE_TIME_CLASS_ID, + Names.INSTANT_CLASS_ID, + Names.DATE_TIME_PERIOD_CLASS_ID, + Names.DATE_TIME_UNIT_CLASS_ID, + Names.TIME_ZONE_CLASS_ID + ) || + this.isSubtypeOf( + StandardClassIds.Number.constructClassLikeType(emptyArray(), isNullable = true), + session + ) || + this.toRegularClassSymbol(session)?.isEnumClass ?: false || + this.isSubtypeOf( + Names.TEMPORAL_ACCESSOR_CLASS_ID.constructClassLikeType(emptyArray(), isNullable = true), session + ) || + this.isSubtypeOf( + Names.TEMPORAL_AMOUNT_CLASS_ID.constructClassLikeType(emptyArray(), isNullable = true), session + ) + + +private fun ConeKotlinType.hasProperties(session: FirSession): Boolean { + val symbol = this.toRegularClassSymbol(session) as? FirClassSymbol<*> ?: return false + val scope = symbol.unsubstitutedScope( + session, + ScopeSession(), + withForcedTypeCalculator = false, + memberRequiredPhase = null + ) + + return scope.collectAllProperties().any { it.visibility == Visibilities.Public } || + scope.collectAllFunctions().any { it.visibility == Visibilities.Public && it.isGetterLike() } +} + +private fun FirNamedFunctionSymbol.isGetterLike(): Boolean { + val functionName = this.name.asString() + return (functionName.startsWith("get") || functionName.startsWith("is")) && + this.valueParameterSymbols.isEmpty() && + this.typeParameterSymbols.isEmpty() +} + // org.jetbrains.kotlinx.dataframe.codeGen.getFieldKind private fun ConeKotlinType.getFieldKind(session: FirSession) = FieldKind.of( this, diff --git a/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/api/unfold.kt b/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/api/unfold.kt new file mode 100644 index 0000000000..b0836cae77 --- /dev/null +++ b/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/api/unfold.kt @@ -0,0 +1,38 @@ +package org.jetbrains.kotlinx.dataframe.plugin.impl.api + +import org.jetbrains.kotlinx.dataframe.api.replace +import org.jetbrains.kotlinx.dataframe.api.with +import org.jetbrains.kotlinx.dataframe.plugin.impl.AbstractSchemaModificationInterpreter +import org.jetbrains.kotlinx.dataframe.plugin.impl.Arguments +import org.jetbrains.kotlinx.dataframe.plugin.impl.PluginDataFrameSchema +import org.jetbrains.kotlinx.dataframe.plugin.impl.Present +import org.jetbrains.kotlinx.dataframe.plugin.impl.SimpleColumnGroup +import org.jetbrains.kotlinx.dataframe.plugin.impl.SimpleDataColumn +import org.jetbrains.kotlinx.dataframe.plugin.impl.asDataColumn +import org.jetbrains.kotlinx.dataframe.plugin.impl.asDataFrame +import org.jetbrains.kotlinx.dataframe.plugin.impl.asSimpleColumn +import org.jetbrains.kotlinx.dataframe.plugin.impl.dataFrame +import org.jetbrains.kotlinx.dataframe.plugin.impl.ignore +import org.jetbrains.kotlinx.dataframe.plugin.impl.toPluginDataFrameSchema + +class DataFrameUnfold : AbstractSchemaModificationInterpreter() { + val Arguments.receiver: PluginDataFrameSchema by dataFrame() + val Arguments.properties by ignore() + val Arguments.maxDepth: Int by arg(defaultValue = Present(0)) + val Arguments.columns: ColumnsResolver by arg() + + override fun Arguments.interpret(): PluginDataFrameSchema { + return receiver.asDataFrame().replace { columns }.with { + val column = it.asSimpleColumn() as? SimpleDataColumn + if (column != null) { + if (!column.type.type.canBeUnfolded(session)) { + it + } else { + SimpleColumnGroup(it.name(), toDataFrame(maxDepth, column.type.type, TraverseConfiguration()).columns()).asDataColumn() + } + } else { + it + } + }.toPluginDataFrameSchema() + } +} diff --git a/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/loadInterpreter.kt b/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/loadInterpreter.kt index 2ada4aceac..8bfbdb268f 100644 --- a/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/loadInterpreter.kt +++ b/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/loadInterpreter.kt @@ -105,6 +105,7 @@ import org.jetbrains.kotlinx.dataframe.plugin.impl.api.ConcatWithKeys import org.jetbrains.kotlinx.dataframe.plugin.impl.api.DataFrameBuilderInvoke0 import org.jetbrains.kotlinx.dataframe.plugin.impl.api.DataFrameOf0 import org.jetbrains.kotlinx.dataframe.plugin.impl.api.DataFrameOf3 +import org.jetbrains.kotlinx.dataframe.plugin.impl.api.DataFrameUnfold import org.jetbrains.kotlinx.dataframe.plugin.impl.api.DataFrameXs import org.jetbrains.kotlinx.dataframe.plugin.impl.api.Drop0 import org.jetbrains.kotlinx.dataframe.plugin.impl.api.Drop1 @@ -472,6 +473,7 @@ internal inline fun String.load(): T { "DataFrameXs" -> DataFrameXs() "GroupByXs" -> GroupByXs() "ConcatWithKeys" -> ConcatWithKeys() + "DataFrameUnfold" -> DataFrameUnfold() else -> error("$this") } as T } diff --git a/plugins/kotlin-dataframe/testData/box/unfold.kt b/plugins/kotlin-dataframe/testData/box/unfold.kt new file mode 100644 index 0000000000..6e407b110b --- /dev/null +++ b/plugins/kotlin-dataframe/testData/box/unfold.kt @@ -0,0 +1,47 @@ +import org.jetbrains.kotlinx.dataframe.* +import org.jetbrains.kotlinx.dataframe.annotations.* +import org.jetbrains.kotlinx.dataframe.api.* +import org.jetbrains.kotlinx.dataframe.io.* + +data class A(val str: String, val i: Int) + +data class Person(val firstName: String, val lastName: String, val age: Int, val city: String?) + +data class Group(val id: String, val participants: List) + +fun box(): String { + val df = dataFrameOf( + "col" to listOf(A("123", 321)) + ) + + val res = df.unfold { col } + val str: String = res.col.str[0] + val i: Int = res.col.i[0] + + + val df1 = dataFrameOf( + "col" to listOf( + Group("1", listOf( + Person("Alice", "Cooper", 15, "London"), + Person("Bob", "Dylan", 45, "Dubai") + )), + Group("2", listOf( + Person("Charlie", "Daniels", 20, "Moscow"), + Person("Charlie", "Chaplin", 40, "Milan"), + )), + ) + ) + + val res1: DataColumn> = df1.unfold { col }.col.participants + + val res2: DataColumn> = df1.unfold(maxDepth = 2) { col }.col.participants + + val res3: DataColumn = df1.unfold(maxDepth = 2) { col }.col.participants[0].firstName + + val df2 = dataFrameOf( + "int" to listOf(1, 2, 3, 4) + ) + + val res4: DataColumn = df2.unfold { int }.int + return "OK" +} diff --git a/plugins/kotlin-dataframe/tests-gen/org/jetbrains/kotlin/fir/dataframe/DataFrameBlackBoxCodegenTestGenerated.java b/plugins/kotlin-dataframe/tests-gen/org/jetbrains/kotlin/fir/dataframe/DataFrameBlackBoxCodegenTestGenerated.java index b63aad4b04..2910a80138 100644 --- a/plugins/kotlin-dataframe/tests-gen/org/jetbrains/kotlin/fir/dataframe/DataFrameBlackBoxCodegenTestGenerated.java +++ b/plugins/kotlin-dataframe/tests-gen/org/jetbrains/kotlin/fir/dataframe/DataFrameBlackBoxCodegenTestGenerated.java @@ -688,6 +688,12 @@ public void testTransformReplaceFunctionCall() { runTest("testData/box/transformReplaceFunctionCall.kt"); } + @Test + @TestMetadata("unfold.kt") + public void testUnfold() { + runTest("testData/box/unfold.kt"); + } + @Test @TestMetadata("ungroup.kt") public void testUngroup() {