From 4afa8231311d1f8e35705ae79ceec5e7f559afdf Mon Sep 17 00:00:00 2001 From: Martin Rueckl Date: Mon, 3 Feb 2025 09:55:48 +0100 Subject: [PATCH] generalize toListImpl to support conversions into lists and sequences The previous implementation already was based on iterating over rows, with this generalization, the user can now decide whether to eagerly convert a dataframe into a list, or use lazy transformations via a sequence. --- core/api/core.api | 4 ++-- .../org/jetbrains/kotlinx/dataframe/api/toList.kt | 6 +++--- .../jetbrains/kotlinx/dataframe/api/toSequence.kt | 14 ++++++++++++++ .../impl/api/{toList.kt => toSequence.kt} | 8 ++++---- .../org/jetbrains/kotlinx/dataframe/api/toList.kt | 6 +++--- .../jetbrains/kotlinx/dataframe/api/toSequence.kt | 14 ++++++++++++++ .../impl/api/{toList.kt => toSequence.kt} | 8 ++++---- docs/StardustDocs/topics/collectionsInterop.md | 13 ++++++++++++- 8 files changed, 56 insertions(+), 17 deletions(-) create mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toSequence.kt rename core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/{toList.kt => toSequence.kt} (92%) create mode 100644 core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toSequence.kt rename core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/{toList.kt => toSequence.kt} (92%) diff --git a/core/api/core.api b/core/api/core.api index 4f7f9ea0c6..863620f272 100644 --- a/core/api/core.api +++ b/core/api/core.api @@ -9808,8 +9808,8 @@ public final class org/jetbrains/kotlinx/dataframe/impl/api/ToDataFrameKt { public static final fun createDataFrameImpl (Ljava/lang/Iterable;Lkotlin/reflect/KClass;Lkotlin/jvm/functions/Function1;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; } -public final class org/jetbrains/kotlinx/dataframe/impl/api/ToListKt { - public static final fun toListImpl (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Lkotlin/reflect/KType;)Ljava/util/List; +public final class 
org/jetbrains/kotlinx/dataframe/impl/api/ToSequenceKt { + public static final fun toSequenceImpl (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Lkotlin/reflect/KType;)Lkotlin/sequences/Sequence; } public final class org/jetbrains/kotlinx/dataframe/impl/api/UpdateKt { diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toList.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toList.kt index b9eb5a8739..672a87ed5c 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toList.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toList.kt @@ -2,13 +2,13 @@ package org.jetbrains.kotlinx.dataframe.api import org.jetbrains.kotlinx.dataframe.AnyFrame import org.jetbrains.kotlinx.dataframe.DataFrame -import org.jetbrains.kotlinx.dataframe.impl.api.toListImpl +import org.jetbrains.kotlinx.dataframe.impl.api.toSequenceImpl import kotlin.reflect.typeOf // region DataFrame -public inline fun DataFrame.toList(): List = toListImpl(typeOf()) as List +public inline fun DataFrame.toList(): List = toSequenceImpl(typeOf()).toList() as List -public inline fun AnyFrame.toListOf(): List = toListImpl(typeOf()) as List +public inline fun AnyFrame.toListOf(): List = toSequenceImpl(typeOf()).toList() as List // endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toSequence.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toSequence.kt new file mode 100644 index 0000000000..c1fb266c8a --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toSequence.kt @@ -0,0 +1,14 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.impl.api.toSequenceImpl +import kotlin.reflect.typeOf + +// region DataFrame + +public inline fun 
DataFrame.toSequence(): Sequence = toSequenceImpl(typeOf()) as Sequence + +public inline fun AnyFrame.toSequenceOf(): Sequence = toSequenceImpl(typeOf()) as Sequence + +// endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/toList.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/toSequence.kt similarity index 92% rename from core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/toList.kt rename to core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/toSequence.kt index fcf4b6cffe..fe3a0015cc 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/toList.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/toSequence.kt @@ -21,7 +21,7 @@ import kotlin.reflect.full.withNullability import kotlin.reflect.jvm.jvmErasure @PublishedApi -internal fun AnyFrame.toListImpl(type: KType): List { +internal fun AnyFrame.toSequenceImpl(type: KType): Sequence { val clazz = type.jvmErasure require(clazz.isData) { "`$clazz` is not a data class. `toList` is supported only for data classes." 
} @@ -46,7 +46,7 @@ internal fun AnyFrame.toListImpl(type: KType): List { val col: AnyCol = if (it.type.jvmErasure == List::class) { val elementType = it.type.arguments[0].type require(elementType != null) { "FrameColumn can not be converted to type `List<*>`" } - column.asAnyFrameColumn().map { it.toListImpl(elementType) } + column.asAnyFrameColumn().map { it.toSequenceImpl(elementType).toList() } } else { error("FrameColumn can not be converted to type `${it.type}`") } @@ -54,7 +54,7 @@ internal fun AnyFrame.toListImpl(type: KType): List { } ColumnKind.Group -> { - DataColumn.createValueColumn(column.name(), column.asColumnGroup().toListImpl(it.type)) + DataColumn.createValueColumn(column.name(), column.asColumnGroup().toSequenceImpl(it.type).toList()) } ColumnKind.Value -> { @@ -74,7 +74,7 @@ internal fun AnyFrame.toListImpl(type: KType): List { convertedColumn } - return rows().map { row -> + return rows().asSequence().map { row -> val parameters = convertedColumns .map { row[it] } .toTypedArray() diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toList.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toList.kt index b9eb5a8739..672a87ed5c 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toList.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toList.kt @@ -2,13 +2,13 @@ package org.jetbrains.kotlinx.dataframe.api import org.jetbrains.kotlinx.dataframe.AnyFrame import org.jetbrains.kotlinx.dataframe.DataFrame -import org.jetbrains.kotlinx.dataframe.impl.api.toListImpl +import org.jetbrains.kotlinx.dataframe.impl.api.toSequenceImpl import kotlin.reflect.typeOf // region DataFrame -public inline fun DataFrame.toList(): List = toListImpl(typeOf()) as List +public inline fun DataFrame.toList(): List = toSequenceImpl(typeOf()).toList() as List -public inline fun AnyFrame.toListOf(): List = toListImpl(typeOf()) as List +public inline fun AnyFrame.toListOf(): List = toSequenceImpl(typeOf()).toList() 
as List // endregion diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toSequence.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toSequence.kt new file mode 100644 index 0000000000..c1fb266c8a --- /dev/null +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toSequence.kt @@ -0,0 +1,14 @@ +package org.jetbrains.kotlinx.dataframe.api + +import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.impl.api.toSequenceImpl +import kotlin.reflect.typeOf + +// region DataFrame + +public inline fun DataFrame.toSequence(): Sequence = toSequenceImpl(typeOf()) as Sequence + +public inline fun AnyFrame.toSequenceOf(): Sequence = toSequenceImpl(typeOf()) as Sequence + +// endregion diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/toList.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/toSequence.kt similarity index 92% rename from core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/toList.kt rename to core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/toSequence.kt index fcf4b6cffe..fe3a0015cc 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/toList.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/toSequence.kt @@ -21,7 +21,7 @@ import kotlin.reflect.full.withNullability import kotlin.reflect.jvm.jvmErasure @PublishedApi -internal fun AnyFrame.toListImpl(type: KType): List { +internal fun AnyFrame.toSequenceImpl(type: KType): Sequence { val clazz = type.jvmErasure require(clazz.isData) { "`$clazz` is not a data class. `toList` is supported only for data classes." 
} @@ -46,7 +46,7 @@ internal fun AnyFrame.toListImpl(type: KType): List { val col: AnyCol = if (it.type.jvmErasure == List::class) { val elementType = it.type.arguments[0].type require(elementType != null) { "FrameColumn can not be converted to type `List<*>`" } - column.asAnyFrameColumn().map { it.toListImpl(elementType) } + column.asAnyFrameColumn().map { it.toSequenceImpl(elementType).toList() } } else { error("FrameColumn can not be converted to type `${it.type}`") } @@ -54,7 +54,7 @@ internal fun AnyFrame.toListImpl(type: KType): List { } ColumnKind.Group -> { - DataColumn.createValueColumn(column.name(), column.asColumnGroup().toListImpl(it.type)) + DataColumn.createValueColumn(column.name(), column.asColumnGroup().toSequenceImpl(it.type).toList()) } ColumnKind.Value -> { @@ -74,7 +74,7 @@ internal fun AnyFrame.toListImpl(type: KType): List { convertedColumn } - return rows().map { row -> + return rows().asSequence().map { row -> val parameters = convertedColumns .map { row[it] } .toTypedArray() diff --git a/docs/StardustDocs/topics/collectionsInterop.md b/docs/StardustDocs/topics/collectionsInterop.md index 3ae7d45ae4..9291248db0 100644 --- a/docs/StardustDocs/topics/collectionsInterop.md +++ b/docs/StardustDocs/topics/collectionsInterop.md @@ -82,7 +82,7 @@ for Gradle or the [Kotlin Jupyter kernel](gettingStartedJupyterNotebook.md) -After your data is transformed, [`DataFrame`](DataFrame.md) instances can be exported +After your data is transformed, [`DataFrame`](DataFrame.md) instances can be exported eagerly into [`List`](https://kotlinlang.org/api/latest/jvm/stdlib/kotlin.collections/-list/) of another data class using [toList](toList.md) or [toListOf](toList.md#tolistof) extensions: @@ -93,6 +93,17 @@ data class Output(val a: Int, val b: Int, val c: Int) val result = df2.toListOf() ``` +Alternatively, one can create lazy [`Sequence`](https://kotlinlang.org/api/latest/jvm/stdlib/kotlin.sequences/-sequence/) objects. 
+This avoids holding the entire list of objects in memory, because each object is created on the fly only when it is requested. + + + +```kotlin +data class Output(val a: Int, val b: Int, val c: Int) + +val result = df2.toSequenceOf<Output>() +``` + ### Converting columns with object instances to ColumnGroup