Skip to content

generalize toListImpl to support conversions into lists and sequences #1046

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 24, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions core/api/core.api
Original file line number Diff line number Diff line change
Expand Up @@ -9808,8 +9808,8 @@ public final class org/jetbrains/kotlinx/dataframe/impl/api/ToDataFrameKt {
public static final fun createDataFrameImpl (Ljava/lang/Iterable;Lkotlin/reflect/KClass;Lkotlin/jvm/functions/Function1;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
}

public final class org/jetbrains/kotlinx/dataframe/impl/api/ToListKt {
public static final fun toListImpl (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Lkotlin/reflect/KType;)Ljava/util/List;
public final class org/jetbrains/kotlinx/dataframe/impl/api/ToSequenceKt {
public static final fun toSequenceImpl (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Lkotlin/reflect/KType;)Lkotlin/sequences/Sequence;
}

public final class org/jetbrains/kotlinx/dataframe/impl/api/UpdateKt {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@ package org.jetbrains.kotlinx.dataframe.api

import org.jetbrains.kotlinx.dataframe.AnyFrame
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.impl.api.toListImpl
import org.jetbrains.kotlinx.dataframe.impl.api.toSequenceImpl
import kotlin.reflect.typeOf

// region DataFrame

public inline fun <reified T> DataFrame<T>.toList(): List<T> = toListImpl(typeOf<T>()) as List<T>
/**
 * Eagerly converts the rows of this [DataFrame] into a [List] of [T].
 *
 * The reified row type is passed to `toSequenceImpl`, and the sequence it returns is
 * materialized with `toList()` before being cast to `List<T>`.
 *
 * @return a list with the objects produced for the rows of this frame.
 * @throws IllegalArgumentException if [T] is not a data class.
 */
public inline fun <reified T> DataFrame<T>.toList(): List<T> {
    val rowType = typeOf<T>()
    val materialized = toSequenceImpl(rowType).toList()
    return materialized as List<T>
}

public inline fun <reified T> AnyFrame.toListOf(): List<T> = toListImpl(typeOf<T>()) as List<T>
/**
 * Eagerly converts the rows of this frame into a [List] of the explicitly requested type [T].
 *
 * The reified type is passed to `toSequenceImpl`, and the sequence it returns is
 * materialized with `toList()` before being cast to `List<T>`.
 *
 * @return a list with the objects produced for the rows of this frame.
 * @throws IllegalArgumentException if [T] is not a data class.
 */
public inline fun <reified T> AnyFrame.toListOf(): List<T> {
    val targetType = typeOf<T>()
    val materialized = toSequenceImpl(targetType).toList()
    return materialized as List<T>
}

// endregion
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package org.jetbrains.kotlinx.dataframe.api

import org.jetbrains.kotlinx.dataframe.AnyFrame
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.impl.api.toSequenceImpl
import kotlin.reflect.typeOf

// region DataFrame

/**
 * Converts the rows of this [DataFrame] into a [Sequence] of [T].
 *
 * The reified row type is passed to `toSequenceImpl`, whose result is cast to `Sequence<T>`
 * and returned without being materialized into a collection.
 *
 * @return the sequence produced by `toSequenceImpl` for this frame.
 * @throws IllegalArgumentException if [T] is not a data class.
 */
public inline fun <reified T> DataFrame<T>.toSequence(): Sequence<T> {
    val rowType = typeOf<T>()
    val converted = toSequenceImpl(rowType)
    return converted as Sequence<T>
}

/**
 * Converts the rows of this frame into a [Sequence] of the explicitly requested type [T].
 *
 * The reified type is passed to `toSequenceImpl`, whose result is cast to `Sequence<T>`
 * and returned without being materialized into a collection.
 *
 * @return the sequence produced by `toSequenceImpl` for this frame.
 * @throws IllegalArgumentException if [T] is not a data class.
 */
public inline fun <reified T> AnyFrame.toSequenceOf(): Sequence<T> {
    val targetType = typeOf<T>()
    val converted = toSequenceImpl(targetType)
    return converted as Sequence<T>
}

// endregion
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import kotlin.reflect.full.withNullability
import kotlin.reflect.jvm.jvmErasure

@PublishedApi
internal fun AnyFrame.toListImpl(type: KType): List<Any> {
internal fun AnyFrame.toSequenceImpl(type: KType): Sequence<Any> {
val clazz = type.jvmErasure
require(clazz.isData) { "`$clazz` is not a data class. `toList` is supported only for data classes." }

Expand All @@ -46,15 +46,15 @@ internal fun AnyFrame.toListImpl(type: KType): List<Any> {
val col: AnyCol = if (it.type.jvmErasure == List::class) {
val elementType = it.type.arguments[0].type
require(elementType != null) { "FrameColumn can not be converted to type `List<*>`" }
column.asAnyFrameColumn().map { it.toListImpl(elementType) }
column.asAnyFrameColumn().map { it.toSequenceImpl(elementType).toList() }
} else {
error("FrameColumn can not be converted to type `${it.type}`")
}
col
}

ColumnKind.Group -> {
DataColumn.createValueColumn(column.name(), column.asColumnGroup().toListImpl(it.type))
DataColumn.createValueColumn(column.name(), column.asColumnGroup().toSequenceImpl(it.type).toList())
}

ColumnKind.Value -> {
Expand All @@ -74,7 +74,7 @@ internal fun AnyFrame.toListImpl(type: KType): List<Any> {
convertedColumn
}

return rows().map { row ->
return rows().asSequence().map { row ->
val parameters = convertedColumns
.map { row[it] }
.toTypedArray()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@ package org.jetbrains.kotlinx.dataframe.api

import org.jetbrains.kotlinx.dataframe.AnyFrame
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.impl.api.toListImpl
import org.jetbrains.kotlinx.dataframe.impl.api.toSequenceImpl
import kotlin.reflect.typeOf

// region DataFrame

public inline fun <reified T> DataFrame<T>.toList(): List<T> = toListImpl(typeOf<T>()) as List<T>
/**
 * Eagerly converts the rows of this [DataFrame] into a [List] of [T].
 *
 * The reified row type is passed to `toSequenceImpl`, and the sequence it returns is
 * materialized with `toList()` before being cast to `List<T>`.
 *
 * @return a list with the objects produced for the rows of this frame.
 * @throws IllegalArgumentException if [T] is not a data class.
 */
public inline fun <reified T> DataFrame<T>.toList(): List<T> {
    val rowType = typeOf<T>()
    val materialized = toSequenceImpl(rowType).toList()
    return materialized as List<T>
}

public inline fun <reified T> AnyFrame.toListOf(): List<T> = toListImpl(typeOf<T>()) as List<T>
/**
 * Eagerly converts the rows of this frame into a [List] of the explicitly requested type [T].
 *
 * The reified type is passed to `toSequenceImpl`, and the sequence it returns is
 * materialized with `toList()` before being cast to `List<T>`.
 *
 * @return a list with the objects produced for the rows of this frame.
 * @throws IllegalArgumentException if [T] is not a data class.
 */
public inline fun <reified T> AnyFrame.toListOf(): List<T> {
    val targetType = typeOf<T>()
    val materialized = toSequenceImpl(targetType).toList()
    return materialized as List<T>
}

// endregion
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package org.jetbrains.kotlinx.dataframe.api

import org.jetbrains.kotlinx.dataframe.AnyFrame
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.impl.api.toSequenceImpl
import kotlin.reflect.typeOf

// region DataFrame

/**
 * Converts the rows of this [DataFrame] into a [Sequence] of [T].
 *
 * The reified row type is passed to `toSequenceImpl`, whose result is cast to `Sequence<T>`
 * and returned without being materialized into a collection.
 *
 * @return the sequence produced by `toSequenceImpl` for this frame.
 * @throws IllegalArgumentException if [T] is not a data class.
 */
public inline fun <reified T> DataFrame<T>.toSequence(): Sequence<T> {
    val rowType = typeOf<T>()
    val converted = toSequenceImpl(rowType)
    return converted as Sequence<T>
}

/**
 * Converts the rows of this frame into a [Sequence] of the explicitly requested type [T].
 *
 * The reified type is passed to `toSequenceImpl`, whose result is cast to `Sequence<T>`
 * and returned without being materialized into a collection.
 *
 * @return the sequence produced by `toSequenceImpl` for this frame.
 * @throws IllegalArgumentException if [T] is not a data class.
 */
public inline fun <reified T> AnyFrame.toSequenceOf(): Sequence<T> {
    val targetType = typeOf<T>()
    val converted = toSequenceImpl(targetType)
    return converted as Sequence<T>
}

// endregion
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import kotlin.reflect.full.withNullability
import kotlin.reflect.jvm.jvmErasure

@PublishedApi
internal fun AnyFrame.toListImpl(type: KType): List<Any> {
internal fun AnyFrame.toSequenceImpl(type: KType): Sequence<Any> {
val clazz = type.jvmErasure
require(clazz.isData) { "`$clazz` is not a data class. `toList` is supported only for data classes." }

Expand All @@ -46,15 +46,15 @@ internal fun AnyFrame.toListImpl(type: KType): List<Any> {
val col: AnyCol = if (it.type.jvmErasure == List::class) {
val elementType = it.type.arguments[0].type
require(elementType != null) { "FrameColumn can not be converted to type `List<*>`" }
column.asAnyFrameColumn().map { it.toListImpl(elementType) }
column.asAnyFrameColumn().map { it.toSequenceImpl(elementType).toList() }
} else {
error("FrameColumn can not be converted to type `${it.type}`")
}
col
}

ColumnKind.Group -> {
DataColumn.createValueColumn(column.name(), column.asColumnGroup().toListImpl(it.type))
DataColumn.createValueColumn(column.name(), column.asColumnGroup().toSequenceImpl(it.type).toList())
}

ColumnKind.Value -> {
Expand All @@ -74,7 +74,7 @@ internal fun AnyFrame.toListImpl(type: KType): List<Any> {
convertedColumn
}

return rows().map { row ->
return rows().asSequence().map { row ->
val parameters = convertedColumns
.map { row[it] }
.toTypedArray()
Expand Down
13 changes: 12 additions & 1 deletion docs/StardustDocs/topics/collectionsInterop.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ for Gradle or the [Kotlin Jupyter kernel](gettingStartedJupyterNotebook.md)

</tip>

After your data is transformed, [`DataFrame`](DataFrame.md) instances can be exported
After your data is transformed, [`DataFrame`](DataFrame.md) instances can be eagerly exported
into a [`List`](https://kotlinlang.org/api/latest/jvm/stdlib/kotlin.collections/-list/) of another data class using the [toList](toList.md) or [toListOf](toList.md#tolistof) extensions:

<!---FUN listInterop4-->
Expand All @@ -93,6 +93,17 @@ data class Output(val a: Int, val b: Int, val c: Int)
val result = df2.toListOf<Output>()
```

Alternatively, you can create a lazy [`Sequence`](https://kotlinlang.org/api/latest/jvm/stdlib/kotlin.sequences/-sequence/) using the `toSequence` or `toSequenceOf` extensions.
This avoids holding the entire list of objects in memory, because each object is created on the fly only when it is requested.

<!---FUN listInterop5-->

```kotlin
data class Output(val a: Int, val b: Int, val c: Int)

val result = df2.toSequenceOf<Output>()
```

<!---END-->

### Converting columns with object instances to ColumnGroup
Expand Down
Loading