Skip to content

Add convert asColumn operation as compiler plugin friendly variant of replace with #1143

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 25, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions core/api/core.api
Original file line number Diff line number Diff line change
Expand Up @@ -1434,10 +1434,13 @@ public final class org/jetbrains/kotlinx/dataframe/api/ConstructorsKt {
public final class org/jetbrains/kotlinx/dataframe/api/Convert {
public fun <init> (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Lkotlin/jvm/functions/Function2;)V
public final fun cast ()Lorg/jetbrains/kotlinx/dataframe/api/Convert;
public final fun getColumns ()Lkotlin/jvm/functions/Function2;
public final fun getDf ()Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public fun toString ()Ljava/lang/String;
}

public final class org/jetbrains/kotlinx/dataframe/api/ConvertKt {
public static final fun asColumn (Lorg/jetbrains/kotlinx/dataframe/api/Convert;Lkotlin/jvm/functions/Function2;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun asFrame (Lorg/jetbrains/kotlinx/dataframe/api/Convert;Lkotlin/jvm/functions/Function2;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun convert (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Lkotlin/jvm/functions/Function2;)Lorg/jetbrains/kotlinx/dataframe/api/Convert;
public static final fun convert (Lorg/jetbrains/kotlinx/dataframe/DataFrame;[Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/api/Convert;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import org.jetbrains.kotlinx.dataframe.annotations.Converter
import org.jetbrains.kotlinx.dataframe.annotations.HasSchema
import org.jetbrains.kotlinx.dataframe.annotations.Interpretable
import org.jetbrains.kotlinx.dataframe.annotations.Refine
import org.jetbrains.kotlinx.dataframe.columns.BaseColumn
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
Expand Down Expand Up @@ -97,7 +98,10 @@ public inline fun <T, C, reified R> Convert<T, C?>.notNull(
}

@HasSchema(schemaArg = 0)
public class Convert<T, out C>(internal val df: DataFrame<T>, internal val columns: ColumnsSelector<T, C>) {
public class Convert<T, out C>(
@PublishedApi internal val df: DataFrame<T>,
@PublishedApi internal val columns: ColumnsSelector<T, C>,
) {
public fun <R> cast(): Convert<T, R> = Convert(df, columns as ColumnsSelector<T, R>)

@Refine
Expand Down Expand Up @@ -129,6 +133,21 @@ public fun <T, C, R> Convert<T, DataRow<C>>.asFrame(
body: ColumnsContainer<T>.(ColumnGroup<C>) -> DataFrame<R>,
): DataFrame<T> = to { body(this, it.asColumnGroup()).asColumnGroup(it.name()) }

/**
 * Compiler plugin-friendly counterpart of [ReplaceClause.with].
 *
 * [ReplaceClause.with] lets the caller change both the types and the names of the replaced columns.
 * Tracking column renames inside an arbitrary lambda expression is unreliable and, in general,
 * impossible to do statically.
 * This function guarantees that every selected column keeps its name and only its type changes to [R]:
 * whatever name the column returned by [columnConverter] carries, it is renamed back to the name of
 * the column it was produced from.
 *
 * Example:
 * `df.convert { colsOf<String>() }.asColumn { it.asList().parallelStream().map { heavyIO(it) }.toList().toColumn() }`
 *
 * @param columnConverter invoked once per selected column, with the original [DataFrame] as receiver
 * and the selected column as argument; returns the replacement column.
 * @return a [DataFrame] in which every selected column is replaced by its converted version.
 */
@Refine
@Interpretable("ConvertAsColumn")
public inline fun <T, C, R> Convert<T, C>.asColumn(
    crossinline columnConverter: DataFrame<T>.(DataColumn<C>) -> BaseColumn<R>,
): DataFrame<T> =
    df.replace(columns).with { source ->
        val converted = columnConverter(df, source)
        // Discard any name chosen by the converter: the original column name always wins.
        converted.rename(source.name())
    }

@Refine
@Interpretable("PerRowCol")
public inline fun <T, C, reified R> Convert<T, C>.perRowCol(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@ public fun <T, C> ReplaceClause<T, C>.with(newColumns: List<AnyCol>): DataFrame<
}

// TODO: Issue #418: breaks if running on ColumnGroup and its child

/**
* For an alternative supported in the compiler plugin, use [Convert.asColumn].
*/
public fun <T, C> ReplaceClause<T, C>.with(transform: ColumnsContainer<T>.(DataColumn<C>) -> AnyBaseCol): DataFrame<T> {
val removeResult = df.removeImpl(columns = columns)
val toInsert = removeResult.removedColumns.map {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
import org.jetbrains.kotlinx.dataframe.api.add
import org.jetbrains.kotlinx.dataframe.api.after
import org.jetbrains.kotlinx.dataframe.api.asColumn
import org.jetbrains.kotlinx.dataframe.api.asFrame
import org.jetbrains.kotlinx.dataframe.api.asGroupBy
import org.jetbrains.kotlinx.dataframe.api.at
Expand Down Expand Up @@ -87,6 +88,7 @@ import org.jetbrains.kotlinx.dataframe.api.sortWith
import org.jetbrains.kotlinx.dataframe.api.split
import org.jetbrains.kotlinx.dataframe.api.sum
import org.jetbrains.kotlinx.dataframe.api.to
import org.jetbrains.kotlinx.dataframe.api.toColumn
import org.jetbrains.kotlinx.dataframe.api.toFloat
import org.jetbrains.kotlinx.dataframe.api.toStart
import org.jetbrains.kotlinx.dataframe.api.toMap
Expand Down Expand Up @@ -114,6 +116,7 @@ import org.junit.Test
import java.net.URL
import java.time.format.DateTimeFormatter
import java.util.*
import java.util.stream.Collectors

@Suppress("ktlint:standard:chain-method-continuation", "ktlint:standard:argument-list-wrapping")
class Modify : TestBase() {
Expand Down Expand Up @@ -235,6 +238,16 @@ class Modify : TestBase() {
// SampleEnd
}

/**
 * Sample for `convert { ... }.asColumn { ... }`.
 *
 * Selects the `name` column, turns every value into its string form through a parallel stream,
 * collects the results into a list and wraps that list back into a column.
 * The resulting frame is discarded and nothing is asserted; the test only checks that the
 * expression compiles and runs.
 *
 * NOTE(review): the `SampleStart` / `SampleEnd` markers presumably delimit the snippet that is
 * extracted into the documentation, so the code between them should be edited with care — confirm.
 */
@Test
@TransformDataFrameExpressions
fun convertAsColumn() {
// SampleStart
df.convert { name }.asColumn { col ->
col.toList().parallelStream().map { it.toString() }.collect(Collectors.toList()).toColumn()
}
// SampleEnd
}

@Test
@TransformDataFrameExpressions
fun parseAll() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import org.jetbrains.kotlinx.dataframe.api.addId
import org.jetbrains.kotlinx.dataframe.api.after
import org.jetbrains.kotlinx.dataframe.api.aggregate
import org.jetbrains.kotlinx.dataframe.api.append
import org.jetbrains.kotlinx.dataframe.api.asColumn
import org.jetbrains.kotlinx.dataframe.api.asColumnGroup
import org.jetbrains.kotlinx.dataframe.api.asDataFrame
import org.jetbrains.kotlinx.dataframe.api.asFrame
Expand Down Expand Up @@ -82,7 +83,9 @@ import org.jetbrains.kotlinx.dataframe.api.single
import org.jetbrains.kotlinx.dataframe.api.sortBy
import org.jetbrains.kotlinx.dataframe.api.split
import org.jetbrains.kotlinx.dataframe.api.sumOf
import org.jetbrains.kotlinx.dataframe.api.toColumn
import org.jetbrains.kotlinx.dataframe.api.toColumnAccessor
import org.jetbrains.kotlinx.dataframe.api.toStr
import org.jetbrains.kotlinx.dataframe.api.toTop
import org.jetbrains.kotlinx.dataframe.api.under
import org.jetbrains.kotlinx.dataframe.api.ungroup
Expand All @@ -100,7 +103,9 @@ import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
import org.jetbrains.kotlinx.dataframe.columns.depth
import org.jetbrains.kotlinx.dataframe.hasNulls
import org.junit.Test
import java.util.stream.Collectors
import kotlin.reflect.typeOf
import kotlin.streams.toList

class DataFrameTreeTests : BaseTest() {

Expand Down Expand Up @@ -485,6 +490,15 @@ class DataFrameTreeTests : BaseTest() {
.sorted()
}

/**
 * `asColumn` must keep the original column names: the converter below deliberately names its
 * result "123", yet the outcome has to equal a plain `toStr()` conversion of the same columns.
 */
@Test
fun `convert column expression ignoring name changes`() {
    val converted = df.convert { colsOf<Double?>() }.asColumn { column ->
        val asStrings = column.toList()
            .parallelStream()
            .map { value -> value.toString() }
            .collect(Collectors.toList())
        // The name given here is expected to be overridden by the original column name.
        asStrings.toColumn("123")
    }
    val expected = df.convert { colsOf<Double?>() }.toStr()

    converted shouldBe expected
}

@Test
fun extensionPropertiesTest() {
val code = CodeGenerator.create()
Expand Down
Loading
Loading