Skip to content

Commit 8756bb9

Browse files
committed
Add convert asColumn operation as compiler plugin friendly variant of replace with
1 parent 5f5b866 commit 8756bb9

File tree

11 files changed

+612
-3
lines changed

11 files changed

+612
-3
lines changed

core/api/core.api

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1434,10 +1434,13 @@ public final class org/jetbrains/kotlinx/dataframe/api/ConstructorsKt {
14341434
public final class org/jetbrains/kotlinx/dataframe/api/Convert {
14351435
public fun <init> (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Lkotlin/jvm/functions/Function2;)V
14361436
public final fun cast ()Lorg/jetbrains/kotlinx/dataframe/api/Convert;
1437+
public final fun getColumns ()Lkotlin/jvm/functions/Function2;
1438+
public final fun getDf ()Lorg/jetbrains/kotlinx/dataframe/DataFrame;
14371439
public fun toString ()Ljava/lang/String;
14381440
}
14391441

14401442
public final class org/jetbrains/kotlinx/dataframe/api/ConvertKt {
1443+
public static final fun asColumn (Lorg/jetbrains/kotlinx/dataframe/api/Convert;Lkotlin/jvm/functions/Function2;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
14411444
public static final fun asFrame (Lorg/jetbrains/kotlinx/dataframe/api/Convert;Lkotlin/jvm/functions/Function2;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
14421445
public static final fun convert (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Lkotlin/jvm/functions/Function2;)Lorg/jetbrains/kotlinx/dataframe/api/Convert;
14431446
public static final fun convert (Lorg/jetbrains/kotlinx/dataframe/DataFrame;[Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/api/Convert;

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/convert.kt

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import org.jetbrains.kotlinx.dataframe.annotations.Converter
2121
import org.jetbrains.kotlinx.dataframe.annotations.HasSchema
2222
import org.jetbrains.kotlinx.dataframe.annotations.Interpretable
2323
import org.jetbrains.kotlinx.dataframe.annotations.Refine
24+
import org.jetbrains.kotlinx.dataframe.columns.BaseColumn
2425
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
2526
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
2627
import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
@@ -97,7 +98,10 @@ public inline fun <T, C, reified R> Convert<T, C?>.notNull(
9798
}
9899

99100
@HasSchema(schemaArg = 0)
100-
public class Convert<T, out C>(internal val df: DataFrame<T>, internal val columns: ColumnsSelector<T, C>) {
101+
public class Convert<T, out C>(
102+
@PublishedApi internal val df: DataFrame<T>,
103+
@PublishedApi internal val columns: ColumnsSelector<T, C>,
104+
) {
101105
public fun <R> cast(): Convert<T, R> = Convert(df, columns as ColumnsSelector<T, R>)
102106

103107
@Refine
@@ -129,6 +133,21 @@ public fun <T, C, R> Convert<T, DataRow<C>>.asFrame(
129133
body: ColumnsContainer<T>.(ColumnGroup<C>) -> DataFrame<R>,
130134
): DataFrame<T> = to { body(this, it.asColumnGroup()).asColumnGroup(it.name()) }
131135

136+
/**
137+
* Compiler plugin-friendly variant of [ReplaceClause.with].
138+
* [ReplaceClause.with] allows changing both column types and names.
139+
* Tracking column name changes in an arbitrary lambda expression is unreliable and generally impossible
140+
* to do statically.
141+
* This function ensures that all column names remain unchanged and only their type changes to [R].
142+
* Example:
143+
* `df.convert { colsOf<String>() }.asColumn { it.asList().parallelStream().map { heavyIO(it) }.toList().toColumn() }`
144+
*/
145+
@Refine
146+
@Interpretable("ConvertAsColumn")
147+
public inline fun <T, C, R> Convert<T, C>.asColumn(
148+
crossinline columnConverter: DataFrame<T>.(DataColumn<C>) -> BaseColumn<R>,
149+
): DataFrame<T> = df.replace(columns).with { columnConverter(df, it).rename(it.name()) }
150+
132151
@Refine
133152
@Interpretable("PerRowCol")
134153
public inline fun <T, C, reified R> Convert<T, C>.perRowCol(

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/replace.kt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,10 @@ public fun <T, C> ReplaceClause<T, C>.with(newColumns: List<AnyCol>): DataFrame<
5353
}
5454

5555
// TODO: Issue #418: breaks if running on ColumnGroup and its child
56+
57+
/**
58+
* For an alternative supported by the compiler plugin, use [Convert.asColumn].
59+
*/
5660
public fun <T, C> ReplaceClause<T, C>.with(transform: ColumnsContainer<T>.(DataColumn<C>) -> AnyBaseCol): DataFrame<T> {
5761
val removeResult = df.removeImpl(columns = columns)
5862
val toInsert = removeResult.removedColumns.map {

core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
1010
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
1111
import org.jetbrains.kotlinx.dataframe.api.add
1212
import org.jetbrains.kotlinx.dataframe.api.after
13+
import org.jetbrains.kotlinx.dataframe.api.asColumn
1314
import org.jetbrains.kotlinx.dataframe.api.asFrame
1415
import org.jetbrains.kotlinx.dataframe.api.asGroupBy
1516
import org.jetbrains.kotlinx.dataframe.api.at
@@ -87,6 +88,7 @@ import org.jetbrains.kotlinx.dataframe.api.sortWith
8788
import org.jetbrains.kotlinx.dataframe.api.split
8889
import org.jetbrains.kotlinx.dataframe.api.sum
8990
import org.jetbrains.kotlinx.dataframe.api.to
91+
import org.jetbrains.kotlinx.dataframe.api.toColumn
9092
import org.jetbrains.kotlinx.dataframe.api.toFloat
9193
import org.jetbrains.kotlinx.dataframe.api.toStart
9294
import org.jetbrains.kotlinx.dataframe.api.toMap
@@ -114,6 +116,7 @@ import org.junit.Test
114116
import java.net.URL
115117
import java.time.format.DateTimeFormatter
116118
import java.util.*
119+
import java.util.stream.Collectors
117120

118121
@Suppress("ktlint:standard:chain-method-continuation", "ktlint:standard:argument-list-wrapping")
119122
class Modify : TestBase() {
@@ -235,6 +238,16 @@ class Modify : TestBase() {
235238
// SampleEnd
236239
}
237240

241+
@Test
242+
@TransformDataFrameExpressions
243+
fun convertAsColumn() {
244+
// SampleStart
245+
df.convert { name }.asColumn { col ->
246+
col.toList().parallelStream().map { it.toString() }.collect(Collectors.toList()).toColumn()
247+
}
248+
// SampleEnd
249+
}
250+
238251
@Test
239252
@TransformDataFrameExpressions
240253
fun parseAll() {

core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTreeTests.kt

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import org.jetbrains.kotlinx.dataframe.api.addId
1919
import org.jetbrains.kotlinx.dataframe.api.after
2020
import org.jetbrains.kotlinx.dataframe.api.aggregate
2121
import org.jetbrains.kotlinx.dataframe.api.append
22+
import org.jetbrains.kotlinx.dataframe.api.asColumn
2223
import org.jetbrains.kotlinx.dataframe.api.asColumnGroup
2324
import org.jetbrains.kotlinx.dataframe.api.asDataFrame
2425
import org.jetbrains.kotlinx.dataframe.api.asFrame
@@ -82,7 +83,9 @@ import org.jetbrains.kotlinx.dataframe.api.single
8283
import org.jetbrains.kotlinx.dataframe.api.sortBy
8384
import org.jetbrains.kotlinx.dataframe.api.split
8485
import org.jetbrains.kotlinx.dataframe.api.sumOf
86+
import org.jetbrains.kotlinx.dataframe.api.toColumn
8587
import org.jetbrains.kotlinx.dataframe.api.toColumnAccessor
88+
import org.jetbrains.kotlinx.dataframe.api.toStr
8689
import org.jetbrains.kotlinx.dataframe.api.toTop
8790
import org.jetbrains.kotlinx.dataframe.api.under
8891
import org.jetbrains.kotlinx.dataframe.api.ungroup
@@ -100,7 +103,9 @@ import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
100103
import org.jetbrains.kotlinx.dataframe.columns.depth
101104
import org.jetbrains.kotlinx.dataframe.hasNulls
102105
import org.junit.Test
106+
import java.util.stream.Collectors
103107
import kotlin.reflect.typeOf
108+
import kotlin.streams.toList
104109

105110
class DataFrameTreeTests : BaseTest() {
106111

@@ -485,6 +490,15 @@ class DataFrameTreeTests : BaseTest() {
485490
.sorted()
486491
}
487492

493+
@Test
494+
fun `convert column expression ignoring name changes`() {
495+
val res = df.convert { colsOf<Double?>() }.asColumn {
496+
it.toList().parallelStream().map { it.toString() }.collect(Collectors.toList()).toColumn("123")
497+
}
498+
499+
res shouldBe df.convert { colsOf<Double?>() }.toStr()
500+
}
501+
488502
@Test
489503
fun extensionPropertiesTest() {
490504
val code = CodeGenerator.create()

0 commit comments

Comments
 (0)