Skip to content

Add more operation to compiler plugin #1052

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Feb 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 11 additions & 4 deletions core/api/core.api
Original file line number Diff line number Diff line change
Expand Up @@ -6003,16 +6003,23 @@ public final class org/jetbrains/kotlinx/dataframe/api/Merge {
}

public final class org/jetbrains/kotlinx/dataframe/api/MergeKt {
public static final fun asStrings (Lorg/jetbrains/kotlinx/dataframe/api/Merge;)Lorg/jetbrains/kotlinx/dataframe/api/Merge;
public static final fun by (Lorg/jetbrains/kotlinx/dataframe/api/Merge;Ljava/lang/CharSequence;Ljava/lang/CharSequence;Ljava/lang/CharSequence;ILjava/lang/CharSequence;)Lorg/jetbrains/kotlinx/dataframe/api/Merge;
public static synthetic fun by$default (Lorg/jetbrains/kotlinx/dataframe/api/Merge;Ljava/lang/CharSequence;Ljava/lang/CharSequence;Ljava/lang/CharSequence;ILjava/lang/CharSequence;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/api/Merge;
public static final fun asStrings (Lorg/jetbrains/kotlinx/dataframe/api/Merge;)Lorg/jetbrains/kotlinx/dataframe/api/MergeWithTransform;
public static final fun by (Lorg/jetbrains/kotlinx/dataframe/api/Merge;Ljava/lang/CharSequence;Ljava/lang/CharSequence;Ljava/lang/CharSequence;ILjava/lang/CharSequence;)Lorg/jetbrains/kotlinx/dataframe/api/MergeWithTransform;
public static synthetic fun by$default (Lorg/jetbrains/kotlinx/dataframe/api/Merge;Ljava/lang/CharSequence;Ljava/lang/CharSequence;Ljava/lang/CharSequence;ILjava/lang/CharSequence;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/api/MergeWithTransform;
public static final fun into (Lorg/jetbrains/kotlinx/dataframe/api/Merge;Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun into (Lorg/jetbrains/kotlinx/dataframe/api/Merge;Lorg/jetbrains/kotlinx/dataframe/columns/ColumnAccessor;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun into (Lorg/jetbrains/kotlinx/dataframe/api/Merge;Lorg/jetbrains/kotlinx/dataframe/columns/ColumnPath;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun into (Lorg/jetbrains/kotlinx/dataframe/api/MergeWithTransform;Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun into (Lorg/jetbrains/kotlinx/dataframe/api/MergeWithTransform;Lorg/jetbrains/kotlinx/dataframe/columns/ColumnPath;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun intoList (Lorg/jetbrains/kotlinx/dataframe/api/Merge;)Ljava/util/List;
public static final fun intoList (Lorg/jetbrains/kotlinx/dataframe/api/MergeWithTransform;)Ljava/util/List;
public static final fun merge (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Lkotlin/jvm/functions/Function2;)Lorg/jetbrains/kotlinx/dataframe/api/Merge;
public static final fun merge (Lorg/jetbrains/kotlinx/dataframe/DataFrame;[Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/api/Merge;
public static final fun notNull (Lorg/jetbrains/kotlinx/dataframe/api/Merge;)Lorg/jetbrains/kotlinx/dataframe/api/Merge;
public static final fun notNullList (Lorg/jetbrains/kotlinx/dataframe/api/Merge;)Lorg/jetbrains/kotlinx/dataframe/api/Merge;
}

public final class org/jetbrains/kotlinx/dataframe/api/MergeWithTransform {
public fun <init> (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Lkotlin/jvm/functions/Function2;ZLkotlin/jvm/functions/Function2;Lkotlin/reflect/KType;Lorg/jetbrains/kotlinx/dataframe/api/Infer;)V
}

public final class org/jetbrains/kotlinx/dataframe/api/MinKt {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ public abstract class AggregateDsl<out T> :
DataFrame<T>,
ColumnSelectionDsl<T> {

@Interpretable("GroupByInto")
@Interpretable("AggregateDslInto")
public inline infix fun <reified R> R.into(name: String): NamedValue =
internal().yield(pathOf(name), this, typeOf<R>())

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ public fun <T> Convert<T, *>.to(type: KType): DataFrame<T> = to { it.convertTo(t
public fun <T, C> Convert<T, C>.to(columnConverter: DataFrame<T>.(DataColumn<C>) -> AnyBaseCol): DataFrame<T> =
df.replace(columns).with { columnConverter(df, it) }

@Refine
@Interpretable("With0")
public inline fun <T, C, reified R> Convert<T, C>.with(
infer: Infer = Infer.Nulls,
Expand All @@ -126,6 +127,8 @@ public fun <T, C, R> Convert<T, DataRow<C>>.asFrame(
body: ColumnsContainer<T>.(ColumnGroup<C>) -> DataFrame<R>,
): DataFrame<T> = to { body(this, it.asColumnGroup()).asColumnGroup(it.name()) }

@Refine
@Interpretable("PerRowCol")
public inline fun <T, C, reified R> Convert<T, C>.perRowCol(
infer: Infer = Infer.Nulls,
noinline expression: RowColumnExpression<T, C, R>,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ public data class Gather<T, C, K, R>(
/**
 * Finishes a gather operation using plain string column names.
 *
 * Both names are handed to `gatherImpl` unchanged.
 *
 * @param keyColumn name forwarded to `gatherImpl` as the key column.
 * @param valueColumn name forwarded to `gatherImpl` as the value column.
 * @return the [DataFrame] produced by `gatherImpl`.
 */
public fun <T, C, K, R> Gather<T, C, K, R>.into(keyColumn: String, valueColumn: String): DataFrame<T> {
    return gatherImpl(keyColumn, valueColumn)
}

@AccessApiOverload
public fun <T, C, K, R> Gather<T, C, K, R>.into(
keyColumn: ColumnAccessor<K>,
valueColumn: ColumnAccessor<R>,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ import org.jetbrains.kotlinx.dataframe.AnyRow
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.RowExpression
import org.jetbrains.kotlinx.dataframe.annotations.AccessApiOverload
import org.jetbrains.kotlinx.dataframe.annotations.Interpretable
import org.jetbrains.kotlinx.dataframe.annotations.Refine
import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor
import org.jetbrains.kotlinx.dataframe.impl.aggregation.internal
import org.jetbrains.kotlinx.dataframe.impl.aggregation.withExpr
Expand All @@ -14,6 +16,8 @@ import kotlin.reflect.typeOf

// region GroupBy

/**
 * Turns this [GroupBy] into a [DataFrame] by delegating to `toDataFrame`.
 *
 * @param column name forwarded to `toDataFrame`
 *   (presumably the name of the column that holds the groups — confirm against `toDataFrame`).
 * @return the [DataFrame] returned by `toDataFrame`.
 */
@Refine
@Interpretable("GroupByInto")
public fun <T, G> GroupBy<T, G>.into(column: String): DataFrame<T> {
    return toDataFrame(column)
}

@AccessApiOverload
Expand Down
60 changes: 52 additions & 8 deletions core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/merge.kt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ import org.jetbrains.kotlinx.dataframe.ColumnsSelector
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.DataRow
import org.jetbrains.kotlinx.dataframe.annotations.AccessApiOverload
import org.jetbrains.kotlinx.dataframe.annotations.Interpretable
import org.jetbrains.kotlinx.dataframe.annotations.Refine
import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor
import org.jetbrains.kotlinx.dataframe.columns.ColumnPath
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
Expand All @@ -16,6 +18,7 @@ import kotlin.reflect.KProperty
import kotlin.reflect.KType
import kotlin.reflect.typeOf

/**
 * Starts a merge of the columns picked by [selector].
 *
 * The returned [Merge] starts with the `notNull` flag off, an identity transform
 * (the merged value is the list of selected values itself), `typeOf<Any?>()` as the
 * result type and [Infer.Type].
 *
 * @param selector chooses the columns to be merged.
 * @return a [Merge] whose result type is `List<C>`.
 */
@Interpretable("Merge0")
public fun <T, C> DataFrame<T>.merge(selector: ColumnsSelector<T, C>): Merge<T, C, List<C>> {
    return Merge(
        this,
        selector,
        false,
        { it },
        typeOf<Any?>(),
        Infer.Type,
    )
}

Expand Down Expand Up @@ -44,19 +47,51 @@ public data class Merge<T, C, R>(
internal val infer: Infer,
)

public fun <T, C, R> Merge<T, C, R>.notNull(): Merge<T, C, R> = copy(notNull = true)
/**
 * Intermediate state of a merge operation that carries a transform producing values of type [R].
 *
 * Holds everything needed to finish the merge: the source dataframe, the column selector,
 * the `notNull` flag, the transform and the type/inference settings for the resulting column.
 *
 * @param T schema marker of the source [DataFrame].
 * @param C type of the values in the selected columns.
 * @param R type produced by [transform] for every row.
 */
public class MergeWithTransform<T, C, R>(
// dataframe the merge operates on
internal val df: DataFrame<T>,
// selects the columns whose values are merged
internal val selector: ColumnsSelector<T, C>,
// flag carried over from the originating merge
// NOTE(review): presumably means null values are dropped before `transform` runs — confirm in into(ColumnPath)
internal val notNull: Boolean,
// maps a row plus the list of selected values to the merged value
internal val transform: DataRow<T>.(List<C>) -> R,
// runtime type associated with the merged values (presumably the KType of R — confirm)
internal val resultType: KType,
// type inference mode for the resulting column
internal val infer: Infer,
)

/**
 * Returns a copy of this merge with the `notNull` flag switched on.
 *
 * The element type is narrowed from `C` to `C & Any` at compile time only;
 * the narrowing is an unchecked cast of the copied instance.
 */
@Interpretable("MergeId")
public fun <T, C, R> Merge<T, C, R>.notNull(): Merge<T, C & Any, R> {
    val flagged = copy(notNull = true)
    return flagged as Merge<T, C & Any, R>
}

/**
 * Overload of `notNull()` for merges whose result is a `List`.
 *
 * Besides switching the `notNull` flag on, it narrows both the element type (`C & Any`)
 * and the list's item type (`R & Any`). The narrowing is an unchecked cast of the copy.
 */
@JvmName("notNullList")
@Interpretable("MergeId")
public fun <T, C, R> Merge<T, C, List<R>>.notNull(): Merge<T, C & Any, List<R & Any>> {
    val flagged = copy(notNull = true)
    return flagged as Merge<T, C & Any, List<R & Any>>
}

/**
 * Completes the merge, addressing the target column by name.
 *
 * The name is wrapped with `pathOf` and passed to the `ColumnPath` overload of `into`.
 *
 * @param columnName name of the target column.
 */
@Refine
@Interpretable("MergeInto0")
public fun <T, C, R> MergeWithTransform<T, C, R>.into(columnName: String): DataFrame<T> {
    val targetPath = pathOf(columnName)
    return into(targetPath)
}

/**
 * Completes the merge, addressing the target column by name.
 *
 * The name is wrapped with `pathOf` and passed to the `ColumnPath` overload of `into`.
 *
 * @param columnName name of the target column.
 */
@Refine
@Interpretable("MergeInto0")
public fun <T, C, R> Merge<T, C, R>.into(columnName: String): DataFrame<T> {
    val targetPath = pathOf(columnName)
    return into(targetPath)
}

@AccessApiOverload
public fun <T, C, R> Merge<T, C, R>.into(column: ColumnAccessor<*>): DataFrame<T> = into(column.path())
public inline fun <T, C, reified R> Merge<T, C, R>.into(column: ColumnAccessor<*>): DataFrame<T> = into(column.path())

/**
 * Completes the merge, addressing the target column through a [ColumnAccessor].
 *
 * Only the accessor's path is used; it is passed to the `ColumnPath` overload of `into`.
 *
 * @param column accessor whose `path()` identifies the target column.
 */
@AccessApiOverload
public inline fun <T, C, reified R> MergeWithTransform<T, C, R>.into(column: ColumnAccessor<*>): DataFrame<T> {
    return into(column.path())
}

/**
 * Evaluates the merge row by row and returns the transformed values as a plain list
 * instead of writing them into a column.
 *
 * For every row of the selected columns the row's values are passed to `transform`.
 * When the `notNull` flag is set, `null` values are removed first: `notNull()` narrows
 * the element type to `C & Any` with an unchecked cast, so passing nulls through would
 * expose them behind a non-null type.
 *
 * NOTE(review): `into(ColumnPath)` may apply additional filtering when `notNull` is set —
 * confirm the two code paths agree.
 *
 * @return one transformed value per row, in row order.
 */
public fun <T, C, R> Merge<T, C, R>.intoList(): List<R> =
    df.select(selector).rows().map { row ->
        val values = row.values() as List<C>
        // honor notNull so the narrowed (non-null) element type actually holds at runtime
        val effectiveValues = if (notNull) values.filter { it != null } else values
        transform(row, effectiveValues)
    }

public fun <T, C, R> Merge<T, C, R>.into(path: ColumnPath): DataFrame<T> {
/**
 * Evaluates the merge row by row and returns the transformed values as a plain list
 * instead of writing them into a column.
 *
 * For every row of the selected columns the row's values are passed to `transform`.
 * When the `notNull` flag is set, `null` values are removed first, so a transform
 * written against a non-null element type never observes nulls.
 *
 * NOTE(review): `into(ColumnPath)` may apply additional filtering when `notNull` is set —
 * confirm the two code paths agree.
 *
 * @return one transformed value per row, in row order.
 */
public fun <T, C, R> MergeWithTransform<T, C, R>.intoList(): List<R> =
    df.select(selector).rows().map { row ->
        val values = row.values() as List<C>
        // honor notNull: the flag is carried over from the originating merge
        val effectiveValues = if (notNull) values.filter { it != null } else values
        transform(row, effectiveValues)
    }

public fun <T, C, R> MergeWithTransform<T, C, R>.into(path: ColumnPath): DataFrame<T> {
// If target path exists, merge into temp path
val mergePath = if (df.getColumnOrNull(path) != null) pathOf(nameGenerator().addUnique("temp")) else path
val mergePath = if (df.getColumnOrNull(path) != null) {
pathOf(df.nameGenerator().addUnique("temp"))
} else {
path
}

// move columns into group
val grouped = df.move(selector).under { mergePath }
Expand All @@ -82,16 +117,21 @@ public fun <T, C, R> Merge<T, C, R>.into(path: ColumnPath): DataFrame<T> {
return res
}

public fun <T, C, R> Merge<T, C, R>.asStrings(): Merge<T, C, String> = by(", ")
/**
 * Completes the merge into the column at [path].
 *
 * Copies this merge's state into a [MergeWithTransform] and delegates to its
 * `into(ColumnPath)` implementation, so both types share one code path.
 *
 * @param path path of the target column.
 */
public fun <T, C, R> Merge<T, C, R>.into(path: ColumnPath): DataFrame<T> {
    val withTransform = MergeWithTransform(df, selector, notNull, transform, resultType, infer)
    return withTransform.into(path)
}

/**
 * Merges the selected values into a single string per row.
 *
 * Shorthand for calling `by` with `", "` as the separator and all other parameters at their defaults.
 */
@Interpretable("MergeId")
public fun <T, C, R> Merge<T, C, R>.asStrings(): MergeWithTransform<T, C, String> {
    return by(separator = ", ")
}

@Interpretable("MergeBy0")
public fun <T, C, R> Merge<T, C, R>.by(
separator: CharSequence = ", ",
prefix: CharSequence = "",
postfix: CharSequence = "",
limit: Int = -1,
truncated: CharSequence = "...",
): Merge<T, C, String> =
Merge(
): MergeWithTransform<T, C, String> =
MergeWithTransform(
df = df,
selector = selector,
notNull = notNull,
Expand All @@ -108,7 +148,11 @@ public fun <T, C, R> Merge<T, C, R>.by(
infer = Infer.Nulls,
)

@Interpretable("MergeBy1")
public inline fun <T, C, R, reified V> Merge<T, C, R>.by(
infer: Infer = Infer.Nulls,
crossinline transform: DataRow<T>.(R) -> V,
): Merge<T, C, V> = Merge(df, selector, notNull, { transform(this@by.transform(this, it)) }, typeOf<V>(), infer)
): MergeWithTransform<T, C, V> =
MergeWithTransform(df, selector, notNull, {
transform(this@by.transform(this, it))
}, typeOf<V>(), infer)
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ import kotlin.reflect.KProperty

// region DataFrame

/**
 * Renames columns according to `oldName to newName` pairs.
 *
 * The first components of [mappings] select the columns to rename; the second components
 * are passed, in the same order, as the new names.
 *
 * @param mappings pairs of current column name and new column name.
 */
@Refine
@Interpretable("RenameMapping")
public fun <T> DataFrame<T>.rename(vararg mappings: Pair<String, String>): DataFrame<T> {
    val clause = rename { mappings.map { (oldName, _) -> oldName.toColumnAccessor() }.toColumnSet() }
    val newNames = mappings.map { (_, newName) -> newName }.toTypedArray()
    return clause.into(*newNames)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ import org.jetbrains.kotlinx.dataframe.ColumnsSelector
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.Selector
import org.jetbrains.kotlinx.dataframe.annotations.AccessApiOverload
import org.jetbrains.kotlinx.dataframe.annotations.Interpretable
import org.jetbrains.kotlinx.dataframe.annotations.Refine
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
import org.jetbrains.kotlinx.dataframe.impl.api.reorderImpl
Expand Down Expand Up @@ -52,6 +54,8 @@ public fun <T, V : Comparable<V>> DataFrame<T>.reorderColumnsBy(
inFrameColumns = atAnyDepth,
).reorderImpl(desc, expression)

/**
 * Reorders columns by their names.
 *
 * Delegates to `reorderColumnsBy`, using each column's `name()` as the ordering key.
 *
 * @param atAnyDepth forwarded to `reorderColumnsBy`; when `true`, columns nested inside
 *   column groups and frame columns are reordered as well.
 * @param desc forwarded to `reorderColumnsBy`; when `true`, descending order is applied.
 */
@Refine
@Interpretable("ReorderColumnsByName")
public fun <T> DataFrame<T>.reorderColumnsByName(atAnyDepth: Boolean = true, desc: Boolean = false): DataFrame<T> {
    return reorderColumnsBy(atAnyDepth, desc) { name() }
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import org.jetbrains.kotlinx.dataframe.api.GroupBy
import org.jetbrains.kotlinx.dataframe.api.GroupClause
import org.jetbrains.kotlinx.dataframe.api.InsertClause
import org.jetbrains.kotlinx.dataframe.api.Merge
import org.jetbrains.kotlinx.dataframe.api.MergeWithTransform
import org.jetbrains.kotlinx.dataframe.api.MoveClause
import org.jetbrains.kotlinx.dataframe.api.Pivot
import org.jetbrains.kotlinx.dataframe.api.PivotGroupBy
Expand Down Expand Up @@ -165,6 +166,7 @@ public object KotlinNotebookPluginUtils {
is SplitWithTransform<*, *, *>,
is Split<*, *>,
is Merge<*, *, *>,
is MergeWithTransform<*, *, *>,
is Gather<*, *, *, *>,
is Update<*, *>,
is Convert<*, *>,
Expand Down Expand Up @@ -213,6 +215,13 @@ public object KotlinNotebookPluginUtils {
),
)

is MergeWithTransform<*, *, *> -> dataframeLike.into(
generateRandomVariationOfColumnName(
"merged",
dataframeLike.df.columnNames(),
),
)

is Gather<*, *, *, *> -> dataframeLike.into(
generateRandomVariationOfColumnName("key", dataframeLike.df.columnNames()),
generateRandomVariationOfColumnName("value", dataframeLike.df.columnNames()),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
import org.jetbrains.kotlinx.dataframe.api.ExcessiveColumns
import org.jetbrains.kotlinx.dataframe.api.GroupBy
import org.jetbrains.kotlinx.dataframe.api.Infer
import org.jetbrains.kotlinx.dataframe.api.Merge
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
import org.jetbrains.kotlinx.dataframe.api.add
import org.jetbrains.kotlinx.dataframe.api.addAll
Expand Down Expand Up @@ -190,6 +191,7 @@ import org.jetbrains.kotlinx.dataframe.typeClass
import org.junit.Test
import java.math.BigDecimal
import java.time.LocalDate
import kotlin.reflect.KType
import kotlin.reflect.jvm.jvmErasure
import kotlin.reflect.typeOf

Expand Down Expand Up @@ -1397,6 +1399,35 @@ class DataFrameTests : BaseTest() {
res shouldBe expected
}

@Test
fun `merge into temp`() {
    // The target column "b" already exists and a column literally named "temp" is present too.
    // There is no explicit assertion: the test passes when the merge completes without throwing.
    val source = dataFrameOf("a", "b", "temp")(1, null, 3)
    source.merge { cols("a", "b") }.into("b")
}

/** Test helper: exposes the statically known `C` type argument of a [Merge] as a [KType]. */
inline fun <T, reified C, R> Merge<T, C, R>.typeOfElement(): KType {
    return typeOf<C>()
}

@Test
fun `merge not null`() {
    val merged = dataFrameOf("a", "b")(1, null).merge { col("a") }

    // before notNull(): the element type is nullable
    val declaredType = merged.typeOfElement()
    declaredType shouldBe typeOf<Any?>()

    // after notNull(): the element type is narrowed to its non-null counterpart
    val narrowedType = merged.notNull().typeOfElement()
    narrowedType shouldBe typeOf<Any>()
}

/** Test helper: returns the statically known element type `T` of this list as a [KType]. */
inline fun <reified T> List<T>.typeOfElement(): KType {
    val listType = typeOf<List<T>>()
    // List has exactly one type argument, and it is never a star projection here
    return listType.arguments[0].type!!
}

@Test
fun `merge cols into list`() {
    val merged = dataFrameOf("a", "b")(1, null).merge { col("a") }

    // With the default transform every row yields a list of the merged values.
    merged.intoList().typeOfElement() shouldBe typeOf<List<Any?>>()
    merged.by { it }.intoList().typeOfElement() shouldBe typeOf<List<Any?>>()

    // After notNull() it is safe to narrow List<Any?> to List<Any>,
    // because the default transformer builds the List directly from C.
    merged.notNull().intoList().typeOfElement() shouldBe typeOf<List<Any>>()

    // If notNull() were allowed after by { }, that narrowing would not be possible:
    // a non-default transformer can introduce nulls itself.
    merged.notNull().by { listOf(1, null) }.intoList().typeOfElement() shouldBe typeOf<List<Int?>>()
}

@Test
fun `generic column type`() {
val d = typed.convert { city }.with { it?.toCharArray()?.toList() ?: emptyList() }
Expand Down
10 changes: 5 additions & 5 deletions docs/StardustDocs/topics/reorder.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ Returns [`DataFrame`](DataFrame.md) with a new order of selected columns.
```text
reorder { columns }
[.cast<ColumnType>() ]
.by { columnExpression } | .byDesc { columnExpression } | .byName(desc = false) { columnExpression }
.by { columnExpression } | .byDesc { columnExpression } | .byName(desc = false)

columnExpression: DataColumn.(DataColumn) -> Value
```
Expand Down Expand Up @@ -74,19 +74,19 @@ df.reorder { name }.byName(desc = true) // [name.lastName, name.firstName]
Reorders all columns

```text
reorderColumnsBy(dfs = true, desc = false) { columnExpression }
reorderColumnsBy(atAnyDepth = true, desc = false) { columnExpression }
```

**Parameters:**
* `dfs` — reorder columns inside [`ColumnGroups`](DataColumn.md#columngroup) and [`FrameColumn`](DataColumn.md#framecolumn) recursively
* `atAnyDepth` — reorder columns inside [`ColumnGroups`](DataColumn.md#columngroup) and [`FrameColumn`](DataColumn.md#framecolumn) recursively
* `desc` — apply descending order

## reorderColumnsByName

```text
reorderColumnsByName(dfs = true, desc = false)
reorderColumnsByName(atAnyDepth = true, desc = false)
```

**Parameters:**
* `dfs` — reorder columns inside [`ColumnGroups`](DataColumn.md#columngroup) and [`FrameColumn`](DataColumn.md#framecolumn) recursively
* `atAnyDepth` — reorder columns inside [`ColumnGroups`](DataColumn.md#columngroup) and [`FrameColumn`](DataColumn.md#framecolumn) recursively
* `desc` — apply descending order
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
@file:Suppress("INVISIBLE_REFERENCE")

package org.jetbrains.kotlinx.dataframe.plugin.impl

import org.jetbrains.kotlinx.dataframe.AnyCol
import org.jetbrains.kotlinx.dataframe.DataColumn
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.api.asDataColumn
import org.jetbrains.kotlinx.dataframe.api.cast
import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
import org.jetbrains.kotlinx.dataframe.impl.columns.ColumnGroupImpl
import org.jetbrains.kotlinx.dataframe.plugin.impl.api.TypeApproximation

fun PluginDataFrameSchema.asDataFrame(): DataFrame<ConeTypesAdapter> {
Expand All @@ -28,11 +26,10 @@ private fun List<SimpleCol>.map(): DataFrame<ConeTypesAdapter> {
return dataFrameOf(columns).cast()
}

@Suppress("INVISIBLE_REFERENCE")
fun SimpleCol.asDataColumn(): DataColumn<*> {
val column = when (this) {
is SimpleDataColumn -> DataColumn.createByType(this.name, listOf(this.type))
is SimpleColumnGroup -> DataColumn.createColumnGroup(this.name, this.columns().map()) as ColumnGroupImpl<*>
is SimpleColumnGroup -> DataColumn.createColumnGroup(this.name, this.columns().map()).asDataColumn()
is SimpleFrameColumn -> DataColumn.createFrameColumn(this.name, listOf(this.columns().map()))
}
return column
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,15 @@ import org.jetbrains.kotlinx.dataframe.plugin.impl.SimpleColumnGroup
import org.jetbrains.kotlinx.dataframe.plugin.impl.dataFrame
import org.jetbrains.kotlinx.dataframe.plugin.impl.simpleColumnOf
import org.jetbrains.kotlinx.dataframe.plugin.impl.dsl
import org.jetbrains.kotlinx.dataframe.plugin.impl.ignore
import org.jetbrains.kotlinx.dataframe.plugin.impl.type

typealias TypeApproximation = Marker

class Add : AbstractSchemaModificationInterpreter() {
val Arguments.receiver: PluginDataFrameSchema by dataFrame()
val Arguments.name: String by arg()
val Arguments.infer by ignore()
val Arguments.type: TypeApproximation by type(name("expression"))

override fun Arguments.interpret(): PluginDataFrameSchema {
Expand Down
Loading