From a7e48745242dba823e847ffbf6766d323187871d Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Thu, 17 Oct 2024 14:38:37 +0200 Subject: [PATCH 01/14] adding debug mode check for nulls in FrameColumnImpl and simplifying KType.toColumnKind() --- .../dataframe/impl/columns/FrameColumnImpl.kt | 10 +++++++++ .../kotlinx/dataframe/impl/columns/Utils.kt | 20 ++++++++++------- .../kotlinx/dataframe/columns/DataColumns.kt | 22 +++++++++++++++++++ 3 files changed, 44 insertions(+), 8 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/FrameColumnImpl.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/FrameColumnImpl.kt index d07c0b9d0d..930ab72fbf 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/FrameColumnImpl.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/FrameColumnImpl.kt @@ -1,12 +1,14 @@ package org.jetbrains.kotlinx.dataframe.impl.columns import org.jetbrains.kotlinx.dataframe.AnyRow +import org.jetbrains.kotlinx.dataframe.BuildConfig import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.api.schema import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup import org.jetbrains.kotlinx.dataframe.columns.ColumnResolutionContext import org.jetbrains.kotlinx.dataframe.columns.FrameColumn +import org.jetbrains.kotlinx.dataframe.impl.anyNull import org.jetbrains.kotlinx.dataframe.impl.createStarProjectedType import org.jetbrains.kotlinx.dataframe.impl.schema.intersectSchemas import org.jetbrains.kotlinx.dataframe.nrow @@ -26,6 +28,14 @@ internal open class FrameColumnImpl constructor( ), FrameColumn { + init { + // Checks for nulls in the `values` list. + // This only runs with `kotlin.dataframe.debug=true` in gradle.properties. + if (BuildConfig.DEBUG) { + require(!values.anyNull()) { "FrameColumn cannot null values." 
} + } + } + override fun rename(newName: String) = FrameColumnImpl(newName, values, schema, distinct) override fun defaultValue() = null diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/Utils.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/Utils.kt index 4df8f5cda9..4eac81874f 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/Utils.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/Utils.kt @@ -2,10 +2,11 @@ package org.jetbrains.kotlinx.dataframe.impl.columns import org.jetbrains.kotlinx.dataframe.AnyBaseCol import org.jetbrains.kotlinx.dataframe.AnyCol +import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.AnyRow import org.jetbrains.kotlinx.dataframe.ColumnsContainer import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame -import org.jetbrains.kotlinx.dataframe.DataRow import org.jetbrains.kotlinx.dataframe.api.asColumnGroup import org.jetbrains.kotlinx.dataframe.api.cast import org.jetbrains.kotlinx.dataframe.api.name @@ -43,7 +44,6 @@ import org.jetbrains.kotlinx.dataframe.nrow import org.jetbrains.kotlinx.dataframe.type import kotlin.reflect.KType import kotlin.reflect.full.isSubtypeOf -import kotlin.reflect.jvm.jvmErasure import kotlin.reflect.typeOf internal fun BaseColumn.checkEquals(other: Any?): Boolean { @@ -471,13 +471,17 @@ internal fun List>.allColumnsExceptKeepingStructure( return subtrees.map { it.data!!.addPath(it.pathFromRoot()) } } +/** + * Retrieves the correct [ColumnKind] based on the [type][KType] of the values in the column. + * + * NOTE: nullable DataFrames cannot become a [FrameColumns][FrameColumn], + * so they become [ValueColumns][ValueColumn] instead. 
+ */ internal fun KType.toColumnKind(): ColumnKind = - jvmErasure.let { - when (it) { - DataFrame::class -> ColumnKind.Frame - DataRow::class -> ColumnKind.Group - else -> ColumnKind.Value - } + when { + this.isSubtypeOf(typeOf()) -> ColumnKind.Frame + this.isSubtypeOf(typeOf()) -> ColumnKind.Group + else -> ColumnKind.Value } internal fun ColumnsResolver.resolve( diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/columns/DataColumns.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/columns/DataColumns.kt index 6061b8aba3..d79ce5cf98 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/columns/DataColumns.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/columns/DataColumns.kt @@ -1,6 +1,10 @@ package org.jetbrains.kotlinx.dataframe.columns +import io.kotest.assertions.throwables.shouldThrow import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.api.dataFrameOf import org.jetbrains.kotlinx.dataframe.api.toColumn import org.jetbrains.kotlinx.dataframe.api.toDataFrame import org.junit.Test @@ -28,4 +32,22 @@ class DataColumns { val col = listOf(URI.create("http://example.com"), null).toColumn("a") col.type().toString() shouldBe "java.net.URI?" 
} + + @Test + fun `allow no nulls in frame columns`() { + // enable kotlin.dataframe.debug=true for this + shouldThrow { + DataColumn.createFrameColumn( + name = "", + groups = listOf(dataFrameOf("a")(1), null) as List, + ) + } + + shouldThrow { + DataColumn.create( + "", + listOf(dataFrameOf("a")(1), null), + ) + } + } } From e29991cf5c5efe04daa0f5635b0f5625198f1dd0 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Thu, 17 Oct 2024 14:54:43 +0200 Subject: [PATCH 02/14] renamed guessColumnType -> createColumnGuessingType to better reflect what it does --- .../kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt | 4 ++-- .../org/jetbrains/kotlinx/dataframe/api/inferType.kt | 4 ++-- .../org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt | 6 +++--- .../kotlinx/dataframe/impl/ColumnDataCollector.kt | 4 ++-- .../kotlinx/dataframe/impl/aggregation/getColumns.kt | 4 ++-- .../org/jetbrains/kotlinx/dataframe/impl/api/concat.kt | 4 ++-- .../kotlinx/dataframe/impl/columns/constructors.kt | 8 +++----- .../kotlin/org/jetbrains/kotlinx/dataframe/io/common.kt | 6 +++--- 8 files changed, 19 insertions(+), 21 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt index b61c9ae2dd..b4e8e86236 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt @@ -20,7 +20,7 @@ import org.jetbrains.kotlinx.dataframe.impl.columns.ColumnGroupImpl import org.jetbrains.kotlinx.dataframe.impl.columns.FrameColumnImpl import org.jetbrains.kotlinx.dataframe.impl.columns.ValueColumnImpl import org.jetbrains.kotlinx.dataframe.impl.columns.addPath -import org.jetbrains.kotlinx.dataframe.impl.columns.guessColumnType +import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnKind import 
org.jetbrains.kotlinx.dataframe.impl.getValuesType import org.jetbrains.kotlinx.dataframe.impl.splitByIndices @@ -98,7 +98,7 @@ public interface DataColumn : BaseColumn { name: String, values: List, nullable: Boolean? = null, - ): DataColumn = guessColumnType(name, values, nullable = nullable) + ): DataColumn = createColumnGuessingType(name, values, nullable = nullable) public fun create( name: String, diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/inferType.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/inferType.kt index eab0222ee2..96a66253a0 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/inferType.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/inferType.kt @@ -6,11 +6,11 @@ import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.columns.ColumnReference import org.jetbrains.kotlinx.dataframe.columns.toColumnSet -import org.jetbrains.kotlinx.dataframe.impl.columns.guessColumnType +import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType import org.jetbrains.kotlinx.dataframe.type import kotlin.reflect.KProperty -public fun AnyCol.inferType(): DataColumn<*> = guessColumnType(name, toList(), type, true) +public fun AnyCol.inferType(): DataColumn<*> = createColumnGuessingType(name, toList(), type, true) // region DataFrame diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt index cdc56fccb8..56c98ec80d 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt @@ -13,7 +13,7 @@ import org.jetbrains.kotlinx.dataframe.impl.ColumnNameGenerator import org.jetbrains.kotlinx.dataframe.impl.api.createDataFrameImpl import org.jetbrains.kotlinx.dataframe.impl.asList import 
org.jetbrains.kotlinx.dataframe.impl.columnName -import org.jetbrains.kotlinx.dataframe.impl.columns.guessColumnType +import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType import org.jetbrains.kotlinx.dataframe.index import kotlin.reflect.KCallable import kotlin.reflect.KClass @@ -121,12 +121,12 @@ public fun Iterable>.toDataFrameFromPairs(): Da @JvmName("toDataFrameColumnPathAnyNullable") public fun Iterable>>.toDataFrameFromPairs(): AnyFrame = map { - it.first to guessColumnType(it.first.last(), it.second.asList()) + it.first to createColumnGuessingType(it.first.last(), it.second.asList()) }.toDataFrameFromPairs() public fun Iterable>>.toDataFrameFromPairs(): AnyFrame = map { - ColumnPath(it.first) to guessColumnType(it.first, it.second.asList()) + ColumnPath(it.first) to createColumnGuessingType(it.first, it.second.asList()) }.toDataFrameFromPairs() public interface TraversePropertiesDsl { diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/ColumnDataCollector.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/ColumnDataCollector.kt index 2d7b7e7d79..16423a28a5 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/ColumnDataCollector.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/ColumnDataCollector.kt @@ -7,7 +7,7 @@ import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.DataRow import org.jetbrains.kotlinx.dataframe.api.concat import org.jetbrains.kotlinx.dataframe.api.toDataFrame -import org.jetbrains.kotlinx.dataframe.impl.columns.guessColumnType +import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType import kotlin.reflect.KClass import kotlin.reflect.KType import kotlin.reflect.full.isSubclassOf @@ -54,7 +54,7 @@ internal abstract class DataCollectorBase(initCapacity: Int) : DataCollector< internal open class ColumnDataCollector(initCapacity: Int = 0, val typeOf: (KClass<*>) -> KType) : 
DataCollectorBase(initCapacity) { - override fun toColumn(name: String) = guessColumnType(name, values) + override fun toColumn(name: String) = createColumnGuessingType(name, values) } internal class TypedColumnDataCollector(initCapacity: Int = 0, val type: KType, val checkTypes: Boolean = true) : diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/getColumns.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/getColumns.kt index 46dd1129f1..283ec46109 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/getColumns.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/getColumns.kt @@ -7,7 +7,7 @@ import org.jetbrains.kotlinx.dataframe.aggregation.NamedValue import org.jetbrains.kotlinx.dataframe.api.filter import org.jetbrains.kotlinx.dataframe.api.isComparable import org.jetbrains.kotlinx.dataframe.api.isNumber -import org.jetbrains.kotlinx.dataframe.impl.columns.guessColumnType +import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType internal inline fun Aggregatable.remainingColumns( crossinline predicate: (AnyCol) -> Boolean, @@ -19,7 +19,7 @@ internal fun Aggregatable.comparableColumns() = internal fun Aggregatable.numberColumns() = remainingColumns { it.isNumber() } as ColumnsSelector internal fun NamedValue.toColumnWithPath() = - path to guessColumnType( + path to createColumnGuessingType( name = path.last(), values = listOf(value), suggestedType = type, diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/concat.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/concat.kt index fbb86bd012..98508d8935 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/concat.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/concat.kt @@ -9,7 +9,7 @@ import org.jetbrains.kotlinx.dataframe.api.dataFrameOf import org.jetbrains.kotlinx.dataframe.api.emptyDataFrame 
import org.jetbrains.kotlinx.dataframe.api.isColumnGroup import org.jetbrains.kotlinx.dataframe.hasNulls -import org.jetbrains.kotlinx.dataframe.impl.columns.guessColumnType +import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType import org.jetbrains.kotlinx.dataframe.impl.commonType import org.jetbrains.kotlinx.dataframe.impl.getListType import org.jetbrains.kotlinx.dataframe.impl.guessValueType @@ -73,7 +73,7 @@ internal fun concatImpl(name: String, columns: List?>, columnS } else { getListType(baseType.withNullability(listOfNullable)) } - return guessColumnType( + return createColumnGuessingType( name = name, values = list, suggestedType = tartypeOf, diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt index bb9fcc8ab5..2e240c4d2c 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt @@ -87,7 +87,7 @@ internal fun ColumnsContainer.newColumnWithActualType( expression: AddExpression, ): DataColumn { val (_, values) = computeValues(this as DataFrame, expression) - return guessColumnType(name, values) + return createColumnGuessingType(name, values) } internal fun computeValues(df: DataFrame, expression: AddExpression): Pair> { @@ -129,7 +129,7 @@ internal fun createColumn(values: Iterable, suggestedType: KType, guessTy ).asDataColumn().cast() guessType -> - guessColumnType( + createColumnGuessingType( name = "", values = values.asList(), suggestedType = suggestedType, @@ -217,10 +217,8 @@ internal fun Array.toNumberColumns() = toColumnsSetOf() // endregion -internal fun guessColumnType(name: String, values: List) = guessColumnType(name, values, null) - @PublishedApi -internal fun guessColumnType( +internal fun createColumnGuessingType( name: String, values: List, suggestedType: KType? 
= null, diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/common.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/common.kt index 9c06fc58a5..0386206651 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/common.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/common.kt @@ -4,7 +4,7 @@ import com.github.kittinunf.fuel.httpGet import org.jetbrains.kotlinx.dataframe.AnyFrame import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.api.toDataFrame -import org.jetbrains.kotlinx.dataframe.impl.columns.guessColumnType +import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType import java.io.File import java.io.IOException import java.io.InputStream @@ -33,7 +33,7 @@ public fun List>.toDataFrame(containsColumns: Boolean = false): AnyF if (it.isEmpty()) return@mapNotNull null val name = it[0].toString() val values = it.drop(1) - guessColumnType(name, values) + createColumnGuessingType(name, values) }.toDataFrame() } @@ -50,7 +50,7 @@ public fun List>.toDataFrame(containsColumns: Boolean = false): AnyF row[colIndex] } } - guessColumnType(name, values) + createColumnGuessingType(name, values) }.toDataFrame() } } From 6f733542120484c352dac2043f2ef47a30268b68 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Thu, 17 Oct 2024 14:55:38 +0200 Subject: [PATCH 03/14] small kdoc enhancements to clarify DataColumn.createX functions --- .../kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt | 8 ++++++++ .../jetbrains/kotlinx/dataframe/columns/ColumnGroup.kt | 2 ++ .../jetbrains/kotlinx/dataframe/columns/FrameColumn.kt | 2 ++ .../jetbrains/kotlinx/dataframe/columns/ValueColumn.kt | 2 ++ 4 files changed, 14 insertions(+) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt index b4e8e86236..ee40522187 100644 --- 
a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt @@ -100,6 +100,10 @@ public interface DataColumn : BaseColumn { nullable: Boolean? = null, ): DataColumn = createColumnGuessingType(name, values, nullable = nullable) + /** + * Calls [createColumnGroup], [createFrameColumn], or [createValueColumn] based on + * [type] without checking the actual values in [values]. + */ public fun create( name: String, values: List, @@ -112,6 +116,10 @@ public interface DataColumn : BaseColumn { ColumnKind.Frame -> createFrameColumn(name, values as List).asDataColumn().cast() } + /** + * Calls [createColumnGroup], [createFrameColumn], or [createValueColumn] based on + * type [T] without checking the actual values in [values]. + */ public inline fun create(name: String, values: List, infer: Infer = Infer.None): DataColumn = create(name, values, typeOf(), infer) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnGroup.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnGroup.kt index 794e50ced8..88ed161b5f 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnGroup.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnGroup.kt @@ -23,6 +23,8 @@ import kotlin.reflect.KProperty * - [ColumnAccessor] created by [columnGroup] delegate * - explicit cast using [asColumnGroup] * + * Can be instantiated by [DataColumn.createColumnGroup]. + * * @param T Schema marker. See [DataFrame] for details. 
*/ @HasSchema(schemaArg = 0) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/FrameColumn.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/FrameColumn.kt index 70b988740f..2e7fc6f229 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/FrameColumn.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/FrameColumn.kt @@ -7,6 +7,8 @@ import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema /** * Column that stores values of type [DataFrame] * + * Can be instantiated by [DataColumn.createFrameColumn]. + * * @param T schema marker of contained dataframes. See [DataFrame] for details. */ public interface FrameColumn : DataColumn> { diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ValueColumn.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ValueColumn.kt index a1d8984293..17c92a44d5 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ValueColumn.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ValueColumn.kt @@ -6,6 +6,8 @@ import kotlin.reflect.KProperty /** * Column that stores values. * + * Can be instantiated by [DataColumn.createValueColumn]. 
+ * * @param T - type of values */ public interface ValueColumn : DataColumn { From 798edb09bfdf0ede52d9815ef9a69964f99630a8 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Thu, 17 Oct 2024 16:46:36 +0200 Subject: [PATCH 04/14] renamed DataColumn.create to DataColumn.createUnsafe and added clarifying KDocs for that suite of functions to explain what they're for --- .../jetbrains/kotlinx/dataframe/DataColumn.kt | 99 ++++++++++++++++--- .../kotlinx/dataframe/api/TypeConversions.kt | 18 ++-- .../kotlinx/dataframe/api/constructors.kt | 8 +- .../jetbrains/kotlinx/dataframe/api/map.kt | 8 +- .../jetbrains/kotlinx/dataframe/api/sort.kt | 2 +- .../kotlinx/dataframe/api/valueCounts.kt | 4 +- .../kotlinx/dataframe/impl/api/parse.kt | 2 +- .../dataframe/impl/columns/FrameColumnImpl.kt | 11 +++ .../dataframe/impl/columns/constructors.kt | 4 +- .../kotlinx/dataframe/columns/DataColumns.kt | 2 +- gradle.properties | 2 +- .../dataframe/plugin/impl/DataFrameAdapter.kt | 2 +- 12 files changed, 126 insertions(+), 36 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt index ee40522187..d835609267 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt @@ -25,6 +25,9 @@ import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnKind import org.jetbrains.kotlinx.dataframe.impl.getValuesType import org.jetbrains.kotlinx.dataframe.impl.splitByIndices import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema +import org.jetbrains.kotlinx.dataframe.util.CREATE_FRAME_COLUMN +import org.jetbrains.kotlinx.dataframe.util.CREATE_FRAME_COLUMN_IMPORT +import org.jetbrains.kotlinx.dataframe.util.CREATE_FRAME_COLUMN_REPLACE import kotlin.reflect.KClass import kotlin.reflect.KProperty import kotlin.reflect.KType @@ -45,6 +48,9 @@ public interface DataColumn : BaseColumn { /** * Creates 
[ValueColumn] using given [name], [values] and [type]. * + * Be careful; values are NOT checked to adhere to [type] for efficiency, + * unless you specify [infer]. + * * @param name name of the column * @param values list of column values * @param type type of the column @@ -56,11 +62,20 @@ public interface DataColumn : BaseColumn { type: KType, infer: Infer = Infer.None, defaultValue: T? = null, - ): ValueColumn = ValueColumnImpl(values, name, getValuesType(values, type, infer), defaultValue) + ): ValueColumn = + ValueColumnImpl( + values = values, + name = name, + type = getValuesType(values, type, infer), + defaultValue = defaultValue, + ) /** * Creates [ValueColumn] using given [name], [values] and reified column [type]. * + * Be careful; values are NOT checked to adhere to [type] for efficiency, + * unless you specify [infer]. + * * Note, that column [type] will be defined at compile-time using [T] argument * * @param T type of the column @@ -74,26 +89,56 @@ public interface DataColumn : BaseColumn { infer: Infer = Infer.None, ): ValueColumn = createValueColumn( - name, - values, - getValuesType( - values, - typeOf(), - infer, - ), + name = name, + values = values, + type = typeOf(), + infer = infer, ) + /** + * Creates [ColumnGroup] using the given [name] and [df] representing the group of columns. + * + * @param name name of the column group + * @param df the collection of columns representing the column group + */ public fun createColumnGroup(name: String, df: DataFrame): ColumnGroup = ColumnGroupImpl(name, df) + // TODO this shouldn't be here public fun createFrameColumn(name: String, df: DataFrame, startIndices: Iterable): FrameColumn = FrameColumnImpl(name, df.splitByIndices(startIndices.asSequence()).toList(), lazy { df.schema() }) + /** + * Creates [FrameColumn] using the given [name] and list of dataframes [groups]. + * + * Be careful; [groups] must be a non-null list of [DataFrames][DataFrame]. 
+ * This is NOT checked at runtime for efficiency, nor is the validity of given [schema]. + * + * @param name name of the frame column + * @param groups the dataframes to be put in the column + * @param schema an optional (lazily calculated) [DataFrameSchema] representing + * the intersecting schema of [groups] + */ public fun createFrameColumn( name: String, groups: List>, schema: Lazy? = null, ): FrameColumn = FrameColumnImpl(name, groups, schema) + /** + * Creates either a [FrameColumn], [ColumnGroup], or [ValueColumn] by analyzing each value in + * [values]. + * This is safer but less efficient than the other functions. + * + * Some conversions are done automatically to attempt to unify the values, like: + * - `null` -> [DataFrame.empty][DataFrame.empty]`()` and [DataRow] -> single-row [DataFrame] when there are other + * [DataFrames][DataFrame] present in [values] + * - [List][List]`<`[DataRow][DataRow]`<*>>` -> [DataFrame] + * etc. + * + * @param name name of the column + * @param values the values to represent each row in the column + * @param nullable optionally you can specify whether [values] contains nulls, if `null` it is inferred. + */ public fun createWithTypeInference( name: String, values: List, @@ -102,9 +147,21 @@ public interface DataColumn : BaseColumn { /** * Calls [createColumnGroup], [createFrameColumn], or [createValueColumn] based on - * [type] without checking the actual values in [values]. + * [type]. + * + * Be careful; Values in [values] are NOT checked to adhere to the given [type], nor + * do we check whether there are nulls among the values when the given type is [DataFrame] + * (a [FrameColumn] cannot contain `null`, this causes runtime exceptions). + * When [type] is `DataFrame<*>?`, a [ValueColumn] is created to avoid this issue. + * + * This may be unsafe but is more efficient than [createWithTypeInference]. 
+ * + * @param name the name of the column + * @param values the values to represent each row in the column + * @param type the (unchecked) common type of [values] + * @param infer in case a [ValueColumn] is created, this controls how/whether types need to be inferred */ - public fun create( + public fun createUnsafe( name: String, values: List, type: KType, @@ -118,11 +175,27 @@ public interface DataColumn : BaseColumn { /** * Calls [createColumnGroup], [createFrameColumn], or [createValueColumn] based on - * type [T] without checking the actual values in [values]. + * type [T]. + * + * Be careful; Values in [values] are NOT checked to adhere to the given [type], nor + * do we check whether there are nulls among the values when the given type is [DataFrame] + * (a [FrameColumn] cannot contain `null`, this causes runtime exceptions). + * When [type] is `DataFrame<*>?`, a [ValueColumn] is created to avoid this issue. + * + * This may be unsafe but is more efficient than [createWithTypeInference]. + * + * @param T the (unchecked) common type of [values] + * @param name the name of the column + * @param values the values to represent each row in the column + * @param infer in case a [ValueColumn] is created, this controls how/whether types need to be inferred */ - public inline fun create(name: String, values: List, infer: Infer = Infer.None): DataColumn = - create(name, values, typeOf(), infer) + public inline fun createUnsafe( + name: String, + values: List, + infer: Infer = Infer.None, + ): DataColumn = createUnsafe(name, values, typeOf(), infer) + /** Creates an empty [DataColumn] with given [name]. 
*/ public fun empty(name: String = ""): AnyCol = createValueColumn(name, emptyList(), typeOf()) } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/TypeConversions.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/TypeConversions.kt index 0aebbdd7a4..41e9802066 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/TypeConversions.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/TypeConversions.kt @@ -234,16 +234,22 @@ public enum class Infer { /** * Use reified type argument of an inline [DataFrame] operation as [DataColumn.type]. + * + * This is the most efficient but least safe option. */ None, /** - * Use reified type argument of an inline [DataFrame] operation as [DataColumn.type], but compute [DataColumn.hasNulls] by checking column [DataColumn.values] for an actual presence of *null* values. + * Use reified type argument of an inline [DataFrame] operation as [DataColumn.type], + * but compute [DataColumn.hasNulls] by checking column [DataColumn.values] for an actual presence of `null` values. */ Nulls, /** - * Infer [DataColumn.type] and [DataColumn.hasNulls] from actual [DataColumn.values] using optionally provided base type as an upper bound. + * Infer [DataColumn.type] and [DataColumn.hasNulls] from actual [DataColumn.values] using an optionally provided + * base type as an upper bound. + * + * This is the least efficient but safest option. 
*/ Type, @@ -306,17 +312,17 @@ public inline fun Iterable.toColumn(name: String = "", infer: Inf if (infer == Infer.Type) { DataColumn.createWithTypeInference(name, asList()) } else { - DataColumn.create(name, asList(), typeOf(), infer) + DataColumn.createUnsafe(name, asList(), typeOf(), infer) }.forceResolve() public inline fun Iterable<*>.toColumnOf(name: String = ""): DataColumn = - DataColumn.create(name, asList() as List, typeOf()).forceResolve() + DataColumn.createUnsafe(name, asList() as List, typeOf()).forceResolve() public inline fun Iterable.toColumn(ref: ColumnReference): DataColumn = - DataColumn.create(ref.name(), asList()).forceResolve() + DataColumn.createUnsafe(ref.name(), asList()).forceResolve() public inline fun Iterable.toColumn(property: KProperty): DataColumn = - DataColumn.create(property.columnName, asList()).forceResolve() + DataColumn.createUnsafe(property.columnName, asList()).forceResolve() public fun Iterable.toPath(): ColumnPath = ColumnPath(asList()) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt index fbd77787bd..381d30f7fa 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt @@ -290,7 +290,7 @@ public fun dataFrameOf(header: Iterable, values: Iterable): DataFr public inline fun dataFrameOf(header: Iterable, fill: (T) -> Iterable): DataFrame<*> = header.map { value -> fill(value).asList().let { - DataColumn.create(value.toString(), it) + DataColumn.createUnsafe(value.toString(), it) } }.toDataFrame() @@ -325,7 +325,7 @@ public class DataFrameBuilder(private val header: List) { public inline operator fun invoke(crossinline valuesBuilder: (String) -> Iterable): DataFrame<*> = withColumns { name -> valuesBuilder(name).let { - DataColumn.create( + DataColumn.createUnsafe( name = name, values = it.asList(), ) @@ 
-345,7 +345,7 @@ public class DataFrameBuilder(private val header: List) { public inline fun fillIndexed(nrow: Int, crossinline init: (Int, String) -> C): DataFrame<*> = withColumns { name -> - DataColumn.create( + DataColumn.createUnsafe( name, List(nrow) { init(it, name) }, ) @@ -353,7 +353,7 @@ public class DataFrameBuilder(private val header: List) { public inline fun fill(nrow: Int, crossinline init: (Int) -> C): DataFrame<*> = withColumns { name -> - DataColumn.create( + DataColumn.createUnsafe( name = name, values = List(nrow, init), ) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/map.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/map.kt index 6907c9e0bf..1f077f950c 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/map.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/map.kt @@ -34,12 +34,12 @@ public inline fun DataColumn.map( crossinline transform: (T) -> R, ): DataColumn { val newValues = Array(size()) { transform(get(it)) }.asList() - return DataColumn.create(name(), newValues, typeOf(), infer) + return DataColumn.createUnsafe(name(), newValues, typeOf(), infer) } public fun DataColumn.map(type: KType, infer: Infer = Infer.Nulls, transform: (T) -> R): DataColumn { val values = Array(size()) { transform(get(it)) }.asList() - return DataColumn.create(name(), values, type, infer).cast() + return DataColumn.createUnsafe(name(), values, type, infer).cast() } public inline fun DataColumn.mapIndexed( @@ -47,7 +47,7 @@ public inline fun DataColumn.mapIndexed( crossinline transform: (Int, T) -> R, ): DataColumn { val newValues = Array(size()) { transform(it, get(it)) }.asList() - return DataColumn.create(name(), newValues, typeOf(), infer) + return DataColumn.createUnsafe(name(), newValues, typeOf(), infer) } public fun DataColumn.mapIndexed( @@ -56,7 +56,7 @@ public fun DataColumn.mapIndexed( transform: (Int, T) -> R, ): DataColumn { val values = Array(size()) { transform(it, get(it)) 
}.asList() - return DataColumn.create(name(), values, type, infer).cast() + return DataColumn.createUnsafe(name(), values, type, infer).cast() } // endregion diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/sort.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/sort.kt index 80a627fbf4..48a1aa5ef3 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/sort.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/sort.kt @@ -94,7 +94,7 @@ private interface CommonDataColumnSortWithDocs /** @include [CommonDataColumnSortWithDocs] */ public fun > C.sortWith(comparator: Comparator): C = - DataColumn.create(name, values().sortedWith(comparator), type) as C + DataColumn.createUnsafe(name, values().sortedWith(comparator), type) as C /** @include [CommonDataColumnSortWithDocs] */ public fun > C.sortWith(comparator: (T, T) -> Int): C = sortWith(Comparator(comparator)) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/valueCounts.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/valueCounts.kt index 1c96776b44..5fbd81ef73 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/valueCounts.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/valueCounts.kt @@ -40,9 +40,9 @@ public fun DataColumn.valueCounts( } if (dropNA) grouped = grouped.filter { !it.first.isNA } val nulls = if (dropNA) false else hasNulls() - val values = DataColumn.create(name(), grouped.map { it.first }, type().withNullability(nulls)) + val values = DataColumn.createUnsafe(name(), grouped.map { it.first }, type().withNullability(nulls)) val countName = if (resultColumn == name()) resultColumn + "1" else resultColumn - val counts = DataColumn.create(countName, grouped.map { it.second }, typeOf()) + val counts = DataColumn.createUnsafe(countName, grouped.map { it.second }, typeOf()) return dataFrameOf(values, counts).cast() } diff --git 
a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt index c8577a2f98..2e7f8363cf 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt @@ -525,7 +525,7 @@ internal fun DataColumn.tryParseImpl(options: ParserOptions?): DataColu if (type.jvmErasure == String::class && !nullStringParsed) { return this // nothing parsed } - return DataColumn.create(name(), parsedValues, type) + return DataColumn.createUnsafe(name(), parsedValues, type) } internal fun DataColumn.parse(parser: StringParser, options: ParserOptions?): DataColumn { diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/FrameColumnImpl.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/FrameColumnImpl.kt index 930ab72fbf..f36adb112f 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/FrameColumnImpl.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/FrameColumnImpl.kt @@ -33,6 +33,17 @@ internal open class FrameColumnImpl constructor( // This only runs with `kotlin.dataframe.debug=true` in gradle.properties. if (BuildConfig.DEBUG) { require(!values.anyNull()) { "FrameColumn cannot null values." 
} + +// val schema = columnSchema?.value +// ?: values.mapNotNull { it.takeIf { it.nrow > 0 }?.schema() }.intersectSchemas() +// +// for (df in values) { +// val dfSchema = df.schema() +// if (dfSchema.columns.isEmpty()) continue +// require(dfSchema.compare(schema).isDerivedOrEqual()) { +// "DataFrames in FrameColumn don't adhere to the given schema:\nGiven:\n$schema\n\nActual:\n$dfSchema" +// } +// } } } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt index 2e240c4d2c..72d9a50770 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt @@ -59,7 +59,7 @@ internal fun ColumnsContainer.newColumn( val df = this as? DataFrame ?: dataFrameOf(columns()).cast() val (nullable, values) = computeValues(df, expression) return when (infer) { - Infer.Nulls -> DataColumn.create( + Infer.Nulls -> DataColumn.createUnsafe( name = name, values = values, type = type.withNullability(nullable).replaceGenericTypeParametersWithUpperbound(), @@ -72,7 +72,7 @@ internal fun ColumnsContainer.newColumn( nullable = nullable, ) - Infer.None -> DataColumn.create( + Infer.None -> DataColumn.createUnsafe( name = name, values = values, type = type.replaceGenericTypeParametersWithUpperbound(), diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/columns/DataColumns.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/columns/DataColumns.kt index d79ce5cf98..efb35e65de 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/columns/DataColumns.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/columns/DataColumns.kt @@ -44,7 +44,7 @@ class DataColumns { } shouldThrow { - DataColumn.create( + DataColumn.createUnsafe( "", listOf(dataFrameOf("a")(1), null), ) diff --git a/gradle.properties b/gradle.properties 
index c54b38e032..0f2b9e1916 100644 --- a/gradle.properties +++ b/gradle.properties @@ -15,4 +15,4 @@ kotlin.dataframe.add.ksp=false # Enables debug mode for dataframe. # This can make certain tests run that should not be run in production. # It can also be turned on from the command line with `-Pkotlin.dataframe.debug=true` -kotlin.dataframe.debug=false +kotlin.dataframe.debug=true diff --git a/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/DataFrameAdapter.kt b/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/DataFrameAdapter.kt index 185578aa5c..806e9b771d 100644 --- a/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/DataFrameAdapter.kt +++ b/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/DataFrameAdapter.kt @@ -17,7 +17,7 @@ interface ConeTypesAdapter private fun List.map(): DataFrame = map { when (it) { - is SimpleDataColumn -> DataColumn.create(it.name, listOf(it.type)) + is SimpleDataColumn -> DataColumn.createUnsafe(it.name, listOf(it.type)) is SimpleColumnGroup -> DataColumn.createColumnGroup(it.name, it.columns().map()) is SimpleFrameColumn -> DataColumn.createFrameColumn(it.name, listOf(it.columns().map())) } From 25353788bcf6871c8ae06cddd2262a4ad500fed4 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Thu, 17 Oct 2024 19:41:37 +0200 Subject: [PATCH 05/14] deprecated DataColumn.createFrameColumn with startIndices, moved it to `chunked` --- .../jetbrains/kotlinx/dataframe/DataColumn.kt | 6 +++++- .../jetbrains/kotlinx/dataframe/api/chunked.kt | 18 +++++++++++++++++- .../kotlinx/dataframe/impl/api/groupBy.kt | 4 ++-- .../kotlinx/dataframe/impl/io/readJson.kt | 14 ++++++++------ .../dataframe/util/deprecationMessages.kt | 4 ++++ 5 files changed, 36 insertions(+), 10 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt index d835609267..37480ff2b3 
100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt @@ -103,7 +103,11 @@ public interface DataColumn : BaseColumn { */ public fun createColumnGroup(name: String, df: DataFrame): ColumnGroup = ColumnGroupImpl(name, df) - // TODO this shouldn't be here + @Deprecated( + message = CREATE_FRAME_COLUMN, + replaceWith = ReplaceWith(CREATE_FRAME_COLUMN_REPLACE, CREATE_FRAME_COLUMN_IMPORT), + level = DeprecationLevel.WARNING, + ) public fun createFrameColumn(name: String, df: DataFrame, startIndices: Iterable): FrameColumn = FrameColumnImpl(name, df.splitByIndices(startIndices.asSequence()).toList(), lazy { df.schema() }) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/chunked.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/chunked.kt index 67f3dec6d2..0628deb1b4 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/chunked.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/chunked.kt @@ -7,12 +7,28 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup import org.jetbrains.kotlinx.dataframe.columns.FrameColumn import org.jetbrains.kotlinx.dataframe.columns.ValueColumn import org.jetbrains.kotlinx.dataframe.impl.getListType +import org.jetbrains.kotlinx.dataframe.impl.splitByIndices import org.jetbrains.kotlinx.dataframe.nrow import org.jetbrains.kotlinx.dataframe.type +/** + * Creates a [FrameColumn] from [this] by splitting the dataframe into + * smaller ones, based on the given [startIndices]. + */ +public fun DataFrame.chunked(startIndices: Iterable, name: String = "groups"): FrameColumn = + DataColumn.createFrameColumn( + name = name, + groups = this.splitByIndices(startIndices.asSequence()).toList(), + schema = lazy { this.schema() }, + ) + +/** + * Creates a [FrameColumn] from [this] by splitting the dataframe into + * smaller ones, with their number of rows at most [size]. 
+ */ public fun DataFrame.chunked(size: Int, name: String = "groups"): FrameColumn { val startIndices = (0 until nrow step size) - return DataColumn.createFrameColumn(name, this, startIndices) + return this.chunked(startIndices, name) } public fun DataColumn.chunked(size: Int): ValueColumn> { diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/groupBy.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/groupBy.kt index 8656eecf97..8caa80846c 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/groupBy.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/groupBy.kt @@ -1,12 +1,12 @@ package org.jetbrains.kotlinx.dataframe.impl.api import org.jetbrains.kotlinx.dataframe.ColumnsSelector -import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.DataRow import org.jetbrains.kotlinx.dataframe.api.GroupBy import org.jetbrains.kotlinx.dataframe.api.GroupedDataRow import org.jetbrains.kotlinx.dataframe.api.cast +import org.jetbrains.kotlinx.dataframe.api.chunked import org.jetbrains.kotlinx.dataframe.api.getColumnsWithPaths import org.jetbrains.kotlinx.dataframe.api.getRows import org.jetbrains.kotlinx.dataframe.api.indices @@ -62,7 +62,7 @@ internal fun DataFrame.groupByImpl(moveToTop: Boolean, columns: ColumnsSe } val groupedColumnName = keyColumnsDf.nameGenerator().addUnique(GroupBy.groupedColumnAccessor.name()) - val groupedColumn = DataColumn.createFrameColumn(groupedColumnName, sorted, startIndices.asIterable()) + val groupedColumn = sorted.chunked(startIndices.asIterable(), groupedColumnName) val df = keyColumnsDf + groupedColumn return GroupByImpl(df, groupedColumn, columns) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt index fca2bd4e76..2df7fe8d6a 100644 --- 
a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt @@ -23,6 +23,7 @@ import org.jetbrains.kotlinx.dataframe.DataRow import org.jetbrains.kotlinx.dataframe.api.JsonPath import org.jetbrains.kotlinx.dataframe.api.KeyValueProperty import org.jetbrains.kotlinx.dataframe.api.cast +import org.jetbrains.kotlinx.dataframe.api.chunked import org.jetbrains.kotlinx.dataframe.api.columnOf import org.jetbrains.kotlinx.dataframe.api.concat import org.jetbrains.kotlinx.dataframe.api.dataFrameOf @@ -298,11 +299,12 @@ internal fun fromJsonListAnyColumns( ) } - else -> DataColumn.createFrameColumn( - name = ARRAY_COLUMN_NAME, // will be erased - df = parsed.unwrapUnnamedColumns(), - startIndices = startIndices, - ) + else -> + parsed.unwrapUnnamedColumns() + .chunked( + startIndices = startIndices, + name = ARRAY_COLUMN_NAME, // will be erased + ) } listOf(UnnamedColumn(res)) } @@ -640,7 +642,7 @@ internal fun fromJsonListArrayAndValueColumns( ) } - else -> DataColumn.createFrameColumn(colName, parsed.unwrapUnnamedColumns(), startIndices) + else -> parsed.unwrapUnnamedColumns().chunked(startIndices, colName) } UnnamedColumn(res) } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt index 05951691a8..baa4b8dcdf 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt @@ -15,6 +15,10 @@ internal const val DF_READ_NO_CSV = "This function is deprecated and should be r internal const val DF_READ_NO_CSV_REPLACE = "this.readCSV(fileOrUrl, delimiter, header, colTypes, skipLines, readLines, duplicate, charset)" +internal const val CREATE_FRAME_COLUMN = "Replaced by df.chunked(). 
$MESSAGE_0_16" +internal const val CREATE_FRAME_COLUMN_REPLACE = "df.chunked(startIndices, name)" +internal const val CREATE_FRAME_COLUMN_IMPORT = "org.jetbrains.kotlinx.dataframe.api.chunked" + // endregion // region WARNING in 0.16, ERROR in 0.17 From 32bee077c34001d82a979d58690e4a912a334244 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Fri, 18 Oct 2024 17:14:25 +0200 Subject: [PATCH 06/14] added some tests to confirm behavior from createColumnGuessingType() from before the PR. Added allColsMakesColGroup argument for createColumnGuessingType() and guessValueType() so the old behavior of createColumn() is now controlled in the same place as all other conversions --- .../jetbrains/kotlinx/dataframe/DataColumn.kt | 11 +- .../kotlinx/dataframe/api/constructors.kt | 17 ++- .../kotlinx/dataframe/impl/TypeUtils.kt | 38 +++++- .../dataframe/impl/aggregation/getColumns.kt | 2 +- .../kotlinx/dataframe/impl/api/concat.kt | 2 +- .../dataframe/impl/columns/FrameColumnImpl.kt | 11 -- .../dataframe/impl/columns/constructors.kt | 111 +++++++++-------- .../kotlinx/dataframe/impl/io/readJson.kt | 13 +- .../kotlinx/dataframe/api/constructors.kt | 117 ++++++++++++++++++ .../kotlinx/dataframe/columns/DataColumns.kt | 10 +- .../kotlinx/dataframe/types/UtilTests.kt | 16 +++ gradle.properties | 2 +- 12 files changed, 266 insertions(+), 84 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt index 37480ff2b3..cb0fcad5a2 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt @@ -142,12 +142,21 @@ public interface DataColumn : BaseColumn { * @param name name of the column * @param values the values to represent each row in the column * @param nullable optionally you can specify whether [values] contains nulls, if `null` it is inferred. 
+ * @param allColsMakesColGroup if `true`, then, if all values are non-null same-sized columns, + * a column group will be created instead of a [DataColumn][DataColumn]`<`[AnyCol][AnyCol]`>`. */ public fun createWithTypeInference( name: String, values: List, nullable: Boolean? = null, - ): DataColumn = createColumnGuessingType(name, values, nullable = nullable) + allColsMakesColGroup: Boolean = false, + ): DataColumn = + createColumnGuessingType( + name = name, + values = values, + nullable = nullable, + allColsMakesColGroup = allColsMakesColGroup, + ) /** * Calls [createColumnGroup], [createFrameColumn], or [createValueColumn] based on diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt index 381d30f7fa..ff9023d3b6 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt @@ -24,7 +24,7 @@ import org.jetbrains.kotlinx.dataframe.impl.api.withValuesImpl import org.jetbrains.kotlinx.dataframe.impl.asList import org.jetbrains.kotlinx.dataframe.impl.columnName import org.jetbrains.kotlinx.dataframe.impl.columns.ColumnAccessorImpl -import org.jetbrains.kotlinx.dataframe.impl.columns.createColumn +import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType import org.jetbrains.kotlinx.dataframe.impl.columns.createComputedColumnReference import org.jetbrains.kotlinx.dataframe.impl.columns.forceResolve import org.jetbrains.kotlinx.dataframe.impl.columns.unbox @@ -223,7 +223,13 @@ public class ColumnDelegate(private val parent: ColumnGroupReference? 
= null) // region create DataColumn public inline fun columnOf(vararg values: T): DataColumn = - createColumn(values.asIterable(), typeOf(), true).forceResolve() + createColumnGuessingType( + values = values.asIterable(), + suggestedType = typeOf(), + guessTypeWithSuggestedAsUpperbound = true, + listifyValues = false, + allColsMakesColGroup = true, + ).forceResolve() public fun columnOf(vararg values: AnyBaseCol): DataColumn = columnOf(values.asIterable()).forceResolve() @@ -244,7 +250,12 @@ public fun columnOf(frames: Iterable>): FrameColumn = ).forceResolve() public inline fun column(values: Iterable): DataColumn = - createColumn(values, typeOf(), false).forceResolve() + createColumnGuessingType( + values = values, + suggestedType = typeOf(), + guessTypeWithSuggestedAsUpperbound = false, + allColsMakesColGroup = true, + ).forceResolve() // endregion diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/TypeUtils.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/TypeUtils.kt index 1ec8f67639..30d6198624 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/TypeUtils.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/TypeUtils.kt @@ -2,8 +2,10 @@ package org.jetbrains.kotlinx.dataframe.impl +import org.jetbrains.kotlinx.dataframe.AnyCol import org.jetbrains.kotlinx.dataframe.AnyFrame import org.jetbrains.kotlinx.dataframe.AnyRow +import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.DataRow import org.jetbrains.kotlinx.dataframe.api.Infer @@ -391,14 +393,24 @@ internal fun getValuesType(values: List, type: KType, infer: Infer): KTyp * @param listifyValues if true, then values and nulls will be wrapped in a list if they appear among other lists. * For example: `[1, null, listOf(1, 2, 3)]` will become `List` instead of `Any?` * Note: this parameter is ignored if another [Collection] is present in the values. 
+ * @param allColsMakesRow if true, then, if all values are non-null same-sized columns, we assume + * that a column group should be created instead of a [DataColumn][DataColumn]`<`[AnyCol][AnyCol]`>`, + * so the function will return [DataRow]. */ @PublishedApi -internal fun guessValueType(values: Sequence, upperBound: KType? = null, listifyValues: Boolean = false): KType { +internal fun guessValueType( + values: Sequence, + upperBound: KType? = null, + listifyValues: Boolean = false, + allColsMakesRow: Boolean = false, +): KType { val classes = mutableSetOf>() val collectionClasses = mutableSetOf>>() var hasNulls = false var hasFrames = false var hasRows = false + var hasCols = false + val colSizes = mutableListOf() var hasList = false var allListsAreEmpty = true val classesInCollection = mutableSetOf>() @@ -412,6 +424,11 @@ internal fun guessValueType(values: Sequence, upperBound: KType? = null, l is AnyFrame -> hasFrames = true + is AnyCol -> { + hasCols = true + colSizes += it.size() + } + is List<*> -> { hasList = true if (it.isNotEmpty()) allListsAreEmpty = false @@ -449,6 +466,7 @@ internal fun guessValueType(values: Sequence, upperBound: KType? = null, l classes.isNotEmpty() -> { if (hasRows) classes.add(DataRow::class) if (hasFrames) classes.add(DataFrame::class) + if (hasCols) classes.add(DataColumn::class) if (hasList) { if (listifyValues) { val typeInLists = classesInCollection.commonType( @@ -466,15 +484,24 @@ internal fun guessValueType(values: Sequence, upperBound: KType? 
= null, l return classes.commonType(hasNulls, upperBound) } - hasNulls && !hasFrames && !hasRows && !hasList -> nothingType(nullable = true) + hasNulls && !hasFrames && !hasRows && !hasList && !hasCols -> nothingType(nullable = true) (hasFrames && (!hasList || allListsWithRows)) || (!hasFrames && allListsWithRows) -> DataFrame::class.createStarProjectedType(hasNulls) - hasRows && !hasFrames && !hasList -> + hasRows && !hasFrames && !hasList && !hasCols -> + DataRow::class.createStarProjectedType(false) + + allColsMakesRow && + hasCols && + !hasFrames && + !hasList && + !hasRows && + !hasNulls && + colSizes.distinct().size == 1 -> DataRow::class.createStarProjectedType(false) - collectionClasses.isNotEmpty() && !hasFrames && !hasRows -> { + collectionClasses.isNotEmpty() && !hasFrames && !hasRows && !hasCols -> { val elementType = upperBound?.let { if (it.jvmErasure.isSubclassOf(Collection::class)) { it.projectUpTo(Collection::class).arguments[0].type @@ -492,7 +519,7 @@ internal fun guessValueType(values: Sequence, upperBound: KType? = null, l ).withNullability(hasNulls) } - hasList && collectionClasses.isEmpty() && !hasFrames && !hasRows -> { + hasList && collectionClasses.isEmpty() && !hasFrames && !hasRows && !hasCols -> { val elementType = upperBound?.let { if (it.jvmErasure == List::class) it.arguments[0].type else null } List::class .createTypeWithArgument( @@ -507,6 +534,7 @@ internal fun guessValueType(values: Sequence, upperBound: KType? 
= null, l if (hasRows) classes.add(DataRow::class) if (hasFrames) classes.add(DataFrame::class) if (hasList) classes.add(List::class) + if (hasCols) classes.add(DataColumn::class) if (collectionClasses.isNotEmpty()) classes.addAll(collectionClasses) return classes.commonType(hasNulls, upperBound) } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/getColumns.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/getColumns.kt index 283ec46109..22480fbad7 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/getColumns.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/getColumns.kt @@ -23,6 +23,6 @@ internal fun NamedValue.toColumnWithPath() = name = path.last(), values = listOf(value), suggestedType = type, - suggestedTypeIsUpperBound = guessType, + guessTypeWithSuggestedAsUpperbound = guessType, defaultValue = default, ) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/concat.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/concat.kt index 98508d8935..f63a400758 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/concat.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/concat.kt @@ -77,7 +77,7 @@ internal fun concatImpl(name: String, columns: List?>, columnS name = name, values = list, suggestedType = tartypeOf, - suggestedTypeIsUpperBound = guessType, + guessTypeWithSuggestedAsUpperbound = guessType, defaultValue = defaultValue, ).cast() } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/FrameColumnImpl.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/FrameColumnImpl.kt index f36adb112f..930ab72fbf 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/FrameColumnImpl.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/FrameColumnImpl.kt @@ -33,17 +33,6 @@ internal open 
class FrameColumnImpl constructor( // This only runs with `kotlin.dataframe.debug=true` in gradle.properties. if (BuildConfig.DEBUG) { require(!values.anyNull()) { "FrameColumn cannot null values." } - -// val schema = columnSchema?.value -// ?: values.mapNotNull { it.takeIf { it.nrow > 0 }?.schema() }.intersectSchemas() -// -// for (df in values) { -// val dfSchema = df.schema() -// if (dfSchema.columns.isEmpty()) continue -// require(dfSchema.compare(schema).isDerivedOrEqual()) { -// "DataFrames in FrameColumn don't adhere to the given schema:\nGiven:\n$schema\n\nActual:\n$dfSchema" -// } -// } } } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt index 72d9a50770..c00d66b42f 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt @@ -28,6 +28,7 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnResolutionContext import org.jetbrains.kotlinx.dataframe.columns.ColumnSet import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath import org.jetbrains.kotlinx.dataframe.columns.ColumnsResolver +import org.jetbrains.kotlinx.dataframe.columns.ValueColumn import org.jetbrains.kotlinx.dataframe.columns.toColumnsSetOf import org.jetbrains.kotlinx.dataframe.impl.DataFrameReceiver import org.jetbrains.kotlinx.dataframe.impl.DataRowImpl @@ -102,48 +103,6 @@ internal fun computeValues(df: DataFrame, expression: AddExpression createColumn(values: Iterable, suggestedType: KType, guessType: Boolean = false): DataColumn = - when { - // values is a non-empty list of AnyRows - values.any() && values.all { it is AnyRow } -> - DataColumn.createColumnGroup( - name = "", - df = (values as Iterable).toDataFrame(), - ).asDataColumn().cast() - - // values is a non-empty list of DataColumns - values.any() && values.all { it is 
AnyCol } -> - DataColumn.createColumnGroup( - name = "", - df = (values as Iterable).toDataFrame(), - ).asDataColumn().cast() - - // values is a non-empty list of DataFrames and nulls - // (but not just nulls; we cannot assume that should create a FrameColumn) - values.any() && values.all { it is AnyFrame? } && !values.all { it == null } -> - DataColumn.createFrameColumn( - name = "", - groups = values.map { it as? AnyFrame ?: DataFrame.empty() }, - ).asDataColumn().cast() - - guessType -> - createColumnGuessingType( - name = "", - values = values.asList(), - suggestedType = suggestedType, - suggestedTypeIsUpperBound = true, - ).cast() - - else -> - DataColumn.createValueColumn( - name = "", - values = values.toList(), - type = suggestedType, - ) - } - // endregion // region create Columns @@ -217,21 +176,66 @@ internal fun Array.toNumberColumns() = toColumnsSetOf() // endregion +/** + * Creates a new column by doing type inference on the given values and + * some conversions to unify the values if necessary. + * + * @param values values to create a column from + * @param suggestedType optional suggested type for values. + * If set to `null` (default) the type will be inferred. + * @param guessTypeWithSuggestedAsUpperbound Only relevant when [suggestedType]` != null`. + * If `true`, type inference will happen with the given [suggestedType] as the supertype. + * @param defaultValue optional default value for the column used when a [ValueColumn] is created. + * @param nullable optional hint for the column nullability, used when a [ValueColumn] is created. + * @param listifyValues if `true`, then values and nulls will be wrapped in a list if they appear among other lists. + * For example: `[1, null, listOf(1, 2, 3)]` will become `[[1], [], [1, 2, 3]]`. + * Note: this parameter is ignored if another [Collection] is present in the values. 
+ * @param allColsMakesColGroup if `true`, then, if all values are non-null same-sized columns, + * a column group will be created instead of a [DataColumn][DataColumn]`<`[AnyCol][AnyCol]`>`. + */ +@PublishedApi +internal fun createColumnGuessingType( + values: Iterable, + suggestedType: KType? = null, + guessTypeWithSuggestedAsUpperbound: Boolean = false, + defaultValue: T? = null, + nullable: Boolean? = null, + listifyValues: Boolean = false, + allColsMakesColGroup: Boolean = false, +): DataColumn = + createColumnGuessingType( + name = "", + values = values, + suggestedType = suggestedType, + guessTypeWithSuggestedAsUpperbound = guessTypeWithSuggestedAsUpperbound, + defaultValue = defaultValue, + nullable = nullable, + listifyValues = listifyValues, + allColsMakesColGroup = allColsMakesColGroup, + ) + +/** + * @include [createColumnGuessingType] + * @param name name for the column + */ @PublishedApi internal fun createColumnGuessingType( name: String, - values: List, + values: Iterable, suggestedType: KType? = null, - suggestedTypeIsUpperBound: Boolean = false, + guessTypeWithSuggestedAsUpperbound: Boolean = false, defaultValue: T? = null, nullable: Boolean? = null, + listifyValues: Boolean = false, + allColsMakesColGroup: Boolean = false, ): DataColumn { - val detectType = suggestedType == null || suggestedTypeIsUpperBound + val detectType = suggestedType == null || guessTypeWithSuggestedAsUpperbound val type = if (detectType) { guessValueType( values = values.asSequence(), upperBound = suggestedType, - listifyValues = false, + listifyValues = listifyValues, + allColsMakesRow = allColsMakesColGroup, ) } else { suggestedType!! @@ -239,8 +243,15 @@ internal fun createColumnGuessingType( return when (type.classifier!! 
as KClass<*>) { DataRow::class -> { - val df = values.map { (it as AnyRow?)?.toDataFrame() ?: DataFrame.empty(1) }.concat() - DataColumn.createColumnGroup(name, df).asDataColumn().cast() + // guessValueType can only return DataRow if all values are AnyRow? + // or all are AnyCol and they all have the same size + if (values.firstOrNull() is AnyCol) { + val df = dataFrameOf(values as Iterable) + DataColumn.createColumnGroup(name, df) + } else { + val df = values.map { (it as AnyRow?)?.toDataFrame() ?: DataFrame.empty(1) }.concat() + DataColumn.createColumnGroup(name, df) + }.asDataColumn().cast() } DataFrame::class -> { @@ -292,7 +303,7 @@ internal fun createColumnGuessingType( if (nullable == null) { DataColumn.createValueColumn( name = name, - values = values, + values = values.asList(), type = type, infer = if (detectType) Infer.None else Infer.Nulls, defaultValue = defaultValue, @@ -300,7 +311,7 @@ internal fun createColumnGuessingType( } else { DataColumn.createValueColumn( name = name, - values = values, + values = values.asList(), type = type.withNullability(nullable), defaultValue = defaultValue, ) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt index 2df7fe8d6a..683e037f9d 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt @@ -39,7 +39,7 @@ import org.jetbrains.kotlinx.dataframe.columns.FrameColumn import org.jetbrains.kotlinx.dataframe.impl.ColumnNameGenerator import org.jetbrains.kotlinx.dataframe.impl.DataCollectorBase import org.jetbrains.kotlinx.dataframe.impl.asList -import org.jetbrains.kotlinx.dataframe.impl.columns.createColumn +import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType import org.jetbrains.kotlinx.dataframe.impl.commonType import org.jetbrains.kotlinx.dataframe.impl.createDataCollector 
import org.jetbrains.kotlinx.dataframe.impl.guessValueType @@ -336,8 +336,11 @@ internal fun fromJsonListAnyColumns( dataFrameOf( columnOf(*map.keys.toTypedArray()).named(KeyValueProperty<*>::key.name), - createColumn(values = map.values, suggestedType = valueType, guessType = false) - .named(KeyValueProperty<*>::value.name), + createColumnGuessingType( + values = map.values, + suggestedType = valueType, + guessTypeWithSuggestedAsUpperbound = false, + ).named(KeyValueProperty<*>::value.name), ) } @@ -516,10 +519,10 @@ internal fun fromJsonListArrayAndValueColumns( dataFrameOf( columnOf(*map.keys.toTypedArray()).named(KeyValueProperty<*>::key.name), - createColumn( + createColumnGuessingType( values = map.values, suggestedType = valueType, - guessType = false, + guessTypeWithSuggestedAsUpperbound = false, ).named(KeyValueProperty<*>::value.name), ) } diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt index 53232dbdcc..59da8454a0 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt @@ -1,9 +1,20 @@ package org.jetbrains.kotlinx.dataframe.api import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.AnyCol +import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.AnyRow +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup +import org.jetbrains.kotlinx.dataframe.columns.ColumnKind +import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType import org.jetbrains.kotlinx.dataframe.impl.nothingType import org.jetbrains.kotlinx.dataframe.type +import org.jetbrains.kotlinx.dataframe.values import org.junit.Test +import 
kotlin.reflect.typeOf class ConstructorsTests { @@ -32,4 +43,110 @@ class ConstructorsTests { dataFrameOf("a" to emptyList())["a"].type shouldBe nothingType(false) dataFrameOf("a" to listOf(null))["a"].type shouldBe nothingType(true) } + + // region createColumn + + @Test + fun `guess column group from rows`() { + val row = dataFrameOf("a", "b")(1, 2).single() + val col = createColumnGuessingType(listOf(row, DataRow.empty), typeOf(), true) + col shouldBe columnOf(row, DataRow.empty) + + col.hasNulls() shouldBe false + col.type() shouldBe typeOf() + col.kind() shouldBe ColumnKind.Group + col[0] shouldBe row + col[1].isEmpty() shouldBe true + } + + @Test + fun `guess column group from rows with null`() { + val row = dataFrameOf("a", "b")(1, 2).single() + val col = createColumnGuessingType(listOf(row, DataRow.empty, null), typeOf(), true) + col shouldBe columnOf(row, DataRow.empty, null) + + col.hasNulls() shouldBe false + col.type() shouldBe typeOf() + col.kind() shouldBe ColumnKind.Group + col[0] shouldBe row + col[1]!!.isEmpty() shouldBe true + col[2]!!.isEmpty() shouldBe true + } + + @Test + fun `guess column group from columns`() { + val col1 = columnOf(1, 2) + val col2 = columnOf("a", "b") + val col = createColumnGuessingType( + values = listOf(col1, col2), + suggestedType = typeOf(), + guessTypeWithSuggestedAsUpperbound = true, + allColsMakesColGroup = true, + ) + col shouldBe columnOf(col1, col2) + + col as ColumnGroup<*> + + col.hasNulls() shouldBe false + col.type() shouldBe typeOf() + col.kind() shouldBe ColumnKind.Group + col.getColumn(0).values shouldBe col1.values + col.getColumn(1).values shouldBe col2.values + } + + @Test + fun `guess value column from columns and null`() { + val col1 = columnOf(1, 2) + val col2 = columnOf("a", "b") + val col = createColumnGuessingType( + listOf(col1, col2, null), + typeOf(), + true, + ) + col.values shouldBe columnOf(col1, col2, null).values + + col.hasNulls() shouldBe true + col.type() shouldBe typeOf?>() // becomes 
a column with value columns and nulls + col.kind() shouldBe ColumnKind.Value + col[0] shouldBe col1 + col[1] shouldBe col2 + col[2] shouldBe null + } + + @Test + fun `guess frame column from dataframes and null`() { + val df1 = dataFrameOf("a", "b")(1, 2) + val df2 = dataFrameOf("a", "b")(3, 4) + val col = createColumnGuessingType( + listOf(df1, df2, null), + typeOf(), + true, + ) + col.values shouldBe columnOf(df1, df2, null).values + + col.hasNulls() shouldBe false + col.type() shouldBe typeOf() // becomes frame column, making nulls empty dataframes + col.kind() shouldBe ColumnKind.Frame + col[0] shouldBe df1 + col[1] shouldBe df2 + col[2] shouldBe DataFrame.empty() + } + + @Test + fun `guess value column from nulls`() { + val col = createColumnGuessingType( + listOf(null, null), + nothingType(true), + true, + ) + col.values shouldBe columnOf(null, null).values + + col.hasNulls() shouldBe true + col.type() shouldBe nothingType(true) + col.kind() shouldBe ColumnKind.Value + col[0] shouldBe null + col[1] shouldBe null + } + + // endregion } diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/columns/DataColumns.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/columns/DataColumns.kt index efb35e65de..e40bd257d6 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/columns/DataColumns.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/columns/DataColumns.kt @@ -43,11 +43,9 @@ class DataColumns { ) } - shouldThrow { - DataColumn.createUnsafe( - "", - listOf(dataFrameOf("a")(1), null), - ) - } + DataColumn.createUnsafe( + name = "", + values = listOf(dataFrameOf("a")(1), null), + ).kind() shouldBe ColumnKind.Value } } diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/types/UtilTests.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/types/UtilTests.kt index 95cf67b930..436aa2aa0f 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/types/UtilTests.kt +++ 
b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/types/UtilTests.kt @@ -1,6 +1,9 @@ package org.jetbrains.kotlinx.dataframe.types import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.api.columnOf import org.jetbrains.kotlinx.dataframe.impl.asArrayAsListOrNull import org.jetbrains.kotlinx.dataframe.impl.commonParent import org.jetbrains.kotlinx.dataframe.impl.commonParents @@ -151,6 +154,19 @@ class UtilTests { guessValueType(sequenceOf(1, 2, listOf(1), emptySet())) shouldBe typeOf() guessValueType(sequenceOf(listOf(1), setOf(1.0, 2.0))) shouldBe typeOf>() + + guessValueType( + sequenceOf(DataColumn.empty(), columnOf(1)), + allColsMakesRow = true, + ) shouldBe typeOf>() + + guessValueType( + sequenceOf(columnOf("a"), columnOf(1)), + allColsMakesRow = true, + ) shouldBe typeOf>() + guessValueType( + sequenceOf(columnOf("a"), columnOf(1)), + ) shouldBe typeOf>() } @Test diff --git a/gradle.properties b/gradle.properties index 0f2b9e1916..c54b38e032 100644 --- a/gradle.properties +++ b/gradle.properties @@ -15,4 +15,4 @@ kotlin.dataframe.add.ksp=false # Enables debug mode for dataframe. # This can make certain tests run that should not be run in production. 
# It can also be turned on from the command line with `-Pkotlin.dataframe.debug=true` -kotlin.dataframe.debug=true +kotlin.dataframe.debug=false From 64d1f99eb931137ca390bc42d76b0cb8fcf26bac Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Fri, 18 Oct 2024 20:20:46 +0200 Subject: [PATCH 07/14] Changed parsing to use createColumnGuessingType() to fix issue #593 for CSV and other parsing usages --- .../kotlinx/dataframe/impl/api/parse.kt | 8 ++++++-- .../dataframe/impl/columns/constructors.kt | 4 +++- .../kotlinx/dataframe/io/ParserTests.kt | 18 ++++++++++++++++++ 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt index 2e7f8363cf..d3839c4f1e 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt @@ -32,6 +32,7 @@ import org.jetbrains.kotlinx.dataframe.exceptions.TypeConversionException import org.jetbrains.kotlinx.dataframe.hasNulls import org.jetbrains.kotlinx.dataframe.impl.canParse import org.jetbrains.kotlinx.dataframe.impl.catchSilent +import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType import org.jetbrains.kotlinx.dataframe.impl.createStarProjectedType import org.jetbrains.kotlinx.dataframe.impl.javaDurationCanParse import org.jetbrains.kotlinx.dataframe.io.isURL @@ -525,7 +526,10 @@ internal fun DataColumn.tryParseImpl(options: ParserOptions?): DataColu if (type.jvmErasure == String::class && !nullStringParsed) { return this // nothing parsed } - return DataColumn.createUnsafe(name(), parsedValues, type) + + // Create a new column with the parsed values, + // createColumnGuessingType is used to handle unifying values if needed + return createColumnGuessingType(name(), parsedValues, type) } internal fun DataColumn.parse(parser: StringParser, options: ParserOptions?): DataColumn 
{ @@ -535,7 +539,7 @@ internal fun DataColumn.parse(parser: StringParser, options: Par handler(it.trim()) ?: throw IllegalStateException("Couldn't parse '$it' into type ${parser.type}") } } - return DataColumn.createValueColumn(name(), parsedValues, parser.type.withNullability(hasNulls)) as DataColumn + return createColumnGuessingType(name(), parsedValues, parser.type.withNullability(hasNulls)) } internal fun DataFrame.parseImpl(options: ParserOptions?, columns: ColumnsSelector): DataFrame { diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt index c00d66b42f..57c88f0316 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt @@ -249,7 +249,9 @@ internal fun createColumnGuessingType( val df = dataFrameOf(values as Iterable) DataColumn.createColumnGroup(name, df) } else { - val df = values.map { (it as AnyRow?)?.toDataFrame() ?: DataFrame.empty(1) }.concat() + val df = values.map { + (it as AnyRow?)?.toDataFrame() ?: DataFrame.empty(1) + }.concat() DataColumn.createColumnGroup(name, df) }.asDataColumn().cast() } diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ParserTests.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ParserTests.kt index 6ca02e2f41..b864ea8e84 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ParserTests.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ParserTests.kt @@ -7,6 +7,7 @@ import kotlinx.datetime.LocalTime import kotlinx.datetime.TimeZone import kotlinx.datetime.toKotlinLocalDate import kotlinx.datetime.toKotlinLocalDateTime +import org.jetbrains.kotlinx.dataframe.AnyFrame import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.api.cast @@ 
-16,11 +17,15 @@ import org.jetbrains.kotlinx.dataframe.api.convertToDouble import org.jetbrains.kotlinx.dataframe.api.convertToLocalDate import org.jetbrains.kotlinx.dataframe.api.convertToLocalDateTime import org.jetbrains.kotlinx.dataframe.api.convertToLocalTime +import org.jetbrains.kotlinx.dataframe.api.first +import org.jetbrains.kotlinx.dataframe.api.isEmpty +import org.jetbrains.kotlinx.dataframe.api.isFrameColumn import org.jetbrains.kotlinx.dataframe.api.parse import org.jetbrains.kotlinx.dataframe.api.parser import org.jetbrains.kotlinx.dataframe.api.plus import org.jetbrains.kotlinx.dataframe.api.times import org.jetbrains.kotlinx.dataframe.api.tryParse +import org.jetbrains.kotlinx.dataframe.columns.ColumnKind import org.jetbrains.kotlinx.dataframe.exceptions.TypeConversionException import org.junit.Test import java.math.BigDecimal @@ -215,4 +220,17 @@ class ParserTests { Locale.setDefault(currentLocale) } } + + @Test + fun `Issue #593, mixing null and json`() { + val col by columnOf("[\"str\"]", "[]", "null") + val parsed = col.parse() + parsed.type() shouldBe typeOf() + parsed.kind() shouldBe ColumnKind.Frame + require(parsed.isFrameColumn()) + + parsed[0]["value"].first() shouldBe "str" + parsed[1].isEmpty() shouldBe true + parsed[2].isEmpty() shouldBe true + } } From 88ee08d0db2507d4159649dca2dd1a23a9166e86 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Mon, 21 Oct 2024 11:22:10 +0200 Subject: [PATCH 08/14] moved to chunkImpl --- .../jetbrains/kotlinx/dataframe/api/chunked.kt | 15 ++------------- .../kotlinx/dataframe/impl/api/chunked.kt | 18 ++++++++++++++++++ .../kotlinx/dataframe/impl/api/groupBy.kt | 3 +-- .../kotlinx/dataframe/impl/io/readJson.kt | 6 +++--- 4 files changed, 24 insertions(+), 18 deletions(-) create mode 100644 core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/chunked.kt diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/chunked.kt 
b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/chunked.kt index 0628deb1b4..421d1d64e6 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/chunked.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/chunked.kt @@ -6,29 +6,18 @@ import org.jetbrains.kotlinx.dataframe.DataRow import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup import org.jetbrains.kotlinx.dataframe.columns.FrameColumn import org.jetbrains.kotlinx.dataframe.columns.ValueColumn +import org.jetbrains.kotlinx.dataframe.impl.api.chunkedImpl import org.jetbrains.kotlinx.dataframe.impl.getListType -import org.jetbrains.kotlinx.dataframe.impl.splitByIndices import org.jetbrains.kotlinx.dataframe.nrow import org.jetbrains.kotlinx.dataframe.type -/** - * Creates a [FrameColumn] from [this] by splitting the dataframe into - * smaller ones, based on the given [startIndices]. - */ -public fun DataFrame.chunked(startIndices: Iterable, name: String = "groups"): FrameColumn = - DataColumn.createFrameColumn( - name = name, - groups = this.splitByIndices(startIndices.asSequence()).toList(), - schema = lazy { this.schema() }, - ) - /** * Creates a [FrameColumn] from [this] by splitting the dataframe into * smaller ones, with their number of rows at most [size]. 
*/ public fun DataFrame.chunked(size: Int, name: String = "groups"): FrameColumn { val startIndices = (0 until nrow step size) - return this.chunked(startIndices, name) + return this.chunkedImpl(startIndices, name) } public fun DataColumn.chunked(size: Int): ValueColumn> { diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/chunked.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/chunked.kt new file mode 100644 index 0000000000..c3ad444897 --- /dev/null +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/chunked.kt @@ -0,0 +1,18 @@ +package org.jetbrains.kotlinx.dataframe.impl.api + +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.api.schema +import org.jetbrains.kotlinx.dataframe.columns.FrameColumn +import org.jetbrains.kotlinx.dataframe.impl.splitByIndices + +/** + * Creates a [FrameColumn] from [this] by splitting the dataframe into + * smaller ones, based on the given [startIndices]. 
+ */ +internal fun DataFrame.chunkedImpl(startIndices: Iterable, name: String = "groups"): FrameColumn = + DataColumn.createFrameColumn( + name = name, + groups = this.splitByIndices(startIndices.asSequence()).toList(), + schema = lazy { this.schema() }, + ) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/groupBy.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/groupBy.kt index 8caa80846c..beba1b686d 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/groupBy.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/groupBy.kt @@ -6,7 +6,6 @@ import org.jetbrains.kotlinx.dataframe.DataRow import org.jetbrains.kotlinx.dataframe.api.GroupBy import org.jetbrains.kotlinx.dataframe.api.GroupedDataRow import org.jetbrains.kotlinx.dataframe.api.cast -import org.jetbrains.kotlinx.dataframe.api.chunked import org.jetbrains.kotlinx.dataframe.api.getColumnsWithPaths import org.jetbrains.kotlinx.dataframe.api.getRows import org.jetbrains.kotlinx.dataframe.api.indices @@ -62,7 +61,7 @@ internal fun DataFrame.groupByImpl(moveToTop: Boolean, columns: ColumnsSe } val groupedColumnName = keyColumnsDf.nameGenerator().addUnique(GroupBy.groupedColumnAccessor.name()) - val groupedColumn = sorted.chunked(startIndices.asIterable(), groupedColumnName) + val groupedColumn = sorted.chunkedImpl(startIndices.asIterable(), groupedColumnName) val df = keyColumnsDf + groupedColumn return GroupByImpl(df, groupedColumn, columns) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt index 683e037f9d..d95f20b08b 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt @@ -23,7 +23,6 @@ import org.jetbrains.kotlinx.dataframe.DataRow import org.jetbrains.kotlinx.dataframe.api.JsonPath import 
org.jetbrains.kotlinx.dataframe.api.KeyValueProperty import org.jetbrains.kotlinx.dataframe.api.cast -import org.jetbrains.kotlinx.dataframe.api.chunked import org.jetbrains.kotlinx.dataframe.api.columnOf import org.jetbrains.kotlinx.dataframe.api.concat import org.jetbrains.kotlinx.dataframe.api.dataFrameOf @@ -38,6 +37,7 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup import org.jetbrains.kotlinx.dataframe.columns.FrameColumn import org.jetbrains.kotlinx.dataframe.impl.ColumnNameGenerator import org.jetbrains.kotlinx.dataframe.impl.DataCollectorBase +import org.jetbrains.kotlinx.dataframe.impl.api.chunkedImpl import org.jetbrains.kotlinx.dataframe.impl.asList import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType import org.jetbrains.kotlinx.dataframe.impl.commonType @@ -301,7 +301,7 @@ internal fun fromJsonListAnyColumns( else -> parsed.unwrapUnnamedColumns() - .chunked( + .chunkedImpl( startIndices = startIndices, name = ARRAY_COLUMN_NAME, // will be erased ) @@ -645,7 +645,7 @@ internal fun fromJsonListArrayAndValueColumns( ) } - else -> parsed.unwrapUnnamedColumns().chunked(startIndices, colName) + else -> parsed.unwrapUnnamedColumns().chunkedImpl(startIndices, colName) } UnnamedColumn(res) } From 1633e2c8b87bf57e833af43c0f1c17e3bfbea41c Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Mon, 21 Oct 2024 13:46:16 +0200 Subject: [PATCH 09/14] added tests and some small fixes for dataFrameOf constructors --- .../kotlinx/dataframe/api/constructors.kt | 53 ++++-- .../kotlinx/dataframe/api/constructors.kt | 171 ++++++++++++++++++ 2 files changed, 206 insertions(+), 18 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt index ff9023d3b6..13dd3919df 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt 
@@ -285,8 +285,10 @@ public fun dataFrameOf(vararg columns: AnyBaseCol): DataFrame<*> = dataFrameOf(c @Interpretable("DataFrameOf0") public fun dataFrameOf(vararg header: String): DataFrameBuilder = dataFrameOf(header.toList()) -public inline fun dataFrameOf(vararg header: String, fill: (String) -> Iterable): DataFrame<*> = - dataFrameOf(header.asIterable(), fill) +public inline fun dataFrameOf( + vararg header: String, + crossinline fill: (String) -> Iterable, +): DataFrame<*> = dataFrameOf(header.asIterable()).invoke(fill) public fun dataFrameOf(header: Iterable): DataFrameBuilder = DataFrameBuilder(header.asList()) @@ -300,9 +302,12 @@ public fun dataFrameOf(header: Iterable, values: Iterable): DataFr public inline fun dataFrameOf(header: Iterable, fill: (T) -> Iterable): DataFrame<*> = header.map { value -> - fill(value).asList().let { - DataColumn.createUnsafe(value.toString(), it) - } + createColumnGuessingType( + name = value.toString(), + values = fill(value).asList(), + suggestedType = typeOf(), + guessTypeWithSuggestedAsUpperbound = true, + ) }.toDataFrame() public fun dataFrameOf(header: CharProgression): DataFrameBuilder = dataFrameOf(header.map { it.toString() }) @@ -331,16 +336,19 @@ public class DataFrameBuilder(private val header: List) { public operator fun invoke(args: Sequence): DataFrame<*> = invoke(*args.toList().toTypedArray()) - public fun withColumns(columnBuilder: (String) -> AnyCol): DataFrame<*> = header.map(columnBuilder).toDataFrame() + public fun withColumns(columnBuilder: (String) -> AnyCol): DataFrame<*> = + header + .map { columnBuilder(it) named it } // create a columns and make sure to rename them to the given header + .toDataFrame() public inline operator fun invoke(crossinline valuesBuilder: (String) -> Iterable): DataFrame<*> = withColumns { name -> - valuesBuilder(name).let { - DataColumn.createUnsafe( - name = name, - values = it.asList(), - ) - } + createColumnGuessingType( + name = name, + values = 
valuesBuilder(name).asList(), + suggestedType = typeOf(), + guessTypeWithSuggestedAsUpperbound = true, + ) } public inline fun fill(nrow: Int, value: C): DataFrame<*> = @@ -352,30 +360,39 @@ public class DataFrameBuilder(private val header: List) { ) } + public fun fill(nrow: Int, dataFrame: AnyFrame): DataFrame<*> = + withColumns { name -> + DataColumn.createFrameColumn( + name = name, + groups = List(nrow) { dataFrame }, + schema = lazy { dataFrame.schema() }, + ) + } + public inline fun nulls(nrow: Int): DataFrame<*> = fill(nrow, null) public inline fun fillIndexed(nrow: Int, crossinline init: (Int, String) -> C): DataFrame<*> = withColumns { name -> - DataColumn.createUnsafe( - name, - List(nrow) { init(it, name) }, + DataColumn.createWithTypeInference( + name = name, + values = List(nrow) { init(it, name) }, ) } public inline fun fill(nrow: Int, crossinline init: (Int) -> C): DataFrame<*> = withColumns { name -> - DataColumn.createUnsafe( + DataColumn.createWithTypeInference( name = name, values = List(nrow, init), ) } - private inline fun fillNotNull(nrow: Int, crossinline init: (Int) -> C) = + private inline fun fillNotNull(nrow: Int, crossinline init: (Int) -> C & Any) = withColumns { name -> DataColumn.createValueColumn( name = name, values = List(nrow, init), - type = typeOf(), + type = typeOf().withNullability(false), ) } diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt index 59da8454a0..125ff99ba6 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt @@ -149,4 +149,175 @@ class ConstructorsTests { } // endregion + + // region dataFrameOf + @Test + fun `dataFrameOf withColumns`() { + val df = dataFrameOf("value", "value2", "frameCol").withColumns { + when (it) { + "value" -> columnOf(1, 2, 3, null) + + "value2" -> columnOf( + 
columnOf(1, 2), + columnOf(3, 4), + columnOf(5, null), + null, + ) + + "frameCol" -> columnOf( + dataFrameOf("a", "b")(1, 2), + dataFrameOf("a", "b")(3, 4), + dataFrameOf("a", "b")(5, null), + null, + ) + + else -> error("Unexpected column name: $it") + } + } + + df["value"].type shouldBe typeOf() + df["value"].kind() shouldBe ColumnKind.Value + + df["value2"].type shouldBe typeOf?>() + df["value2"].kind() shouldBe ColumnKind.Value + + df["frameCol"].type shouldBe typeOf>() + df["frameCol"].kind() shouldBe ColumnKind.Frame + df["frameCol"].last() shouldBe DataFrame.empty() + } + + @Test + fun `dataFrameOf invoke`() { + val df1 = dataFrameOf("value", "value2", "frameCol") { + when (it) { + "value" -> listOf(1, 2, 3, null) + + "value2" -> listOf( + columnOf(1, 2), + columnOf(3, 4), + columnOf(5, null), + null, + ) + + "frameCol" -> listOf( + dataFrameOf("a", "b")(1, 2), + dataFrameOf("a", "b")(3, 4), + dataFrameOf("a", "b")(5, null), + null, + ) + + else -> error("Unexpected column name: $it") + } + } + + val df2 = dataFrameOf("value", "value2", "frameCol").invoke { + when (it) { + "value" -> listOf(1, 2, 3, null) + + "value2" -> listOf(columnOf(1, 2), columnOf(3, 4), columnOf(5, null), null) + + "frameCol" -> listOf( + dataFrameOf("a", "b")(1, 2), + dataFrameOf("a", "b")(3, 4), + dataFrameOf("a", "b")(5, null), + null, + ) + + else -> error("Unexpected column name: $it") + } + } + + val names = listOf("value", "value2", "frameCol") + val df3 = dataFrameOf(listOf(1, 2, 3)) { + when (it) { + 1 -> listOf(1, 2, 3, null) + + 2 -> listOf(columnOf(1, 2), columnOf(3, 4), columnOf(5, null), null) + + 3 -> listOf( + dataFrameOf("a", "b")(1, 2), + dataFrameOf("a", "b")(3, 4), + dataFrameOf("a", "b")(5, null), + null, + ) + + else -> error("Unexpected column name: $it") + } + }.rename { all() }.into { names[it.name.toInt() - 1] } + + val df4 = dataFrameOf(names).invoke { + when (it) { + "value" -> listOf(1, 2, 3, null) + + "value2" -> listOf(columnOf(1, 2), columnOf(3, 4), 
columnOf(5, null), null) + + "frameCol" -> listOf( + dataFrameOf("a", "b")(1, 2), + dataFrameOf("a", "b")(3, 4), + dataFrameOf("a", "b")(5, null), + null, + ) + + else -> error("Unexpected column name: $it") + } + } + + df1 shouldBe df2 + df2 shouldBe df3 + df3 shouldBe df4 + + df1["value"].type shouldBe typeOf() + df1["value"].kind() shouldBe ColumnKind.Value + + df1["value2"].type shouldBe typeOf?>() + df1["value2"].kind() shouldBe ColumnKind.Value + + df1["frameCol"].type shouldBe typeOf>() + df1["frameCol"].kind() shouldBe ColumnKind.Frame + df1["frameCol"].last() shouldBe DataFrame.empty() + } + + @Test + fun `dataFrameOf fill`() { + val df1 = dataFrameOf("a", "b").fill(2, "lol") + + df1["a"].values shouldBe listOf("lol", "lol") + df1["a"].kind() shouldBe ColumnKind.Value + df1["b"].values shouldBe listOf("lol", "lol") + df1["b"].kind() shouldBe ColumnKind.Value + + val df2 = dataFrameOf("a", "b").fill(2, dataFrameOf("a", "b")(1, 2)) + df2["a"].type() shouldBe typeOf>() + df2["a"].kind() shouldBe ColumnKind.Frame + df2["b"].type() shouldBe typeOf>() + df2["b"].kind() shouldBe ColumnKind.Frame + + val df3 = dataFrameOf("a", "b").fill(2) { it } + df3["a"].values shouldBe listOf(0, 1) + df3["a"].kind() shouldBe ColumnKind.Value + df3["b"].values shouldBe listOf(0, 1) + df3["b"].kind() shouldBe ColumnKind.Value + + val df4 = dataFrameOf("a", "b").fill(2) { dataFrameOf("a", "b")(1, 2) } + df4["a"].type() shouldBe typeOf>() + df4["a"].kind() shouldBe ColumnKind.Frame + df4["b"].type() shouldBe typeOf>() + df4["b"].kind() shouldBe ColumnKind.Frame + + val a = listOf(1, 2) + val b = listOf(dataFrameOf("a", "b")(1, 2), null) + val df5 = dataFrameOf("a", "b").fillIndexed(2) { it, colName -> + when (colName) { + "a" -> a[it] + "b" -> b[it] + else -> error("Unexpected column name: $colName") + } + } + df5["a"].values shouldBe a + df5["a"].kind() shouldBe ColumnKind.Value + df5["b"].values shouldBe listOf(b[0], DataFrame.empty()) + df5["b"].kind() shouldBe ColumnKind.Frame 
+ } + + // endregion } From dfaf46e49643fb5ffd75bf5629f92efde35703f7 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Fri, 25 Oct 2024 17:07:45 +0200 Subject: [PATCH 10/14] refactored suggestedType+guessTypeWithSuggestedAsUpperbound into TypeSuggestion, working on feedback --- .../jetbrains/kotlinx/dataframe/DataColumn.kt | 17 ++--- .../kotlinx/dataframe/api/constructors.kt | 17 ++--- .../kotlinx/dataframe/api/inferType.kt | 8 ++- ...{TypeConversions.kt => typeConversions.kt} | 8 ++- .../dataframe/columns/TypeSuggestion.kt | 39 ++++++++++ .../dataframe/impl/aggregation/getColumns.kt | 4 +- .../kotlinx/dataframe/impl/api/concat.kt | 4 +- .../kotlinx/dataframe/impl/api/parse.kt | 14 +++- .../kotlinx/dataframe/impl/api/toDataFrame.kt | 2 +- .../dataframe/impl/columns/constructors.kt | 72 +++++++++---------- .../kotlinx/dataframe/impl/io/readJson.kt | 7 +- 11 files changed, 119 insertions(+), 73 deletions(-) rename core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/{TypeConversions.kt => typeConversions.kt} (96%) create mode 100644 core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/TypeSuggestion.kt diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt index cb0fcad5a2..a3714ec3d7 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt @@ -15,6 +15,7 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnPath import org.jetbrains.kotlinx.dataframe.columns.ColumnResolutionContext import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath import org.jetbrains.kotlinx.dataframe.columns.FrameColumn +import org.jetbrains.kotlinx.dataframe.columns.TypeSuggestion import org.jetbrains.kotlinx.dataframe.columns.ValueColumn import org.jetbrains.kotlinx.dataframe.impl.columns.ColumnGroupImpl import org.jetbrains.kotlinx.dataframe.impl.columns.FrameColumnImpl @@ -73,11 
+74,10 @@ public interface DataColumn : BaseColumn { /** * Creates [ValueColumn] using given [name], [values] and reified column [type]. * - * Be careful; values are NOT checked to adhere to [type] for efficiency, + * The column [type] will be defined at compile-time using [T] argument. + * Be careful with casting; values are NOT checked to adhere to `reified` type [T] for efficiency, * unless you specify [infer]. * - * Note, that column [type] will be defined at compile-time using [T] argument - * * @param T type of the column * @param name name of the column * @param values list of column values @@ -114,7 +114,8 @@ public interface DataColumn : BaseColumn { /** * Creates [FrameColumn] using the given [name] and list of dataframes [groups]. * - * Be careful; [groups] must be a non-null list of [DataFrames][DataFrame]. + * [groups] must be a non-null list of [DataFrames][DataFrame], as [FrameColumn] does + * not allow `null` values. * This is NOT checked at runtime for efficiency, nor is the validity of given [schema]. * * @param name name of the frame column @@ -141,21 +142,21 @@ public interface DataColumn : BaseColumn { * * @param name name of the column * @param values the values to represent each row in the column + * @param suggestedType optional suggested type for values. Default is [TypeSuggestion.Infer]. + * See [TypeSuggestion] for more information. * @param nullable optionally you can specify whether [values] contains nulls, if `null` it is inferred. - * @param allColsMakesColGroup if `true`, then, if all values are non-null same-sized columns, - * a column group will be created instead of a [DataColumn][DataColumn]`<`[AnyCol][AnyCol]`>`. */ public fun createWithTypeInference( name: String, values: List, + suggestedType: TypeSuggestion = TypeSuggestion.Infer, nullable: Boolean? 
= null, - allColsMakesColGroup: Boolean = false, ): DataColumn = createColumnGuessingType( name = name, values = values, + suggestedType = suggestedType, nullable = nullable, - allColsMakesColGroup = allColsMakesColGroup, ) /** diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt index 13dd3919df..9b40ee4df2 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt @@ -15,6 +15,7 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor import org.jetbrains.kotlinx.dataframe.columns.ColumnPath import org.jetbrains.kotlinx.dataframe.columns.ColumnReference import org.jetbrains.kotlinx.dataframe.columns.FrameColumn +import org.jetbrains.kotlinx.dataframe.columns.TypeSuggestion import org.jetbrains.kotlinx.dataframe.exceptions.DuplicateColumnNamesException import org.jetbrains.kotlinx.dataframe.exceptions.UnequalColumnSizesException import org.jetbrains.kotlinx.dataframe.impl.ColumnNameGenerator @@ -225,8 +226,7 @@ public class ColumnDelegate(private val parent: ColumnGroupReference? 
= null) public inline fun columnOf(vararg values: T): DataColumn = createColumnGuessingType( values = values.asIterable(), - suggestedType = typeOf(), - guessTypeWithSuggestedAsUpperbound = true, + suggestedType = TypeSuggestion.InferWithUpperbound(typeOf()), listifyValues = false, allColsMakesColGroup = true, ).forceResolve() @@ -252,8 +252,7 @@ public fun columnOf(frames: Iterable>): FrameColumn = public inline fun column(values: Iterable): DataColumn = createColumnGuessingType( values = values, - suggestedType = typeOf(), - guessTypeWithSuggestedAsUpperbound = false, + suggestedType = TypeSuggestion.Use(typeOf()), allColsMakesColGroup = true, ).forceResolve() @@ -305,8 +304,7 @@ public inline fun dataFrameOf(header: Iterable, fill: (T) -> I createColumnGuessingType( name = value.toString(), values = fill(value).asList(), - suggestedType = typeOf(), - guessTypeWithSuggestedAsUpperbound = true, + suggestedType = TypeSuggestion.InferWithUpperbound(typeOf()), ) }.toDataFrame() @@ -346,8 +344,7 @@ public class DataFrameBuilder(private val header: List) { createColumnGuessingType( name = name, values = valuesBuilder(name).asList(), - suggestedType = typeOf(), - guessTypeWithSuggestedAsUpperbound = true, + suggestedType = TypeSuggestion.InferWithUpperbound(typeOf()), ) } @@ -387,12 +384,12 @@ public class DataFrameBuilder(private val header: List) { ) } - private inline fun fillNotNull(nrow: Int, crossinline init: (Int) -> C & Any) = + private inline fun fillNotNull(nrow: Int, crossinline init: (Int) -> C) = withColumns { name -> DataColumn.createValueColumn( name = name, values = List(nrow, init), - type = typeOf().withNullability(false), + type = typeOf(), ) } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/inferType.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/inferType.kt index 96a66253a0..140b6b3a61 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/inferType.kt +++ 
b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/inferType.kt @@ -5,12 +5,18 @@ import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.columns.ColumnReference +import org.jetbrains.kotlinx.dataframe.columns.TypeSuggestion import org.jetbrains.kotlinx.dataframe.columns.toColumnSet import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType import org.jetbrains.kotlinx.dataframe.type import kotlin.reflect.KProperty -public fun AnyCol.inferType(): DataColumn<*> = createColumnGuessingType(name, toList(), type, true) +public fun AnyCol.inferType(): DataColumn<*> = + createColumnGuessingType( + name = name, + values = toList(), + suggestedType = TypeSuggestion.InferWithUpperbound(type), + ) // region DataFrame diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/TypeConversions.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/typeConversions.kt similarity index 96% rename from core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/TypeConversions.kt rename to core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/typeConversions.kt index 41e9802066..de78382079 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/TypeConversions.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/typeConversions.kt @@ -233,14 +233,14 @@ public inline fun Iterable.toValueColumn(column: KProperty): V public enum class Infer { /** - * Use reified type argument of an inline [DataFrame] operation as [DataColumn.type]. + * Use `reified` type argument of an inline [DataFrame] operation as [DataColumn.type]. * * This is the most efficient but least safe option. 
*/ None, /** - * Use reified type argument of an inline [DataFrame] operation as [DataColumn.type], + * Use `reified` type argument of an inline [DataFrame] operation as [DataColumn.type], * but compute [DataColumn.hasNulls] by checking column [DataColumn.values] for an actual presence of `null` values. */ Nulls, @@ -250,6 +250,10 @@ public enum class Infer { * base type as an upper bound. * * This is the least efficient but safest option. + * + * It's useful, for instance, + * if you have a column of type `Any?` and want its schema type to be inferred based on the actual values. + * In many cases, letting the library infer by `reified` types is enough and more efficient. */ Type, diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/TypeSuggestion.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/TypeSuggestion.kt new file mode 100644 index 0000000000..66b37d4ade --- /dev/null +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/TypeSuggestion.kt @@ -0,0 +1,39 @@ +package org.jetbrains.kotlinx.dataframe.columns + +import kotlin.reflect.KType + +/** + * The suggestion of how to find a column type. + * + * The suggestion can either be: + * + * - [Infer] - {@include [Infer]} + * - [InferWithUpperbound] - {@include [InferWithUpperbound]} + * - [Use] - {@include [Use]} + * + * It can be either an [exact type][Use] or an [upper bound][InferWithUpperbound] of possible types + * after which the library will infer the exact type. + */ +public sealed interface TypeSuggestion { + + public companion object { + + /** Creates a new [TypeSuggestion] instance based on the given parameters. 
*/ + public fun create(suggestedType: KType?, guessType: Boolean): TypeSuggestion = + when { + suggestedType != null && guessType -> InferWithUpperbound(suggestedType) + suggestedType != null && !guessType -> Use(suggestedType) + suggestedType == null && guessType -> Infer + else -> error("Cannot create TypeSuggestion with no suggested type and no guessing allowed.") + } + } + + /** The library will try to infer the type by checking all the values. */ + public data object Infer : TypeSuggestion + + /** The library will infer the type by checking all the values taking a given upper bound into account. */ + public data class InferWithUpperbound(val upperbound: KType) : TypeSuggestion + + /** The library will use the specified type without inference. */ + public data class Use(val type: KType) : TypeSuggestion +} diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/getColumns.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/getColumns.kt index 22480fbad7..0f244fec4d 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/getColumns.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/getColumns.kt @@ -7,6 +7,7 @@ import org.jetbrains.kotlinx.dataframe.aggregation.NamedValue import org.jetbrains.kotlinx.dataframe.api.filter import org.jetbrains.kotlinx.dataframe.api.isComparable import org.jetbrains.kotlinx.dataframe.api.isNumber +import org.jetbrains.kotlinx.dataframe.columns.TypeSuggestion import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType internal inline fun Aggregatable.remainingColumns( @@ -22,7 +23,6 @@ internal fun NamedValue.toColumnWithPath() = path to createColumnGuessingType( name = path.last(), values = listOf(value), - suggestedType = type, - guessTypeWithSuggestedAsUpperbound = guessType, + suggestedType = TypeSuggestion.create(type, guessType), defaultValue = default, ) diff --git 
a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/concat.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/concat.kt index f63a400758..48e03407e8 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/concat.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/concat.kt @@ -8,6 +8,7 @@ import org.jetbrains.kotlinx.dataframe.api.cast import org.jetbrains.kotlinx.dataframe.api.dataFrameOf import org.jetbrains.kotlinx.dataframe.api.emptyDataFrame import org.jetbrains.kotlinx.dataframe.api.isColumnGroup +import org.jetbrains.kotlinx.dataframe.columns.TypeSuggestion import org.jetbrains.kotlinx.dataframe.hasNulls import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType import org.jetbrains.kotlinx.dataframe.impl.commonType @@ -76,8 +77,7 @@ internal fun concatImpl(name: String, columns: List?>, columnS return createColumnGuessingType( name = name, values = list, - suggestedType = tartypeOf, - guessTypeWithSuggestedAsUpperbound = guessType, + suggestedType = TypeSuggestion.create(tartypeOf, guessType), defaultValue = defaultValue, ).cast() } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt index d3839c4f1e..19428518db 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt @@ -27,12 +27,12 @@ import org.jetbrains.kotlinx.dataframe.api.isFrameColumn import org.jetbrains.kotlinx.dataframe.api.isSubtypeOf import org.jetbrains.kotlinx.dataframe.api.toColumn import org.jetbrains.kotlinx.dataframe.api.tryParse +import org.jetbrains.kotlinx.dataframe.columns.TypeSuggestion import org.jetbrains.kotlinx.dataframe.columns.size import org.jetbrains.kotlinx.dataframe.exceptions.TypeConversionException import org.jetbrains.kotlinx.dataframe.hasNulls import 
org.jetbrains.kotlinx.dataframe.impl.canParse import org.jetbrains.kotlinx.dataframe.impl.catchSilent -import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType import org.jetbrains.kotlinx.dataframe.impl.createStarProjectedType import org.jetbrains.kotlinx.dataframe.impl.javaDurationCanParse import org.jetbrains.kotlinx.dataframe.io.isURL @@ -529,7 +529,11 @@ internal fun DataColumn.tryParseImpl(options: ParserOptions?): DataColu // Create a new column with the parsed values, // createColumnGuessingType is used to handle unifying values if needed - return createColumnGuessingType(name(), parsedValues, type) + return DataColumn.createWithTypeInference( + name = name(), + values = parsedValues, + suggestedType = TypeSuggestion.Use(type), + ) } internal fun DataColumn.parse(parser: StringParser, options: ParserOptions?): DataColumn { @@ -539,7 +543,11 @@ internal fun DataColumn.parse(parser: StringParser, options: Par handler(it.trim()) ?: throw IllegalStateException("Couldn't parse '$it' into type ${parser.type}") } } - return createColumnGuessingType(name(), parsedValues, parser.type.withNullability(hasNulls)) + return DataColumn.createWithTypeInference( + name = name(), + values = parsedValues, + suggestedType = TypeSuggestion.Use(parser.type.withNullability(hasNulls)), + ) } internal fun DataFrame.parseImpl(options: ParserOptions?, columns: ColumnsSelector): DataFrame { diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/toDataFrame.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/toDataFrame.kt index 87fe03866b..6219a40ab5 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/toDataFrame.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/toDataFrame.kt @@ -263,7 +263,7 @@ internal fun convertToDataFrame( val shouldCreateColumnGroup = kClass == DataRow::class when { - hasExceptions -> DataColumn.createWithTypeInference(it.columnName, values, nullable) + 
hasExceptions -> DataColumn.createWithTypeInference(it.columnName, values, nullable = nullable) shouldCreateValueCol -> DataColumn.createValueColumn( diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt index 57c88f0316..d4a89cc10f 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt @@ -28,6 +28,7 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnResolutionContext import org.jetbrains.kotlinx.dataframe.columns.ColumnSet import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath import org.jetbrains.kotlinx.dataframe.columns.ColumnsResolver +import org.jetbrains.kotlinx.dataframe.columns.TypeSuggestion import org.jetbrains.kotlinx.dataframe.columns.ValueColumn import org.jetbrains.kotlinx.dataframe.columns.toColumnsSetOf import org.jetbrains.kotlinx.dataframe.impl.DataFrameReceiver @@ -181,10 +182,8 @@ internal fun Array.toNumberColumns() = toColumnsSetOf() * some conversions to unify the values if necessary. * * @param values values to create a column from - * @param suggestedType optional suggested type for values. - * If set to `null` (default) the type will be inferred. - * @param guessTypeWithSuggestedAsUpperbound Only relevant when [suggestedType]` != null`. - * If `true`, type inference will happen with the given [suggestedType] as the supertype. + * @param suggestedType optional suggested type for values. Default is [TypeSuggestion.Infer]. + * See [TypeSuggestion] for more information. * @param defaultValue optional default value for the column used when a [ValueColumn] is created. * @param nullable optional hint for the column nullability, used when a [ValueColumn] is created. * @param listifyValues if `true`, then values and nulls will be wrapped in a list if they appear among other lists. 
@@ -196,8 +195,7 @@ internal fun Array.toNumberColumns() = toColumnsSetOf() @PublishedApi internal fun createColumnGuessingType( values: Iterable, - suggestedType: KType? = null, - guessTypeWithSuggestedAsUpperbound: Boolean = false, + suggestedType: TypeSuggestion = TypeSuggestion.Infer, defaultValue: T? = null, nullable: Boolean? = null, listifyValues: Boolean = false, @@ -207,7 +205,6 @@ internal fun createColumnGuessingType( name = "", values = values, suggestedType = suggestedType, - guessTypeWithSuggestedAsUpperbound = guessTypeWithSuggestedAsUpperbound, defaultValue = defaultValue, nullable = nullable, listifyValues = listifyValues, @@ -222,30 +219,29 @@ internal fun createColumnGuessingType( internal fun createColumnGuessingType( name: String, values: Iterable, - suggestedType: KType? = null, - guessTypeWithSuggestedAsUpperbound: Boolean = false, + suggestedType: TypeSuggestion = TypeSuggestion.Infer, defaultValue: T? = null, nullable: Boolean? = null, listifyValues: Boolean = false, allColsMakesColGroup: Boolean = false, ): DataColumn { - val detectType = suggestedType == null || guessTypeWithSuggestedAsUpperbound - val type = if (detectType) { - guessValueType( - values = values.asSequence(), - upperBound = suggestedType, - listifyValues = listifyValues, - allColsMakesRow = allColsMakesColGroup, - ) - } else { - suggestedType!! + val type = when (suggestedType) { + is TypeSuggestion.Infer, is TypeSuggestion.InferWithUpperbound -> + guessValueType( + values = values.asSequence(), + upperBound = (suggestedType as? TypeSuggestion.InferWithUpperbound)?.upperbound, + listifyValues = listifyValues, + allColsMakesRow = allColsMakesColGroup, + ) + + is TypeSuggestion.Use -> suggestedType.type } return when (type.classifier!! 
as KClass<*>) { + // guessValueType can only return DataRow if all values are `AnyRow?` + // or allColsMakesColGroup == true, all values are `AnyCol`, and they all have the same size DataRow::class -> { - // guessValueType can only return DataRow if all values are AnyRow? - // or all are AnyCol and they all have the same size - if (values.firstOrNull() is AnyCol) { + if (allColsMakesColGroup && values.firstOrNull() is AnyCol) { val df = dataFrameOf(values as Iterable) DataColumn.createColumnGroup(name, df) } else { @@ -301,23 +297,19 @@ internal fun createColumnGuessingType( } } - else -> { - if (nullable == null) { - DataColumn.createValueColumn( - name = name, - values = values.asList(), - type = type, - infer = if (detectType) Infer.None else Infer.Nulls, - defaultValue = defaultValue, - ) - } else { - DataColumn.createValueColumn( - name = name, - values = values.asList(), - type = type.withNullability(nullable), - defaultValue = defaultValue, - ) - } - } + else -> + DataColumn.createValueColumn( + name = name, + values = values.asList(), + type = if (nullable != null) type.withNullability(nullable) else type, + infer = when { + // even though an exact type is suggested, + // nullable is not given, so we still infer nullability + nullable == null && suggestedType is TypeSuggestion.Use -> Infer.Nulls + + // nullability already inferred by guessValueType + else -> Infer.None + }, + ) } } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt index d95f20b08b..6a1547dcb9 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt @@ -35,6 +35,7 @@ import org.jetbrains.kotlinx.dataframe.api.splitInto import org.jetbrains.kotlinx.dataframe.api.toDataFrame import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup import 
org.jetbrains.kotlinx.dataframe.columns.FrameColumn +import org.jetbrains.kotlinx.dataframe.columns.TypeSuggestion import org.jetbrains.kotlinx.dataframe.impl.ColumnNameGenerator import org.jetbrains.kotlinx.dataframe.impl.DataCollectorBase import org.jetbrains.kotlinx.dataframe.impl.api.chunkedImpl @@ -338,8 +339,7 @@ internal fun fromJsonListAnyColumns( columnOf(*map.keys.toTypedArray()).named(KeyValueProperty<*>::key.name), createColumnGuessingType( values = map.values, - suggestedType = valueType, - guessTypeWithSuggestedAsUpperbound = false, + suggestedType = TypeSuggestion.Use(valueType), ).named(KeyValueProperty<*>::value.name), ) } @@ -521,8 +521,7 @@ internal fun fromJsonListArrayAndValueColumns( columnOf(*map.keys.toTypedArray()).named(KeyValueProperty<*>::key.name), createColumnGuessingType( values = map.values, - suggestedType = valueType, - guessTypeWithSuggestedAsUpperbound = false, + suggestedType = TypeSuggestion.Use(valueType), ).named(KeyValueProperty<*>::value.name), ) } From 222086a78787f7d1ee5d31a59db3574aa34379ec Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Mon, 28 Oct 2024 12:43:55 +0100 Subject: [PATCH 11/14] forgot defaultValue in createColumnGuessingType(), removed column size check from guessing type. May be unexpected behavior. 
Fixed tests --- .../kotlinx/dataframe/api/constructors.kt | 4 +- .../dataframe/columns/TypeSuggestion.kt | 3 +- .../kotlinx/dataframe/impl/TypeUtils.kt | 46 ++++++++----------- .../dataframe/impl/columns/constructors.kt | 10 +++- .../kotlinx/dataframe/api/constructors.kt | 29 ++++++------ .../kotlinx/dataframe/types/UtilTests.kt | 2 +- 6 files changed, 46 insertions(+), 48 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt index 9b40ee4df2..3824093e93 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt @@ -301,7 +301,7 @@ public fun dataFrameOf(header: Iterable, values: Iterable): DataFr public inline fun dataFrameOf(header: Iterable, fill: (T) -> Iterable): DataFrame<*> = header.map { value -> - createColumnGuessingType( + DataColumn.createWithTypeInference( name = value.toString(), values = fill(value).asList(), suggestedType = TypeSuggestion.InferWithUpperbound(typeOf()), @@ -341,7 +341,7 @@ public class DataFrameBuilder(private val header: List) { public inline operator fun invoke(crossinline valuesBuilder: (String) -> Iterable): DataFrame<*> = withColumns { name -> - createColumnGuessingType( + DataColumn.createWithTypeInference( name = name, values = valuesBuilder(name).asList(), suggestedType = TypeSuggestion.InferWithUpperbound(typeOf()), diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/TypeSuggestion.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/TypeSuggestion.kt index 66b37d4ade..ad191ed2f6 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/TypeSuggestion.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/TypeSuggestion.kt @@ -23,8 +23,7 @@ public sealed interface TypeSuggestion { when { suggestedType != null && guessType -> 
InferWithUpperbound(suggestedType) suggestedType != null && !guessType -> Use(suggestedType) - suggestedType == null && guessType -> Infer - else -> error("Cannot create TypeSuggestion with no suggested type and no guessing allowed.") + else -> Infer // no type was suggested, so we need to guess, no matter what guessType is } } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/TypeUtils.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/TypeUtils.kt index 4398557db2..621e3f318e 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/TypeUtils.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/TypeUtils.kt @@ -392,7 +392,7 @@ internal fun getValuesType(values: List, type: KType, infer: Infer): KTyp * @param listifyValues if true, then values and nulls will be wrapped in a list if they appear among other lists. * For example: `[1, null, listOf(1, 2, 3)]` will become `List` instead of `Any?` * Note: this parameter is ignored if another [Collection] is present in the values. - * @param allColsMakesRow if true, then, if all values are non-null same-sized columns, we assume + * @param allColsMakesRow if true, then, if all values are non-null columns, we assume * that a column group should be created instead of a [DataColumn][DataColumn]`<`[AnyCol][AnyCol]`>`, * so the function will return [DataRow]. 
*/ @@ -409,7 +409,6 @@ internal fun guessValueType( var hasFrames = false var hasRows = false var hasCols = false - val colSizes = mutableListOf() var hasList = false var allListsAreEmpty = true val classesInCollection = mutableSetOf>() @@ -423,10 +422,7 @@ internal fun guessValueType( is AnyFrame -> hasFrames = true - is AnyCol -> { - hasCols = true - colSizes += it.size() - } + is AnyCol -> hasCols = true is List<*> -> { hasList = true @@ -491,13 +487,7 @@ internal fun guessValueType( hasRows && !hasFrames && !hasList && !hasCols -> DataRow::class.createStarProjectedType(false) - allColsMakesRow && - hasCols && - !hasFrames && - !hasList && - !hasRows && - !hasNulls && - colSizes.distinct().size == 1 -> + allColsMakesRow && hasCols && !hasFrames && !hasList && !hasRows && !hasNulls -> DataRow::class.createStarProjectedType(false) collectionClasses.isNotEmpty() && !hasFrames && !hasRows && !hasCols -> { @@ -509,24 +499,24 @@ internal fun guessValueType( } } if (hasList) collectionClasses.add(List::class) - (commonParent(collectionClasses) ?: Collection::class) - .createTypeWithArgument( - classesInCollection.commonType( - nullable = nullsInCollection, - upperBound = elementType ?: nothingType(nullable = nullsInCollection), - ), - ).withNullability(hasNulls) + (commonParent(collectionClasses) ?: Collection::class).createTypeWithArgument( + argument = classesInCollection.commonType( + nullable = nullsInCollection, + upperBound = elementType ?: nothingType(nullable = nullsInCollection), + ), + ).withNullability(hasNulls) } hasList && collectionClasses.isEmpty() && !hasFrames && !hasRows && !hasCols -> { - val elementType = upperBound?.let { if (it.jvmErasure == List::class) it.arguments[0].type else null } - List::class - .createTypeWithArgument( - classesInCollection.commonType( - nullable = nullsInCollection, - upperBound = elementType ?: nothingType(nullable = nullsInCollection), - ), - ).withNullability(hasNulls && !listifyValues) + val elementType = 
upperBound?.let { + if (it.jvmErasure == List::class) it.arguments[0].type else null + } + List::class.createTypeWithArgument( + argument = classesInCollection.commonType( + nullable = nullsInCollection, + upperBound = elementType ?: nothingType(nullable = nullsInCollection), + ), + ).withNullability(hasNulls && !listifyValues) } else -> { diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt index d4a89cc10f..3643eee3c6 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt @@ -239,7 +239,7 @@ internal fun createColumnGuessingType( return when (type.classifier!! as KClass<*>) { // guessValueType can only return DataRow if all values are `AnyRow?` - // or allColsMakesColGroup == true, all values are `AnyCol`, and they all have the same size + // or allColsMakesColGroup == true, all values are `AnyCol` DataRow::class -> { if (allColsMakesColGroup && values.firstOrNull() is AnyCol) { val df = dataFrameOf(values as Iterable) @@ -293,7 +293,12 @@ internal fun createColumnGuessingType( } DataColumn.createFrameColumn(name, frames).cast() } else { - DataColumn.createValueColumn(name, lists, type, defaultValue = defaultValue).cast() + DataColumn.createValueColumn( + name = name, + values = lists, + type = type, + defaultValue = defaultValue, + ).cast() } } @@ -310,6 +315,7 @@ internal fun createColumnGuessingType( // nullability already inferred by guessValueType else -> Infer.None }, + defaultValue = defaultValue, ) } } diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt index 2c6f033e16..36a79170c0 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt +++ 
b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt @@ -9,6 +9,7 @@ import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.DataRow import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup import org.jetbrains.kotlinx.dataframe.columns.ColumnKind +import org.jetbrains.kotlinx.dataframe.columns.TypeSuggestion.InferWithUpperbound import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType import org.jetbrains.kotlinx.dataframe.impl.nothingType import org.jetbrains.kotlinx.dataframe.type @@ -49,7 +50,10 @@ class ConstructorsTests { @Test fun `guess column group from rows`() { val row = dataFrameOf("a", "b")(1, 2).single() - val col = createColumnGuessingType(listOf(row, DataRow.empty), typeOf(), true) + val col = createColumnGuessingType( + values = listOf(row, DataRow.empty), + suggestedType = InferWithUpperbound(typeOf()), + ) col shouldBe columnOf(row, DataRow.empty) col.hasNulls() shouldBe false @@ -62,7 +66,10 @@ class ConstructorsTests { @Test fun `guess column group from rows with null`() { val row = dataFrameOf("a", "b")(1, 2).single() - val col = createColumnGuessingType(listOf(row, DataRow.empty, null), typeOf(), true) + val col = createColumnGuessingType( + values = listOf(row, DataRow.empty, null), + suggestedType = InferWithUpperbound(typeOf()), + ) col shouldBe columnOf(row, DataRow.empty, null) col.hasNulls() shouldBe false @@ -79,8 +86,7 @@ class ConstructorsTests { val col2 = columnOf("a", "b") val col = createColumnGuessingType( values = listOf(col1, col2), - suggestedType = typeOf(), - guessTypeWithSuggestedAsUpperbound = true, + suggestedType = InferWithUpperbound(typeOf()), allColsMakesColGroup = true, ) col shouldBe columnOf(col1, col2) @@ -99,9 +105,8 @@ class ConstructorsTests { val col1 = columnOf(1, 2) val col2 = columnOf("a", "b") val col = createColumnGuessingType( - listOf(col1, col2, null), - typeOf(), - true, + values = listOf(col1, col2, null), + suggestedType = 
InferWithUpperbound(typeOf()), ) col.values shouldBe columnOf(col1, col2, null).values @@ -118,9 +123,8 @@ class ConstructorsTests { val df1 = dataFrameOf("a", "b")(1, 2) val df2 = dataFrameOf("a", "b")(3, 4) val col = createColumnGuessingType( - listOf(df1, df2, null), - typeOf(), - true, + values = listOf(df1, df2, null), + suggestedType = InferWithUpperbound(typeOf()), ) col.values shouldBe columnOf(df1, df2, null).values @@ -135,9 +139,8 @@ class ConstructorsTests { @Test fun `guess value column from nulls`() { val col = createColumnGuessingType( - listOf(null, null), - nothingType(true), - true, + values = listOf(null, null), + suggestedType = InferWithUpperbound(nothingType(true)), ) col.values shouldBe columnOf(null, null).values diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/types/UtilTests.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/types/UtilTests.kt index 436aa2aa0f..57bcd7b78b 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/types/UtilTests.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/types/UtilTests.kt @@ -158,7 +158,7 @@ class UtilTests { guessValueType( sequenceOf(DataColumn.empty(), columnOf(1)), allColsMakesRow = true, - ) shouldBe typeOf>() + ) shouldBe typeOf>() guessValueType( sequenceOf(columnOf("a"), columnOf(1)), From baaa0146176167f750e179430b4ea2ca70b2d3b8 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Mon, 28 Oct 2024 13:25:26 +0100 Subject: [PATCH 12/14] clarifying some docs based on feedback --- .../jetbrains/kotlinx/dataframe/DataColumn.kt | 37 +++++++++++-------- .../kotlinx/dataframe/impl/TypeUtils.kt | 8 +++- .../dataframe/impl/columns/constructors.kt | 7 ++-- 3 files changed, 32 insertions(+), 20 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt index a3714ec3d7..6ac5390516 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt 
+++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt @@ -132,13 +132,18 @@ public interface DataColumn : BaseColumn { /** * Creates either a [FrameColumn], [ColumnGroup], or [ValueColumn] by analyzing each value in * [values]. + * * This is safer but less efficient than the other functions. * - * Some conversions are done automatically to attempt to unify the values, like: - * - `null` -> [DataFrame.empty][DataFrame.empty]`()` and [DataRow] -> single-row [DataFrame] when there are other - * [DataFrames][DataFrame] present in [values] - * - [List][List]`<`[DataRow][DataRow]`<*>>` -> [DataFrame] - * etc. + * Some conversions are done automatically to attempt to unify the values. + * + * For instance, when there are other [DataFrames][DataFrame] present in [values], we'll convert: + * - `null` -> [DataFrame.empty]`()` + * - [DataRow] -> single-row [DataFrame] + * - [List][List]`<`[DataRow][DataRow]`<*>>` -> multi-row [DataFrame] + * + * to be able to create a [FrameColumn]. + * There are more conversions for other types as well. * * @param name name of the column * @param values the values to represent each row in the column @@ -163,12 +168,12 @@ public interface DataColumn : BaseColumn { * Calls [createColumnGroup], [createFrameColumn], or [createValueColumn] based on * [type]. * + * This may be unsafe but is more efficient than [createWithTypeInference]. + * * Be careful; Values in [values] are NOT checked to adhere to the given [type], nor - * do we check whether there are nulls among the values when the given type is [DataFrame] - * (a [FrameColumn] cannot contain `null`, this causes runtime exceptions). - * When [type] is `DataFrame<*>?`, a [ValueColumn] is created to avoid this issue. + * do we check whether there are unexpected nulls among the values. * - * This may be unsafe but is more efficient than [createWithTypeInference]. + * It's recommended to use [createValueColumn], [createColumnGroup], and [createFrameColumn] instead. 
* * @param name the name of the column * @param values the values to represent each row in the column @@ -181,9 +186,11 @@ public interface DataColumn : BaseColumn { type: KType, infer: Infer = Infer.None, ): DataColumn = - when (type.toColumnKind()) { + when (type.toColumnKind()) { // AnyFrame -> Frame, AnyRow? -> Group, else -> Value ColumnKind.Value -> createValueColumn(name, values, type, infer) + ColumnKind.Group -> createColumnGroup(name, (values as List).concat()).asDataColumn().cast() + ColumnKind.Frame -> createFrameColumn(name, values as List).asDataColumn().cast() } @@ -191,12 +198,12 @@ public interface DataColumn : BaseColumn { * Calls [createColumnGroup], [createFrameColumn], or [createValueColumn] based on * type [T]. * - * Be careful; Values in [values] are NOT checked to adhere to the given [type], nor - * do we check whether there are nulls among the values when the given type is [DataFrame] - * (a [FrameColumn] cannot contain `null`, this causes runtime exceptions). - * When [type] is `DataFrame<*>?`, a [ValueColumn] is created to avoid this issue. + * This is generally safe, as [T] can be inferred, and more efficient than [createWithTypeInference]. * - * This may be unsafe but is more efficient than [createWithTypeInference]. + * Be careful when casting occurs; Values in [values] are NOT checked to adhere to the given/inferred type [T], + * nor do we check whether there are unexpected nulls among the values. + * + * It's recommended to use [createValueColumn], [createColumnGroup], and [createFrameColumn] instead. 
* * @param T the (unchecked) common type of [values] * @param name the name of the column diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/TypeUtils.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/TypeUtils.kt index 621e3f318e..48437b7f72 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/TypeUtils.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/TypeUtils.kt @@ -9,6 +9,7 @@ import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.DataRow import org.jetbrains.kotlinx.dataframe.api.Infer +import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType import kotlin.reflect.KClass import kotlin.reflect.KType import kotlin.reflect.KTypeParameter @@ -385,7 +386,12 @@ internal fun getValuesType(values: List, type: KType, infer: Infer): KTyp } /** - * Returns the value type of the given [values] sequence. + * Returns the guessed value type of the given [values] sequence. + * + * This function analyzes all [values] once and returns the expected column type. + * + * The resulting column type may need [values] to be converted to the expected type. + * See [createColumnGuessingType] for how to create a column with the guessed type. * * @param values the values to guess the type from * @param upperBound the upper bound of the type to guess diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt index 3643eee3c6..35ea3ac31c 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt @@ -239,8 +239,8 @@ internal fun createColumnGuessingType( return when (type.classifier!! 
as KClass<*>) { // guessValueType can only return DataRow if all values are `AnyRow?` - // or allColsMakesColGroup == true, all values are `AnyCol` - DataRow::class -> { + // or allColsMakesColGroup == true, and all values are `AnyCol` + DataRow::class -> if (allColsMakesColGroup && values.firstOrNull() is AnyCol) { val df = dataFrameOf(values as Iterable) DataColumn.createColumnGroup(name, df) @@ -250,7 +250,6 @@ internal fun createColumnGuessingType( }.concat() DataColumn.createColumnGroup(name, df) }.asDataColumn().cast() - } DataFrame::class -> { val frames = values.map { @@ -312,7 +311,7 @@ internal fun createColumnGuessingType( // nullable is not given, so we still infer nullability nullable == null && suggestedType is TypeSuggestion.Use -> Infer.Nulls - // nullability already inferred by guessValueType + // nullability already handled; inferred by guessValueType or explicitly given else -> Infer.None }, defaultValue = defaultValue, From 1181d57384666ecd4d5a2f94ae8c6374771a6762 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Wed, 30 Oct 2024 14:40:57 +0100 Subject: [PATCH 13/14] renamed `DataColumn.createX()` functions and handled other feedback --- .../jetbrains/kotlinx/dataframe/DataColumn.kt | 15 ++++++++------- .../kotlinx/dataframe/api/constructors.kt | 10 +++++----- .../org/jetbrains/kotlinx/dataframe/api/map.kt | 8 ++++---- .../org/jetbrains/kotlinx/dataframe/api/sort.kt | 2 +- .../kotlinx/dataframe/api/toDataFrame.kt | 8 ++++---- .../jetbrains/kotlinx/dataframe/api/transpose.kt | 4 ++-- .../kotlinx/dataframe/api/typeConversions.kt | 10 +++++----- .../kotlinx/dataframe/api/valueCounts.kt | 4 ++-- .../kotlinx/dataframe/impl/api/joinWith.kt | 2 +- .../kotlinx/dataframe/impl/api/parse.kt | 4 ++-- .../kotlinx/dataframe/impl/api/toDataFrame.kt | 4 ++-- .../dataframe/impl/columns/constructors.kt | 6 +++--- .../kotlinx/dataframe/columns/DataColumns.kt | 16 +++++++++------- .../kotlinx/dataframe/io/ParserTests.kt | 3 ++- 
.../org/jetbrains/kotlinx/dataframe/io/xlsx.kt | 2 +- .../dataframe/plugin/impl/DataFrameAdapter.kt | 2 +- 16 files changed, 52 insertions(+), 48 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt index 6ac5390516..64d7d9c1b9 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt @@ -133,7 +133,7 @@ public interface DataColumn : BaseColumn { * Creates either a [FrameColumn], [ColumnGroup], or [ValueColumn] by analyzing each value in * [values]. * - * This is safer but less efficient than the other functions. + * This is safer but slower than the other functions. * * Some conversions are done automatically to attempt to unify the values. * @@ -151,7 +151,7 @@ public interface DataColumn : BaseColumn { * See [TypeSuggestion] for more information. * @param nullable optionally you can specify whether [values] contains nulls, if `null` it is inferred. */ - public fun createWithTypeInference( + public fun createByInference( name: String, values: List, suggestedType: TypeSuggestion = TypeSuggestion.Infer, @@ -168,7 +168,7 @@ public interface DataColumn : BaseColumn { * Calls [createColumnGroup], [createFrameColumn], or [createValueColumn] based on * [type]. * - * This may be unsafe but is more efficient than [createWithTypeInference]. + * This may be unsafe but is more efficient than [createByInference]. * * Be careful; Values in [values] are NOT checked to adhere to the given [type], nor * do we check whether there are unexpected nulls among the values. 
@@ -180,7 +180,7 @@ public interface DataColumn : BaseColumn { * @param type the (unchecked) common type of [values] * @param infer in case a [ValueColumn] is created, this controls how/whether types need to be inferred */ - public fun createUnsafe( + public fun createByType( name: String, values: List, type: KType, @@ -198,7 +198,8 @@ public interface DataColumn : BaseColumn { * Calls [createColumnGroup], [createFrameColumn], or [createValueColumn] based on * type [T]. * - * This is generally safe, as [T] can be inferred, and more efficient than [createWithTypeInference]. + * This is generally safe, as [T] can be inferred by the compiler, + * and more efficient than [createByInference]. * * Be careful when casting occurs; Values in [values] are NOT checked to adhere to the given/inferred type [T], * nor do we check whether there are unexpected nulls among the values. @@ -210,11 +211,11 @@ public interface DataColumn : BaseColumn { * @param values the values to represent each row in the column * @param infer in case a [ValueColumn] is created, this controls how/whether types need to be inferred */ - public inline fun createUnsafe( + public inline fun createByType( name: String, values: List, infer: Infer = Infer.None, - ): DataColumn = createUnsafe(name, values, typeOf(), infer) + ): DataColumn = createByType(name, values, typeOf(), infer) /** Creates an empty [DataColumn] with given [name]. 
*/ public fun empty(name: String = ""): AnyCol = createValueColumn(name, emptyList(), typeOf()) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt index 3824093e93..13c6ade1be 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt @@ -301,7 +301,7 @@ public fun dataFrameOf(header: Iterable, values: Iterable): DataFr public inline fun dataFrameOf(header: Iterable, fill: (T) -> Iterable): DataFrame<*> = header.map { value -> - DataColumn.createWithTypeInference( + DataColumn.createByInference( name = value.toString(), values = fill(value).asList(), suggestedType = TypeSuggestion.InferWithUpperbound(typeOf()), @@ -329,7 +329,7 @@ public class DataFrameBuilder(private val header: List) { @JvmName("invoke1") internal fun withValues(values: Iterable): DataFrame<*> = withValuesImpl(header, values.asList()).map { (name, values) -> - DataColumn.createWithTypeInference(name, values) + DataColumn.createByInference(name, values) }.toDataFrame() public operator fun invoke(args: Sequence): DataFrame<*> = invoke(*args.toList().toTypedArray()) @@ -341,7 +341,7 @@ public class DataFrameBuilder(private val header: List) { public inline operator fun invoke(crossinline valuesBuilder: (String) -> Iterable): DataFrame<*> = withColumns { name -> - DataColumn.createWithTypeInference( + DataColumn.createByInference( name = name, values = valuesBuilder(name).asList(), suggestedType = TypeSuggestion.InferWithUpperbound(typeOf()), @@ -370,7 +370,7 @@ public class DataFrameBuilder(private val header: List) { public inline fun fillIndexed(nrow: Int, crossinline init: (Int, String) -> C): DataFrame<*> = withColumns { name -> - DataColumn.createWithTypeInference( + DataColumn.createByInference( name = name, values = List(nrow) { init(it, name) }, ) @@ -378,7 +378,7 @@ public class 
DataFrameBuilder(private val header: List) { public inline fun fill(nrow: Int, crossinline init: (Int) -> C): DataFrame<*> = withColumns { name -> - DataColumn.createWithTypeInference( + DataColumn.createByInference( name = name, values = List(nrow, init), ) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/map.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/map.kt index 1f077f950c..18c339f213 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/map.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/map.kt @@ -34,12 +34,12 @@ public inline fun DataColumn.map( crossinline transform: (T) -> R, ): DataColumn { val newValues = Array(size()) { transform(get(it)) }.asList() - return DataColumn.createUnsafe(name(), newValues, typeOf(), infer) + return DataColumn.createByType(name(), newValues, typeOf(), infer) } public fun DataColumn.map(type: KType, infer: Infer = Infer.Nulls, transform: (T) -> R): DataColumn { val values = Array(size()) { transform(get(it)) }.asList() - return DataColumn.createUnsafe(name(), values, type, infer).cast() + return DataColumn.createByType(name(), values, type, infer).cast() } public inline fun DataColumn.mapIndexed( @@ -47,7 +47,7 @@ public inline fun DataColumn.mapIndexed( crossinline transform: (Int, T) -> R, ): DataColumn { val newValues = Array(size()) { transform(it, get(it)) }.asList() - return DataColumn.createUnsafe(name(), newValues, typeOf(), infer) + return DataColumn.createByType(name(), newValues, typeOf(), infer) } public fun DataColumn.mapIndexed( @@ -56,7 +56,7 @@ public fun DataColumn.mapIndexed( transform: (Int, T) -> R, ): DataColumn { val values = Array(size()) { transform(it, get(it)) }.asList() - return DataColumn.createUnsafe(name(), values, type, infer).cast() + return DataColumn.createByType(name(), values, type, infer).cast() } // endregion diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/sort.kt 
b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/sort.kt index 48a1aa5ef3..a64f5f8439 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/sort.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/sort.kt @@ -94,7 +94,7 @@ private interface CommonDataColumnSortWithDocs /** @include [CommonDataColumnSortWithDocs] */ public fun > C.sortWith(comparator: Comparator): C = - DataColumn.createUnsafe(name, values().sortedWith(comparator), type) as C + DataColumn.createByType(name, values().sortedWith(comparator), type) as C /** @include [CommonDataColumnSortWithDocs] */ public fun > C.sortWith(comparator: (T, T) -> Int): C = sortWith(Comparator(comparator)) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt index 56c98ec80d..5809ae434f 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt @@ -190,10 +190,10 @@ public abstract class CreateDataFrameDsl : TraversePropertiesDsl { add(columnName, expression) public inline infix fun String.from(inferType: InferType): Unit = - add(DataColumn.createWithTypeInference(this, source.map { inferType.expression(it) })) + add(DataColumn.createByInference(this, source.map { inferType.expression(it) })) public inline infix fun KProperty.from(inferType: InferType): Unit = - add(DataColumn.createWithTypeInference(columnName, source.map { inferType.expression(it) })) + add(DataColumn.createByInference(columnName, source.map { inferType.expression(it) })) public data class InferType(val expression: (T) -> R) @@ -317,13 +317,13 @@ public interface ValueProperty { public fun Map>.toDataFrame(): AnyFrame = map { - DataColumn.createWithTypeInference(it.key, it.value.asList()) + DataColumn.createByInference(it.key, it.value.asList()) }.toDataFrame() @JvmName("toDataFrameColumnPathAnyNullable") 
public fun Map>.toDataFrame(): AnyFrame = map { - it.key to DataColumn.createWithTypeInference( + it.key to DataColumn.createByInference( name = it.key.last(), values = it.value.asList(), ) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/transpose.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/transpose.kt index 774bea2f75..f2a94926b2 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/transpose.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/transpose.kt @@ -14,7 +14,7 @@ import kotlin.reflect.typeOf // region DataRow public fun DataRow.transpose(): DataFrame> { - val valueColumn = DataColumn.createWithTypeInference(NameValuePair<*>::value.columnName, values) + val valueColumn = DataColumn.createByInference(NameValuePair<*>::value.columnName, values) val nameColumn = owner.columnNames().toValueColumn(NameValuePair<*>::name) return dataFrameOf(nameColumn, valueColumn).cast() } @@ -24,7 +24,7 @@ public inline fun AnyRow.transposeTo(): DataFrame> @PublishedApi internal fun AnyRow.transposeTo(type: KType): DataFrame> { val convertedValues = values.map { it?.convertTo(type) as T? 
} - val valueColumn = DataColumn.createWithTypeInference(NameValuePair::value.columnName, convertedValues) + val valueColumn = DataColumn.createByInference(NameValuePair::value.columnName, convertedValues) val nameColumn = owner.columnNames().toValueColumn(NameValuePair::name) return dataFrameOf(nameColumn, valueColumn).cast() } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/typeConversions.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/typeConversions.kt index de78382079..9cd34f7ea9 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/typeConversions.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/typeConversions.kt @@ -314,19 +314,19 @@ public fun NullabilityOptions.applyNullability(data: List, expectedNulls: public inline fun Iterable.toColumn(name: String = "", infer: Infer = Infer.Nulls): DataColumn = if (infer == Infer.Type) { - DataColumn.createWithTypeInference(name, asList()) + DataColumn.createByInference(name, asList()) } else { - DataColumn.createUnsafe(name, asList(), typeOf(), infer) + DataColumn.createByType(name, asList(), typeOf(), infer) }.forceResolve() public inline fun Iterable<*>.toColumnOf(name: String = ""): DataColumn = - DataColumn.createUnsafe(name, asList() as List, typeOf()).forceResolve() + DataColumn.createByType(name, asList() as List, typeOf()).forceResolve() public inline fun Iterable.toColumn(ref: ColumnReference): DataColumn = - DataColumn.createUnsafe(ref.name(), asList()).forceResolve() + DataColumn.createByType(ref.name(), asList()).forceResolve() public inline fun Iterable.toColumn(property: KProperty): DataColumn = - DataColumn.createUnsafe(property.columnName, asList()).forceResolve() + DataColumn.createByType(property.columnName, asList()).forceResolve() public fun Iterable.toPath(): ColumnPath = ColumnPath(asList()) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/valueCounts.kt 
b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/valueCounts.kt index 5fbd81ef73..9b9a5d4bf8 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/valueCounts.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/valueCounts.kt @@ -40,9 +40,9 @@ public fun DataColumn.valueCounts( } if (dropNA) grouped = grouped.filter { !it.first.isNA } val nulls = if (dropNA) false else hasNulls() - val values = DataColumn.createUnsafe(name(), grouped.map { it.first }, type().withNullability(nulls)) + val values = DataColumn.createByType(name(), grouped.map { it.first }, type().withNullability(nulls)) val countName = if (resultColumn == name()) resultColumn + "1" else resultColumn - val counts = DataColumn.createUnsafe(countName, grouped.map { it.second }, typeOf()) + val counts = DataColumn.createByType(countName, grouped.map { it.second }, typeOf()) return dataFrameOf(values, counts).cast() } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/joinWith.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/joinWith.kt index 182ac6747d..290b8be2e4 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/joinWith.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/joinWith.kt @@ -92,7 +92,7 @@ internal fun DataFrame.joinWithImpl( } val df: DataFrame<*> = outputData.mapIndexed { index, values -> - DataColumn.createWithTypeInference(generator.names[index], values) + DataColumn.createByInference(generator.names[index], values) }.toDataFrame() return df.cast() diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt index 19428518db..be68050557 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt @@ -529,7 +529,7 @@ internal fun DataColumn.tryParseImpl(options: 
ParserOptions?): DataColu // Create a new column with the parsed values, // createColumnGuessingType is used to handle unifying values if needed - return DataColumn.createWithTypeInference( + return DataColumn.createByInference( name = name(), values = parsedValues, suggestedType = TypeSuggestion.Use(type), @@ -543,7 +543,7 @@ internal fun DataColumn.parse(parser: StringParser, options: Par handler(it.trim()) ?: throw IllegalStateException("Couldn't parse '$it' into type ${parser.type}") } } - return DataColumn.createWithTypeInference( + return DataColumn.createByInference( name = name(), values = parsedValues, suggestedType = TypeSuggestion.Use(parser.type.withNullability(hasNulls)), diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/toDataFrame.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/toDataFrame.kt index 6219a40ab5..049b525f8e 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/toDataFrame.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/toDataFrame.kt @@ -263,7 +263,7 @@ internal fun convertToDataFrame( val shouldCreateColumnGroup = kClass == DataRow::class when { - hasExceptions -> DataColumn.createWithTypeInference(it.columnName, values, nullable = nullable) + hasExceptions -> DataColumn.createByInference(it.columnName, values, nullable = nullable) shouldCreateValueCol -> DataColumn.createValueColumn( @@ -301,7 +301,7 @@ internal fun convertToDataFrame( (it as? 
Iterable<*>)?.asList() } - DataColumn.createWithTypeInference(it.columnName, listValues) + DataColumn.createByInference(it.columnName, listValues) } elementClass.isValueType -> { diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt index 35ea3ac31c..b7c397320d 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt @@ -61,20 +61,20 @@ internal fun ColumnsContainer.newColumn( val df = this as? DataFrame ?: dataFrameOf(columns()).cast() val (nullable, values) = computeValues(df, expression) return when (infer) { - Infer.Nulls -> DataColumn.createUnsafe( + Infer.Nulls -> DataColumn.createByType( name = name, values = values, type = type.withNullability(nullable).replaceGenericTypeParametersWithUpperbound(), infer = Infer.None, ) - Infer.Type -> DataColumn.createWithTypeInference( + Infer.Type -> DataColumn.createByInference( name = name, values = values, nullable = nullable, ) - Infer.None -> DataColumn.createUnsafe( + Infer.None -> DataColumn.createByType( name = name, values = values, type = type.replaceGenericTypeParametersWithUpperbound(), diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/columns/DataColumns.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/columns/DataColumns.kt index e40bd257d6..25fc0de21c 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/columns/DataColumns.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/columns/DataColumns.kt @@ -3,6 +3,7 @@ package org.jetbrains.kotlinx.dataframe.columns import io.kotest.assertions.throwables.shouldThrow import io.kotest.matchers.shouldBe import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.BuildConfig import org.jetbrains.kotlinx.dataframe.DataColumn import 
org.jetbrains.kotlinx.dataframe.api.dataFrameOf import org.jetbrains.kotlinx.dataframe.api.toColumn @@ -35,15 +36,16 @@ class DataColumns { @Test fun `allow no nulls in frame columns`() { - // enable kotlin.dataframe.debug=true for this - shouldThrow { - DataColumn.createFrameColumn( - name = "", - groups = listOf(dataFrameOf("a")(1), null) as List, - ) + if (BuildConfig.DEBUG) { + shouldThrow { + DataColumn.createFrameColumn( + name = "", + groups = listOf(dataFrameOf("a")(1), null) as List, + ) + } } - DataColumn.createUnsafe( + DataColumn.createByType( name = "", values = listOf(dataFrameOf("a")(1), null), ).kind() shouldBe ColumnKind.Value diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ParserTests.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ParserTests.kt index b864ea8e84..fab887befc 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ParserTests.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ParserTests.kt @@ -221,8 +221,9 @@ class ParserTests { } } + /** Checks fix for [Issue #593](https://github.com/Kotlin/dataframe/issues/593) */ @Test - fun `Issue #593, mixing null and json`() { + fun `Mixing null and json`() { val col by columnOf("[\"str\"]", "[]", "null") val parsed = col.parse() parsed.type() shouldBe typeOf() diff --git a/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt b/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt index 63f1f862f2..7d88e506c0 100644 --- a/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt +++ b/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt @@ -292,7 +292,7 @@ public fun DataFrame.Companion.readExcel( val cell: Cell? 
= row?.getCell(index) getCellValue(cell) } - DataColumn.createWithTypeInference(name, values) + DataColumn.createByInference(name, values) } return dataFrameOf(columns) } diff --git a/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/DataFrameAdapter.kt b/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/DataFrameAdapter.kt index b0e9c9cf34..3d09d52a2f 100644 --- a/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/DataFrameAdapter.kt +++ b/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/DataFrameAdapter.kt @@ -31,7 +31,7 @@ private fun List.map(): DataFrame { @Suppress("INVISIBLE_REFERENCE") fun SimpleCol.asDataColumn(): DataColumn<*> { val column = when (this) { - is SimpleDataColumn -> DataColumn.createUnsafe(this.name, listOf(this.type)) + is SimpleDataColumn -> DataColumn.createByType(this.name, listOf(this.type)) is SimpleColumnGroup -> DataColumn.createColumnGroup(this.name, this.columns().map()) as ColumnGroupImpl<*> is SimpleFrameColumn -> DataColumn.createFrameColumn(this.name, listOf(this.columns().map())) } From 8a8650ae197bb93deb0a1fe82bf36ccd6a85a501 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Wed, 30 Oct 2024 16:38:00 +0100 Subject: [PATCH 14/14] solved binary compatibility issues --- core/api/core.api | 54 ++++++++++++- .../jetbrains/kotlinx/dataframe/DataColumn.kt | 80 ++++++++++++++++--- .../kotlinx/dataframe/impl/TypeUtils.kt | 7 ++ .../dataframe/impl/columns/constructors.kt | 38 +++++++++ .../dataframe/util/deprecationMessages.kt | 25 +++++- 5 files changed, 187 insertions(+), 17 deletions(-) diff --git a/core/api/core.api b/core/api/core.api index bd92164076..19da77cd6d 100644 --- a/core/api/core.api +++ b/core/api/core.api @@ -249,6 +249,10 @@ public abstract interface class org/jetbrains/kotlinx/dataframe/DataColumn : org public final class org/jetbrains/kotlinx/dataframe/DataColumn$Companion { public final fun create 
(Ljava/lang/String;Ljava/util/List;Lkotlin/reflect/KType;Lorg/jetbrains/kotlinx/dataframe/api/Infer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static synthetic fun create$default (Lorg/jetbrains/kotlinx/dataframe/DataColumn$Companion;Ljava/lang/String;Ljava/util/List;Lkotlin/reflect/KType;Lorg/jetbrains/kotlinx/dataframe/api/Infer;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public final fun createByInference (Ljava/lang/String;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/columns/TypeSuggestion;Ljava/lang/Boolean;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static synthetic fun createByInference$default (Lorg/jetbrains/kotlinx/dataframe/DataColumn$Companion;Ljava/lang/String;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/columns/TypeSuggestion;Ljava/lang/Boolean;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public final fun createByType (Ljava/lang/String;Ljava/util/List;Lkotlin/reflect/KType;Lorg/jetbrains/kotlinx/dataframe/api/Infer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static synthetic fun createByType$default (Lorg/jetbrains/kotlinx/dataframe/DataColumn$Companion;Ljava/lang/String;Ljava/util/List;Lkotlin/reflect/KType;Lorg/jetbrains/kotlinx/dataframe/api/Infer;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public final fun createColumnGroup (Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/DataFrame;)Lorg/jetbrains/kotlinx/dataframe/columns/ColumnGroup; public final fun createFrameColumn (Ljava/lang/String;Ljava/util/List;Lkotlin/Lazy;)Lorg/jetbrains/kotlinx/dataframe/columns/FrameColumn; public final fun createFrameColumn (Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/lang/Iterable;)Lorg/jetbrains/kotlinx/dataframe/columns/FrameColumn; @@ -4150,6 +4154,7 @@ public final class org/jetbrains/kotlinx/dataframe/api/DataColumnTypeKt { public final class org/jetbrains/kotlinx/dataframe/api/DataFrameBuilder { public fun (Ljava/util/List;)V + 
public final fun fill (ILorg/jetbrains/kotlinx/dataframe/DataFrame;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; public final fun invoke (Ljava/lang/Iterable;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; public final fun invoke (Lkotlin/sequences/Sequence;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; public final fun invoke ([Ljava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; @@ -9590,6 +9595,43 @@ public final class org/jetbrains/kotlinx/dataframe/columns/SingleColumnKt { public static final fun isSingleColumnWithGroup (Lorg/jetbrains/kotlinx/dataframe/columns/ColumnsResolver;Ljava/util/List;)Z } +public abstract interface class org/jetbrains/kotlinx/dataframe/columns/TypeSuggestion { + public static final field Companion Lorg/jetbrains/kotlinx/dataframe/columns/TypeSuggestion$Companion; +} + +public final class org/jetbrains/kotlinx/dataframe/columns/TypeSuggestion$Companion { + public final fun create (Lkotlin/reflect/KType;Z)Lorg/jetbrains/kotlinx/dataframe/columns/TypeSuggestion; +} + +public final class org/jetbrains/kotlinx/dataframe/columns/TypeSuggestion$Infer : org/jetbrains/kotlinx/dataframe/columns/TypeSuggestion { + public static final field INSTANCE Lorg/jetbrains/kotlinx/dataframe/columns/TypeSuggestion$Infer; + public fun equals (Ljava/lang/Object;)Z + public fun hashCode ()I + public fun toString ()Ljava/lang/String; +} + +public final class org/jetbrains/kotlinx/dataframe/columns/TypeSuggestion$InferWithUpperbound : org/jetbrains/kotlinx/dataframe/columns/TypeSuggestion { + public fun (Lkotlin/reflect/KType;)V + public final fun component1 ()Lkotlin/reflect/KType; + public final fun copy (Lkotlin/reflect/KType;)Lorg/jetbrains/kotlinx/dataframe/columns/TypeSuggestion$InferWithUpperbound; + public static synthetic fun copy$default (Lorg/jetbrains/kotlinx/dataframe/columns/TypeSuggestion$InferWithUpperbound;Lkotlin/reflect/KType;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/columns/TypeSuggestion$InferWithUpperbound; + public fun 
equals (Ljava/lang/Object;)Z + public final fun getUpperbound ()Lkotlin/reflect/KType; + public fun hashCode ()I + public fun toString ()Ljava/lang/String; +} + +public final class org/jetbrains/kotlinx/dataframe/columns/TypeSuggestion$Use : org/jetbrains/kotlinx/dataframe/columns/TypeSuggestion { + public fun (Lkotlin/reflect/KType;)V + public final fun component1 ()Lkotlin/reflect/KType; + public final fun copy (Lkotlin/reflect/KType;)Lorg/jetbrains/kotlinx/dataframe/columns/TypeSuggestion$Use; + public static synthetic fun copy$default (Lorg/jetbrains/kotlinx/dataframe/columns/TypeSuggestion$Use;Lkotlin/reflect/KType;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/columns/TypeSuggestion$Use; + public fun equals (Ljava/lang/Object;)Z + public final fun getType ()Lkotlin/reflect/KType; + public fun hashCode ()I + public fun toString ()Ljava/lang/String; +} + public abstract interface class org/jetbrains/kotlinx/dataframe/columns/ValueColumn : org/jetbrains/kotlinx/dataframe/DataColumn { public abstract fun distinct ()Lorg/jetbrains/kotlinx/dataframe/columns/ValueColumn; public abstract fun get (Ljava/lang/Iterable;)Lorg/jetbrains/kotlinx/dataframe/columns/ValueColumn; @@ -9913,8 +9955,10 @@ public final class org/jetbrains/kotlinx/dataframe/impl/ExceptionUtilsKt { public final class org/jetbrains/kotlinx/dataframe/impl/TypeUtilsKt { public static final fun getValuesType (Ljava/util/List;Lkotlin/reflect/KType;Lorg/jetbrains/kotlinx/dataframe/api/Infer;)Lkotlin/reflect/KType; - public static final fun guessValueType (Lkotlin/sequences/Sequence;Lkotlin/reflect/KType;Z)Lkotlin/reflect/KType; + public static final synthetic fun guessValueType (Lkotlin/sequences/Sequence;Lkotlin/reflect/KType;Z)Lkotlin/reflect/KType; + public static final fun guessValueType (Lkotlin/sequences/Sequence;Lkotlin/reflect/KType;ZZ)Lkotlin/reflect/KType; public static synthetic fun guessValueType$default 
(Lkotlin/sequences/Sequence;Lkotlin/reflect/KType;ZILjava/lang/Object;)Lkotlin/reflect/KType; + public static synthetic fun guessValueType$default (Lkotlin/sequences/Sequence;Lkotlin/reflect/KType;ZZILjava/lang/Object;)Lkotlin/reflect/KType; public static final fun replaceGenericTypeParametersWithUpperbound (Lkotlin/reflect/KType;)Lkotlin/reflect/KType; } @@ -10121,9 +10165,13 @@ public final class org/jetbrains/kotlinx/dataframe/impl/columns/ComputedColumnRe } public final class org/jetbrains/kotlinx/dataframe/impl/columns/ConstructorsKt { - public static final fun createColumn (Ljava/lang/Iterable;Lkotlin/reflect/KType;Z)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static final synthetic fun createColumn (Ljava/lang/Iterable;Lkotlin/reflect/KType;Z)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static synthetic fun createColumn$default (Ljava/lang/Iterable;Lkotlin/reflect/KType;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; - public static final fun guessColumnType (Ljava/lang/String;Ljava/util/List;Lkotlin/reflect/KType;ZLjava/lang/Object;Ljava/lang/Boolean;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static final fun createColumnGuessingType (Ljava/lang/Iterable;Lorg/jetbrains/kotlinx/dataframe/columns/TypeSuggestion;Ljava/lang/Object;Ljava/lang/Boolean;ZZ)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static final fun createColumnGuessingType (Ljava/lang/String;Ljava/lang/Iterable;Lorg/jetbrains/kotlinx/dataframe/columns/TypeSuggestion;Ljava/lang/Object;Ljava/lang/Boolean;ZZ)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static synthetic fun createColumnGuessingType$default (Ljava/lang/Iterable;Lorg/jetbrains/kotlinx/dataframe/columns/TypeSuggestion;Ljava/lang/Object;Ljava/lang/Boolean;ZZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static synthetic fun createColumnGuessingType$default 
(Ljava/lang/String;Ljava/lang/Iterable;Lorg/jetbrains/kotlinx/dataframe/columns/TypeSuggestion;Ljava/lang/Object;Ljava/lang/Boolean;ZZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static final synthetic fun guessColumnType (Ljava/lang/String;Ljava/util/List;Lkotlin/reflect/KType;ZLjava/lang/Object;Ljava/lang/Boolean;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static synthetic fun guessColumnType$default (Ljava/lang/String;Ljava/util/List;Lkotlin/reflect/KType;ZLjava/lang/Object;Ljava/lang/Boolean;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static final fun newColumn (Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;Lkotlin/reflect/KType;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/api/Infer;Lkotlin/jvm/functions/Function2;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static synthetic fun newColumn$default (Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;Lkotlin/reflect/KType;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/api/Infer;Lkotlin/jvm/functions/Function2;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt index 64d7d9c1b9..97d1c1c7b7 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt @@ -6,7 +6,6 @@ import org.jetbrains.kotlinx.dataframe.api.cast import org.jetbrains.kotlinx.dataframe.api.concat import org.jetbrains.kotlinx.dataframe.api.filter import org.jetbrains.kotlinx.dataframe.api.map -import org.jetbrains.kotlinx.dataframe.api.schema import org.jetbrains.kotlinx.dataframe.api.take import org.jetbrains.kotlinx.dataframe.columns.BaseColumn import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup @@ -17,6 +16,7 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath import 
org.jetbrains.kotlinx.dataframe.columns.FrameColumn import org.jetbrains.kotlinx.dataframe.columns.TypeSuggestion import org.jetbrains.kotlinx.dataframe.columns.ValueColumn +import org.jetbrains.kotlinx.dataframe.impl.api.chunkedImpl import org.jetbrains.kotlinx.dataframe.impl.columns.ColumnGroupImpl import org.jetbrains.kotlinx.dataframe.impl.columns.FrameColumnImpl import org.jetbrains.kotlinx.dataframe.impl.columns.ValueColumnImpl @@ -24,11 +24,18 @@ import org.jetbrains.kotlinx.dataframe.impl.columns.addPath import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnKind import org.jetbrains.kotlinx.dataframe.impl.getValuesType -import org.jetbrains.kotlinx.dataframe.impl.splitByIndices import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema +import org.jetbrains.kotlinx.dataframe.util.CHUNKED_IMPL_IMPORT +import org.jetbrains.kotlinx.dataframe.util.CREATE +import org.jetbrains.kotlinx.dataframe.util.CREATE_BY_INFERENCE_IMPORT +import org.jetbrains.kotlinx.dataframe.util.CREATE_BY_TYPE_IMPORT import org.jetbrains.kotlinx.dataframe.util.CREATE_FRAME_COLUMN -import org.jetbrains.kotlinx.dataframe.util.CREATE_FRAME_COLUMN_IMPORT import org.jetbrains.kotlinx.dataframe.util.CREATE_FRAME_COLUMN_REPLACE +import org.jetbrains.kotlinx.dataframe.util.CREATE_INLINE_REPLACE +import org.jetbrains.kotlinx.dataframe.util.CREATE_REPLACE +import org.jetbrains.kotlinx.dataframe.util.CREATE_WITH_TYPE_INFERENCE +import org.jetbrains.kotlinx.dataframe.util.CREATE_WITH_TYPE_INFERENCE_REPLACE +import org.jetbrains.kotlinx.dataframe.util.TYPE_SUGGESTION_IMPORT import kotlin.reflect.KClass import kotlin.reflect.KProperty import kotlin.reflect.KType @@ -103,14 +110,6 @@ public interface DataColumn : BaseColumn { */ public fun createColumnGroup(name: String, df: DataFrame): ColumnGroup = ColumnGroupImpl(name, df) - @Deprecated( - message = CREATE_FRAME_COLUMN, - replaceWith = 
ReplaceWith(CREATE_FRAME_COLUMN_REPLACE, CREATE_FRAME_COLUMN_IMPORT), - level = DeprecationLevel.WARNING, - ) - public fun createFrameColumn(name: String, df: DataFrame, startIndices: Iterable): FrameColumn = - FrameColumnImpl(name, df.splitByIndices(startIndices.asSequence()).toList(), lazy { df.schema() }) - /** * Creates [FrameColumn] using the given [name] and list of dataframes [groups]. * @@ -219,6 +218,65 @@ public interface DataColumn : BaseColumn { /** Creates an empty [DataColumn] with given [name]. */ public fun empty(name: String = ""): AnyCol = createValueColumn(name, emptyList(), typeOf()) + + // region deprecated + + @Deprecated( + message = CREATE_FRAME_COLUMN, + replaceWith = ReplaceWith(CREATE_FRAME_COLUMN_REPLACE, CHUNKED_IMPL_IMPORT), + level = DeprecationLevel.WARNING, + ) + public fun createFrameColumn(name: String, df: DataFrame, startIndices: Iterable): FrameColumn = + df.chunkedImpl(startIndices = startIndices, name = name) + + @Deprecated( + message = CREATE_WITH_TYPE_INFERENCE, + replaceWith = ReplaceWith( + CREATE_WITH_TYPE_INFERENCE_REPLACE, + CREATE_BY_INFERENCE_IMPORT, + TYPE_SUGGESTION_IMPORT, + ), + level = DeprecationLevel.WARNING, + ) + public fun createWithTypeInference( + name: String, + values: List, + nullable: Boolean? 
= null, + ): DataColumn = + createByInference( + name = name, + values = values, + suggestedType = TypeSuggestion.Infer, + nullable = nullable, + ) + + @Deprecated( + message = CREATE, + replaceWith = ReplaceWith(CREATE_REPLACE, CREATE_BY_TYPE_IMPORT), + level = DeprecationLevel.WARNING, + ) + public fun create( + name: String, + values: List, + type: KType, + infer: Infer = Infer.None, + ): DataColumn = + createByType( + name = name, + values = values, + type = type, + infer = infer, + ) + + @Deprecated( + message = CREATE, + replaceWith = ReplaceWith(CREATE_INLINE_REPLACE, CREATE_BY_TYPE_IMPORT), + level = DeprecationLevel.WARNING, + ) + public inline fun create(name: String, values: List, infer: Infer = Infer.None): DataColumn = + createByType(name = name, values = values, type = typeOf(), infer = infer) + + // endregion } public fun hasNulls(): Boolean = type().isMarkedNullable diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/TypeUtils.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/TypeUtils.kt index 48437b7f72..199568630a 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/TypeUtils.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/TypeUtils.kt @@ -10,6 +10,7 @@ import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.DataRow import org.jetbrains.kotlinx.dataframe.api.Infer import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType +import org.jetbrains.kotlinx.dataframe.util.GUESS_VALUE_TYPE import kotlin.reflect.KClass import kotlin.reflect.KType import kotlin.reflect.KTypeParameter @@ -385,6 +386,12 @@ internal fun getValuesType(values: List, type: KType, infer: Infer): KTyp Infer.None -> type } +/** Just for binary compatibility, as it's @PublishedApi. */ +@Deprecated(GUESS_VALUE_TYPE, level = DeprecationLevel.HIDDEN) +@PublishedApi +internal fun guessValueType(values: Sequence, upperBound: KType? 
= null, listifyValues: Boolean = false): KType = + guessValueType(values = values, upperBound = upperBound, listifyValues = listifyValues, allColsMakesRow = false) + /** * Returns the guessed value type of the given [values] sequence. * diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt index b7c397320d..638a8a6475 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt @@ -38,6 +38,8 @@ import org.jetbrains.kotlinx.dataframe.impl.guessValueType import org.jetbrains.kotlinx.dataframe.impl.replaceGenericTypeParametersWithUpperbound import org.jetbrains.kotlinx.dataframe.index import org.jetbrains.kotlinx.dataframe.nrow +import org.jetbrains.kotlinx.dataframe.util.CREATE_COLUMN +import org.jetbrains.kotlinx.dataframe.util.GUESS_COLUMN_TYPE import kotlin.reflect.KClass import kotlin.reflect.KType import kotlin.reflect.full.withNullability @@ -318,3 +320,39 @@ internal fun createColumnGuessingType( ) } } + +// region deprecated + +/** Just for binary compatibility, since it's @PublishedApi. */ +@Deprecated(CREATE_COLUMN, level = DeprecationLevel.HIDDEN) +@Suppress("UNCHECKED_CAST") +@PublishedApi +internal fun createColumn(values: Iterable, suggestedType: KType, guessType: Boolean = false): DataColumn = + createColumnGuessingType( + values = values, + suggestedType = TypeSuggestion.create(suggestedType, guessType), + allColsMakesColGroup = true, + ) + +/** Just for binary compatibility, since it's @PublishedApi. */ +@Deprecated(GUESS_COLUMN_TYPE, level = DeprecationLevel.HIDDEN) +@PublishedApi +internal fun guessColumnType( + name: String, + values: List, + suggestedType: KType? = null, + suggestedTypeIsUpperBound: Boolean = false, + defaultValue: T? = null, + nullable: Boolean? 
= null, +): DataColumn = + createColumnGuessingType( + name = name, + values = values, + suggestedType = TypeSuggestion.create(suggestedType, suggestedTypeIsUpperBound), + defaultValue = defaultValue, + nullable = nullable, + listifyValues = false, + allColsMakesColGroup = false, + ) + +// endregion diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt index baa4b8dcdf..98c54f0134 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt @@ -15,9 +15,28 @@ internal const val DF_READ_NO_CSV = "This function is deprecated and should be r internal const val DF_READ_NO_CSV_REPLACE = "this.readCSV(fileOrUrl, delimiter, header, colTypes, skipLines, readLines, duplicate, charset)" -internal const val CREATE_FRAME_COLUMN = "Replaced by df.chunked(). $MESSAGE_0_16" -internal const val CREATE_FRAME_COLUMN_REPLACE = "df.chunked(startIndices, name)" -internal const val CREATE_FRAME_COLUMN_IMPORT = "org.jetbrains.kotlinx.dataframe.api.chunked" +internal const val CREATE_FRAME_COLUMN = + "Removed from public API as this can likely better be solved by `DataFrame.chunked()`. Replaced by internal df.chunkedImpl(). $MESSAGE_0_16" +internal const val CREATE_FRAME_COLUMN_REPLACE = "df.chunkedImpl(startIndices, name)" +internal const val CHUNKED_IMPL_IMPORT = "org.jetbrains.kotlinx.dataframe.impl.api.chunkedImpl" + +internal const val CREATE_WITH_TYPE_INFERENCE = + "This function is deprecated and should be replaced by `createByInference()`. 
$MESSAGE_0_16" +internal const val CREATE_WITH_TYPE_INFERENCE_REPLACE = + "createByInference(name, values, TypeSuggestion.Infer, nullable)" +internal const val CREATE_BY_INFERENCE_IMPORT = "org.jetbrains.kotlinx.dataframe.DataColumn.Companion.createByInference" +internal const val TYPE_SUGGESTION_IMPORT = "org.jetbrains.kotlinx.dataframe.columns.TypeSuggestion" + +internal const val CREATE = "This function is deprecated and should be replaced by `createByType()`. $MESSAGE_0_16" +internal const val CREATE_REPLACE = "createByType(name, values, type, infer)" +internal const val CREATE_INLINE_REPLACE = "createByType(name, values, infer)" +internal const val CREATE_BY_TYPE_IMPORT = "org.jetbrains.kotlinx.dataframe.DataColumn.Companion.createByType" + +internal const val GUESS_VALUE_TYPE = "This function is just here for binary compatibility. $MESSAGE_0_16" + +internal const val CREATE_COLUMN = "This function is just here for binary compatibility. $MESSAGE_0_16" + +internal const val GUESS_COLUMN_TYPE = "This function is just here for binary compatibility. $MESSAGE_0_16" // endregion