Skip to content

JSON reading: unified numbers #1073

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Feb 27, 2025
59 changes: 42 additions & 17 deletions core/api/core.api

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import org.jetbrains.kotlinx.dataframe.api.JsonPath
import org.jetbrains.kotlinx.dataframe.api.KeyValueProperty
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
import org.jetbrains.kotlinx.dataframe.documentation.UnifyingNumbers
import org.jetbrains.kotlinx.dataframe.io.JSON

/**
Expand Down Expand Up @@ -80,4 +81,6 @@ public annotation class JsonOptions(
* `["""\$["store"]["book"][*]["author"]"""]`
*/
public val keyValuePaths: Array<String> = [],
/** Whether to [unify the numbers that are read][UnifyingNumbers]. `true` by default. */
public val unifyNumbers: Boolean = true,
)
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,30 @@ internal fun <T> getValuesType(values: List<T>, type: KType, infer: Infer): KTyp
@Deprecated(GUESS_VALUE_TYPE, level = DeprecationLevel.HIDDEN)
@PublishedApi
internal fun guessValueType(values: Sequence<Any?>, upperBound: KType? = null, listifyValues: Boolean = false): KType =
guessValueType(values = values, upperBound = upperBound, listifyValues = listifyValues, allColsMakesRow = false)
guessValueType(
values = values,
upperBound = upperBound,
listifyValues = listifyValues,
allColsMakesRow = false,
unifyNumbers = false,
)

/**
 * Binary-compatibility shim: keeps the four-parameter signature available, since it is `@PublishedApi`.
 *
 * Hidden from source usage via [DeprecationLevel.HIDDEN]; it simply forwards to the overload
 * that additionally accepts `unifyNumbers`, passing `false` for it.
 */
@Deprecated(GUESS_VALUE_TYPE, level = DeprecationLevel.HIDDEN)
@PublishedApi
internal fun guessValueType(
    values: Sequence<Any?>,
    upperBound: KType? = null,
    listifyValues: Boolean = false,
    allColsMakesRow: Boolean = false,
): KType {
    // Passing `unifyNumbers` by name targets the overload declaring that parameter, not this one.
    return guessValueType(
        values = values,
        upperBound = upperBound,
        listifyValues = listifyValues,
        allColsMakesRow = allColsMakesRow,
        unifyNumbers = false,
    )
}

/**
* Returns the guessed value type of the given [values] sequence.
Expand All @@ -381,13 +404,18 @@ internal fun guessValueType(values: Sequence<Any?>, upperBound: KType? = null, l
* @param allColsMakesRow if true, then, if all values are non-null columns, we assume
* that a column group should be created instead of a [DataColumn][DataColumn]`<`[AnyCol][AnyCol]`>`,
* so the function will return [DataRow].
* @param unifyNumbers if true, then all number types encountered will be unified to the smallest possible
* number-type that can losslessly hold all number values in [values]. See [commonNumberClass].
* Unsigned numbers are not supported.
* If false, the result of encountering multiple number types would be [Number].
*/
@PublishedApi
internal fun guessValueType(
values: Sequence<Any?>,
upperBound: KType? = null,
listifyValues: Boolean = false,
allColsMakesRow: Boolean = false,
unifyNumbers: Boolean = false,
): KType {
val classes = mutableSetOf<KClass<*>>()
val collectionClasses = mutableSetOf<KClass<out Collection<*>>>()
Expand Down Expand Up @@ -443,6 +471,18 @@ internal fun guessValueType(
classesInCollection.all { it.isSubclassOf(DataRow::class) } &&
!nullsInCollection

if (unifyNumbers) {
val nothingClass = Nothing::class
val usedNumberClasses = classes.filter {
it.isSubclassOf(Number::class) && it != nothingClass
}
if (usedNumberClasses.isNotEmpty()) {
val unifiedNumberClass = usedNumberClasses.unifiedNumberClass() as KClass<Number>
classes -= usedNumberClasses
classes += unifiedNumberClass
}
}

return when {
classes.isNotEmpty() -> {
if (hasRows) classes.add(DataRow::class)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,18 +31,23 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnsResolver
import org.jetbrains.kotlinx.dataframe.columns.TypeSuggestion
import org.jetbrains.kotlinx.dataframe.columns.ValueColumn
import org.jetbrains.kotlinx.dataframe.columns.toColumnsSetOf
import org.jetbrains.kotlinx.dataframe.documentation.UnifyingNumbers
import org.jetbrains.kotlinx.dataframe.impl.DataFrameReceiver
import org.jetbrains.kotlinx.dataframe.impl.DataRowImpl
import org.jetbrains.kotlinx.dataframe.impl.api.createConverter
import org.jetbrains.kotlinx.dataframe.impl.asList
import org.jetbrains.kotlinx.dataframe.impl.guessValueType
import org.jetbrains.kotlinx.dataframe.impl.isNothing
import org.jetbrains.kotlinx.dataframe.impl.replaceGenericTypeParametersWithUpperbound
import org.jetbrains.kotlinx.dataframe.index
import org.jetbrains.kotlinx.dataframe.nrow
import org.jetbrains.kotlinx.dataframe.util.CREATE_COLUMN
import org.jetbrains.kotlinx.dataframe.util.GUESS_COLUMN_TYPE
import kotlin.reflect.KClass
import kotlin.reflect.KType
import kotlin.reflect.full.isSubtypeOf
import kotlin.reflect.full.withNullability
import kotlin.reflect.typeOf

// region create DataColumn

Expand Down Expand Up @@ -193,6 +198,9 @@ internal fun Array<out String>.toNumberColumns() = toColumnsSetOf<Number>()
* Note: this parameter is ignored if another [Collection] is present in the values.
* @param allColsMakesColGroup if `true`, then, if all values are non-null same-sized columns,
* a column group will be created instead of a [DataColumn][DataColumn]`<`[AnyCol][AnyCol]`>`.
* @param unifyNumbers if `true`, then all numbers encountered in [values] will be converted to the smallest possible
* number-type that can hold all the values losslessly. Unsigned numbers are not supported. See [UnifyingNumbers].
* For example, if the values are `[1, 2f, 3.0]`, then all values will be converted to [Double].
*/
@PublishedApi
internal fun <T> createColumnGuessingType(
Expand All @@ -202,6 +210,7 @@ internal fun <T> createColumnGuessingType(
nullable: Boolean? = null,
listifyValues: Boolean = false,
allColsMakesColGroup: Boolean = false,
unifyNumbers: Boolean = false,
): DataColumn<T> =
createColumnGuessingType(
name = "",
Expand All @@ -211,6 +220,7 @@ internal fun <T> createColumnGuessingType(
nullable = nullable,
listifyValues = listifyValues,
allColsMakesColGroup = allColsMakesColGroup,
unifyNumbers = unifyNumbers,
)

/**
Expand All @@ -226,6 +236,7 @@ internal fun <T> createColumnGuessingType(
nullable: Boolean? = null,
listifyValues: Boolean = false,
allColsMakesColGroup: Boolean = false,
unifyNumbers: Boolean = false,
): DataColumn<T> {
val type = when (suggestedType) {
is TypeSuggestion.Infer, is TypeSuggestion.InferWithUpperbound ->
Expand All @@ -234,11 +245,23 @@ internal fun <T> createColumnGuessingType(
upperBound = (suggestedType as? TypeSuggestion.InferWithUpperbound)?.upperbound,
listifyValues = listifyValues,
allColsMakesRow = allColsMakesColGroup,
unifyNumbers = unifyNumbers,
)

is TypeSuggestion.Use -> suggestedType.type
}

// Only needed when unifyNumbers == true.
// Wraps a Number -> targetType converter so that anything that is not a Number
// (including null) is passed through unchanged.
@Suppress("UNCHECKED_CAST")
fun getSafeNumberConverter(targetType: KType): (Any?) -> Any? {
    val toTarget = createConverter(from = typeOf<Number>(), to = targetType) as (Number) -> Number?

    return { element ->
        when (element) {
            is Number -> toTarget(element)
            else -> element
        }
    }
}

return when (type.classifier!! as KClass<*>) {
// guessValueType can only return DataRow if all values are `AnyRow?`
// or allColsMakesColGroup == true, and all values are `AnyCol`
Expand Down Expand Up @@ -269,18 +292,29 @@ internal fun <T> createColumnGuessingType(
List::class -> {
val nullable = type.isMarkedNullable
var isListOfRows: Boolean? = null
val lists = values.map {
when (it) {
val subType = type.arguments.first().type!! // List<T> -> T

val needsNumberConversion = unifyNumbers &&
subType.isSubtypeOf(typeOf<Number?>()) &&
!subType.isNothing
val numberConverter: (Any?) -> Any? by lazy { getSafeNumberConverter(subType) }

val lists = values.map { value ->
when (value) {
null -> if (nullable) null else emptyList()

is List<*> -> {
if (isListOfRows != false && it.isNotEmpty()) isListOfRows = it.all { it is AnyRow }
it
if (isListOfRows != false && value.isNotEmpty()) isListOfRows = value.all { it is AnyRow }

if (needsNumberConversion) value.map(numberConverter) else value
}

else -> { // if !detectType and suggestedType is a list, we wrap the values in lists
if (isListOfRows != false) isListOfRows = it is AnyRow
listOf(it)
if (isListOfRows != false) isListOfRows = value is AnyRow

listOf(
if (needsNumberConversion) numberConverter(value) else value,
)
}
}
}
Expand All @@ -303,10 +337,15 @@ internal fun <T> createColumnGuessingType(
}
}

else ->
else -> {
val needsNumberConversion = unifyNumbers &&
type.isSubtypeOf(typeOf<Number?>()) &&
!type.isNothing
val numberConverter by lazy { getSafeNumberConverter(type) }

DataColumn.createValueColumn(
name = name,
values = values.asList(),
values = if (needsNumberConversion) values.map(numberConverter) as List<T> else values.asList(),
type = if (nullable != null) type.withNullability(nullable) else type,
infer = when {
// even though an exact type is suggested,
Expand All @@ -318,6 +357,7 @@ internal fun <T> createColumnGuessingType(
},
defaultValue = defaultValue,
)
}
}
}

Expand All @@ -332,6 +372,7 @@ internal fun <T> createColumn(values: Iterable<T>, suggestedType: KType, guessTy
values = values,
suggestedType = TypeSuggestion.create(suggestedType, guessType),
allColsMakesColGroup = true,
unifyNumbers = false,
)

/** Just for binary compatibility, since it's @PublishedApi. */
Expand All @@ -355,4 +396,48 @@ internal fun <T> guessColumnType(
allColsMakesColGroup = false,
)

/**
 * Binary-compatibility shim: keeps the signature without `unifyNumbers` available, since it is `@PublishedApi`.
 *
 * Hidden from source usage via [DeprecationLevel.HIDDEN]; every argument is forwarded unchanged
 * and `unifyNumbers` is fixed to `false`.
 */
@Deprecated(GUESS_COLUMN_TYPE, level = DeprecationLevel.HIDDEN)
@PublishedApi
internal fun <T> createColumnGuessingType(
    values: Iterable<T>,
    suggestedType: TypeSuggestion = TypeSuggestion.Infer,
    defaultValue: T? = null,
    nullable: Boolean? = null,
    listifyValues: Boolean = false,
    allColsMakesColGroup: Boolean = false,
): DataColumn<T> {
    // Passing `unifyNumbers` by name targets the overload declaring that parameter, not this one.
    return createColumnGuessingType(
        values = values,
        suggestedType = suggestedType,
        defaultValue = defaultValue,
        nullable = nullable,
        listifyValues = listifyValues,
        allColsMakesColGroup = allColsMakesColGroup,
        unifyNumbers = false,
    )
}

/**
 * Binary-compatibility shim for the named-column variant: keeps the signature without `unifyNumbers`
 * available, since it is `@PublishedApi`.
 *
 * Hidden from source usage via [DeprecationLevel.HIDDEN]; every argument (including [name]) is forwarded
 * unchanged and `unifyNumbers` is fixed to `false`.
 */
@Deprecated(GUESS_COLUMN_TYPE, level = DeprecationLevel.HIDDEN)
@PublishedApi
internal fun <T> createColumnGuessingType(
    name: String,
    values: Iterable<T>,
    suggestedType: TypeSuggestion = TypeSuggestion.Infer,
    defaultValue: T? = null,
    nullable: Boolean? = null,
    listifyValues: Boolean = false,
    allColsMakesColGroup: Boolean = false,
): DataColumn<T> {
    // Passing `unifyNumbers` by name targets the overload declaring that parameter, not this one.
    return createColumnGuessingType(
        name = name,
        values = values,
        suggestedType = suggestedType,
        defaultValue = defaultValue,
        nullable = nullable,
        listifyValues = listifyValues,
        allColsMakesColGroup = allColsMakesColGroup,
        unifyNumbers = false,
    )
}

// endregion
Loading