diff --git a/core/api/core.api b/core/api/core.api index 21ecce65f6..c82a7fc7f8 100644 --- a/core/api/core.api +++ b/core/api/core.api @@ -1300,6 +1300,7 @@ public abstract interface annotation class org/jetbrains/kotlinx/dataframe/annot public abstract interface annotation class org/jetbrains/kotlinx/dataframe/annotations/JsonOptions : java/lang/annotation/Annotation { public abstract fun keyValuePaths ()[Ljava/lang/String; public abstract fun typeClashTactic ()Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic; + public abstract fun unifyNumbers ()Z } public abstract interface annotation class org/jetbrains/kotlinx/dataframe/annotations/Order : java/lang/annotation/Annotation { @@ -9930,9 +9931,11 @@ public final class org/jetbrains/kotlinx/dataframe/impl/ExceptionUtilsKt { public final class org/jetbrains/kotlinx/dataframe/impl/TypeUtilsKt { public static final fun getValuesType (Ljava/util/List;Lkotlin/reflect/KType;Lorg/jetbrains/kotlinx/dataframe/api/Infer;)Lkotlin/reflect/KType; public static final synthetic fun guessValueType (Lkotlin/sequences/Sequence;Lkotlin/reflect/KType;Z)Lkotlin/reflect/KType; - public static final fun guessValueType (Lkotlin/sequences/Sequence;Lkotlin/reflect/KType;ZZ)Lkotlin/reflect/KType; + public static final synthetic fun guessValueType (Lkotlin/sequences/Sequence;Lkotlin/reflect/KType;ZZ)Lkotlin/reflect/KType; + public static final fun guessValueType (Lkotlin/sequences/Sequence;Lkotlin/reflect/KType;ZZZ)Lkotlin/reflect/KType; public static synthetic fun guessValueType$default (Lkotlin/sequences/Sequence;Lkotlin/reflect/KType;ZILjava/lang/Object;)Lkotlin/reflect/KType; public static synthetic fun guessValueType$default (Lkotlin/sequences/Sequence;Lkotlin/reflect/KType;ZZILjava/lang/Object;)Lkotlin/reflect/KType; + public static synthetic fun guessValueType$default (Lkotlin/sequences/Sequence;Lkotlin/reflect/KType;ZZZILjava/lang/Object;)Lkotlin/reflect/KType; public static final fun replaceGenericTypeParametersWithUpperbound 
(Lkotlin/reflect/KType;)Lkotlin/reflect/KType; } @@ -10077,7 +10080,7 @@ public final class org/jetbrains/kotlinx/dataframe/impl/api/ToDataFrameKt { } public final class org/jetbrains/kotlinx/dataframe/impl/api/ToSequenceKt { - public static final fun toSequenceImpl (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Lkotlin/reflect/KType;)Ljava/lang/Iterable; + public static final fun toSequenceImpl (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Lkotlin/reflect/KType;)Lkotlin/sequences/Sequence; } public final class org/jetbrains/kotlinx/dataframe/impl/api/UpdateKt { @@ -10145,10 +10148,14 @@ public final class org/jetbrains/kotlinx/dataframe/impl/columns/ComputedColumnRe public final class org/jetbrains/kotlinx/dataframe/impl/columns/ConstructorsKt { public static final synthetic fun createColumn (Ljava/lang/Iterable;Lkotlin/reflect/KType;Z)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static synthetic fun createColumn$default (Ljava/lang/Iterable;Lkotlin/reflect/KType;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; - public static final fun createColumnGuessingType (Ljava/lang/Iterable;Lorg/jetbrains/kotlinx/dataframe/columns/TypeSuggestion;Ljava/lang/Object;Ljava/lang/Boolean;ZZ)Lorg/jetbrains/kotlinx/dataframe/DataColumn; - public static final fun createColumnGuessingType (Ljava/lang/String;Ljava/lang/Iterable;Lorg/jetbrains/kotlinx/dataframe/columns/TypeSuggestion;Ljava/lang/Object;Ljava/lang/Boolean;ZZ)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static final synthetic fun createColumnGuessingType (Ljava/lang/Iterable;Lorg/jetbrains/kotlinx/dataframe/columns/TypeSuggestion;Ljava/lang/Object;Ljava/lang/Boolean;ZZ)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static final fun createColumnGuessingType (Ljava/lang/Iterable;Lorg/jetbrains/kotlinx/dataframe/columns/TypeSuggestion;Ljava/lang/Object;Ljava/lang/Boolean;ZZZ)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static final synthetic fun createColumnGuessingType 
(Ljava/lang/String;Ljava/lang/Iterable;Lorg/jetbrains/kotlinx/dataframe/columns/TypeSuggestion;Ljava/lang/Object;Ljava/lang/Boolean;ZZ)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static final fun createColumnGuessingType (Ljava/lang/String;Ljava/lang/Iterable;Lorg/jetbrains/kotlinx/dataframe/columns/TypeSuggestion;Ljava/lang/Object;Ljava/lang/Boolean;ZZZ)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static synthetic fun createColumnGuessingType$default (Ljava/lang/Iterable;Lorg/jetbrains/kotlinx/dataframe/columns/TypeSuggestion;Ljava/lang/Object;Ljava/lang/Boolean;ZZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static synthetic fun createColumnGuessingType$default (Ljava/lang/Iterable;Lorg/jetbrains/kotlinx/dataframe/columns/TypeSuggestion;Ljava/lang/Object;Ljava/lang/Boolean;ZZZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static synthetic fun createColumnGuessingType$default (Ljava/lang/String;Ljava/lang/Iterable;Lorg/jetbrains/kotlinx/dataframe/columns/TypeSuggestion;Ljava/lang/Object;Ljava/lang/Boolean;ZZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; + public static synthetic fun createColumnGuessingType$default (Ljava/lang/String;Ljava/lang/Iterable;Lorg/jetbrains/kotlinx/dataframe/columns/TypeSuggestion;Ljava/lang/Object;Ljava/lang/Boolean;ZZZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static final synthetic fun guessColumnType (Ljava/lang/String;Ljava/util/List;Lkotlin/reflect/KType;ZLjava/lang/Object;Ljava/lang/Boolean;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static synthetic fun guessColumnType$default (Ljava/lang/String;Ljava/util/List;Lkotlin/reflect/KType;ZLjava/lang/Object;Ljava/lang/Boolean;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static final fun newColumn 
(Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;Lkotlin/reflect/KType;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/api/Infer;Lkotlin/jvm/functions/Function2;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; @@ -10479,8 +10486,10 @@ public final class org/jetbrains/kotlinx/dataframe/io/HtmlKt { public final class org/jetbrains/kotlinx/dataframe/io/JSON : org/jetbrains/kotlinx/dataframe/io/SupportedDataFrameFormat { public fun ()V - public fun (Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Ljava/util/List;)V + public synthetic fun (Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Ljava/util/List;)V public synthetic fun (Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Ljava/util/List;ILkotlin/jvm/internal/DefaultConstructorMarker;)V + public fun (Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Ljava/util/List;Z)V + public synthetic fun (Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Ljava/util/List;ZILkotlin/jvm/internal/DefaultConstructorMarker;)V public fun acceptsExtension (Ljava/lang/String;)Z public fun acceptsSample (Lorg/jetbrains/kotlinx/dataframe/io/SupportedFormatSample;)Z public fun createDefaultReadMethod (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/codeGen/DefaultReadDfMethod; @@ -10498,26 +10507,42 @@ public final class org/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic : jav } public final class org/jetbrains/kotlinx/dataframe/io/JsonKt { - public static final fun readJson (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/File;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static final fun readJson (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/InputStream;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static final fun readJson 
(Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static final fun readJson (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/net/URL;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static final fun readJson (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/io/File;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;)Lorg/jetbrains/kotlinx/dataframe/DataRow; - public static final fun readJson (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/io/InputStream;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;)Lorg/jetbrains/kotlinx/dataframe/DataRow; - public static final fun readJson (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/lang/String;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;)Lorg/jetbrains/kotlinx/dataframe/DataRow; - public static final fun readJson (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/net/URL;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static final synthetic fun readJson (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/File;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static final fun readJson (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/File;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static final synthetic fun readJson 
(Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/InputStream;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static final fun readJson (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/InputStream;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static final fun readJson (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static final synthetic fun readJson (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/net/URL;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static final fun readJson (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/net/URL;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static final synthetic fun readJson (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/io/File;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static final fun readJson (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/io/File;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Z)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static final synthetic fun readJson (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/io/InputStream;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static final fun readJson 
(Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/io/InputStream;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Z)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static final fun readJson (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/lang/String;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Z)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static final synthetic fun readJson (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/net/URL;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static final fun readJson (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/net/URL;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Z)Lorg/jetbrains/kotlinx/dataframe/DataRow; public static synthetic fun readJson$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/File;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static synthetic fun readJson$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/File;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; public static synthetic fun readJson$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/InputStream;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static synthetic fun readJson$default 
(Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static synthetic fun readJson$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/InputStream;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static synthetic fun readJson$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; public static synthetic fun readJson$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/net/URL;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static synthetic fun readJson$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/net/URL;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; public static synthetic fun readJson$default (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/io/File;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static synthetic fun readJson$default (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/io/File;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow; public static synthetic fun readJson$default 
(Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/io/InputStream;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow; - public static synthetic fun readJson$default (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/lang/String;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static synthetic fun readJson$default (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/io/InputStream;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static synthetic fun readJson$default (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/lang/String;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow; public static synthetic fun readJson$default (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/net/URL;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow; - public static final fun readJsonStr (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static final fun readJsonStr (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/lang/String;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static synthetic fun readJson$default 
(Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/net/URL;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static final synthetic fun readJsonStr (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static final fun readJsonStr (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static final synthetic fun readJsonStr (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/lang/String;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static final fun readJsonStr (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/lang/String;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Z)Lorg/jetbrains/kotlinx/dataframe/DataRow; public static synthetic fun readJsonStr$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static synthetic fun readJsonStr$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; public static synthetic fun readJsonStr$default 
(Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/lang/String;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static synthetic fun readJsonStr$default (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/lang/String;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow; public static final fun toJson (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Z)Ljava/lang/String; public static final fun toJson (Lorg/jetbrains/kotlinx/dataframe/DataRow;Z)Ljava/lang/String; public static synthetic fun toJson$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;ZILjava/lang/Object;)Ljava/lang/String; diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/annotations/ImportDataSchema.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/annotations/ImportDataSchema.kt index ded7a366e9..184f10baf8 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/annotations/ImportDataSchema.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/annotations/ImportDataSchema.kt @@ -4,6 +4,7 @@ import org.jetbrains.kotlinx.dataframe.api.JsonPath import org.jetbrains.kotlinx.dataframe.api.KeyValueProperty import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup import org.jetbrains.kotlinx.dataframe.columns.FrameColumn +import org.jetbrains.kotlinx.dataframe.documentation.UnifyingNumbers import org.jetbrains.kotlinx.dataframe.io.JSON /** @@ -80,4 +81,6 @@ public annotation class JsonOptions( * `["""\$["store"]["book"][*]["author"]"""]` */ public val keyValuePaths: Array = [], + /** Whether to [unify the numbers that are read][UnifyingNumbers]. `true` by default. 
*/ + public val unifyNumbers: Boolean = true, ) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/TypeUtils.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/TypeUtils.kt index 38be1760bf..b5f9635139 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/TypeUtils.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/TypeUtils.kt @@ -363,7 +363,30 @@ internal fun getValuesType(values: List, type: KType, infer: Infer): KTyp @Deprecated(GUESS_VALUE_TYPE, level = DeprecationLevel.HIDDEN) @PublishedApi internal fun guessValueType(values: Sequence, upperBound: KType? = null, listifyValues: Boolean = false): KType = - guessValueType(values = values, upperBound = upperBound, listifyValues = listifyValues, allColsMakesRow = false) + guessValueType( + values = values, + upperBound = upperBound, + listifyValues = listifyValues, + allColsMakesRow = false, + unifyNumbers = false, + ) + +/** Just for binary compatibility, as it's @PublishedApi. */ +@Deprecated(GUESS_VALUE_TYPE, level = DeprecationLevel.HIDDEN) +@PublishedApi +internal fun guessValueType( + values: Sequence, + upperBound: KType? = null, + listifyValues: Boolean = false, + allColsMakesRow: Boolean = false, +): KType = + guessValueType( + values = values, + upperBound = upperBound, + listifyValues = listifyValues, + allColsMakesRow = allColsMakesRow, + unifyNumbers = false, + ) /** * Returns the guessed value type of the given [values] sequence. @@ -381,6 +404,10 @@ internal fun guessValueType(values: Sequence, upperBound: KType? = null, l * @param allColsMakesRow if true, then, if all values are non-null columns, we assume * that a column group should be created instead of a [DataColumn][DataColumn]`<`[AnyCol][AnyCol]`>`, * so the function will return [DataRow]. + * @param unifyNumbers if true, then all number types encountered will be unified to the smallest possible + * number-type that can hold all number values lossless in [values]. 
See [commonNumberClass]. + * Unsigned numbers are not supported. + * If false, the result of encountering multiple number types would be [Number]. */ @PublishedApi internal fun guessValueType( @@ -388,6 +415,7 @@ internal fun guessValueType( upperBound: KType? = null, listifyValues: Boolean = false, allColsMakesRow: Boolean = false, + unifyNumbers: Boolean = false, ): KType { val classes = mutableSetOf>() val collectionClasses = mutableSetOf>>() @@ -443,6 +471,18 @@ internal fun guessValueType( classesInCollection.all { it.isSubclassOf(DataRow::class) } && !nullsInCollection + if (unifyNumbers) { + val nothingClass = Nothing::class + val usedNumberClasses = classes.filter { + it.isSubclassOf(Number::class) && it != nothingClass + } + if (usedNumberClasses.isNotEmpty()) { + val unifiedNumberClass = usedNumberClasses.unifiedNumberClass() as KClass + classes -= usedNumberClasses + classes += unifiedNumberClass + } + } + return when { classes.isNotEmpty() -> { if (hasRows) classes.add(DataRow::class) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt index 638a8a6475..84a6368b8e 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt @@ -31,10 +31,13 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnsResolver import org.jetbrains.kotlinx.dataframe.columns.TypeSuggestion import org.jetbrains.kotlinx.dataframe.columns.ValueColumn import org.jetbrains.kotlinx.dataframe.columns.toColumnsSetOf +import org.jetbrains.kotlinx.dataframe.documentation.UnifyingNumbers import org.jetbrains.kotlinx.dataframe.impl.DataFrameReceiver import org.jetbrains.kotlinx.dataframe.impl.DataRowImpl +import org.jetbrains.kotlinx.dataframe.impl.api.createConverter import org.jetbrains.kotlinx.dataframe.impl.asList import 
org.jetbrains.kotlinx.dataframe.impl.guessValueType +import org.jetbrains.kotlinx.dataframe.impl.isNothing import org.jetbrains.kotlinx.dataframe.impl.replaceGenericTypeParametersWithUpperbound import org.jetbrains.kotlinx.dataframe.index import org.jetbrains.kotlinx.dataframe.nrow @@ -42,7 +45,9 @@ import org.jetbrains.kotlinx.dataframe.util.CREATE_COLUMN import org.jetbrains.kotlinx.dataframe.util.GUESS_COLUMN_TYPE import kotlin.reflect.KClass import kotlin.reflect.KType +import kotlin.reflect.full.isSubtypeOf import kotlin.reflect.full.withNullability +import kotlin.reflect.typeOf // region create DataColumn @@ -193,6 +198,9 @@ internal fun Array.toNumberColumns() = toColumnsSetOf() * Note: this parameter is ignored if another [Collection] is present in the values. * @param allColsMakesColGroup if `true`, then, if all values are non-null same-sized columns, * a column group will be created instead of a [DataColumn][DataColumn]`<`[AnyCol][AnyCol]`>`. + * @param unifyNumbers if `true`, then all numbers encountered in [values] will be converted to the smallest possible + * number-type that can hold all the values lossless. Unsigned numbers are not supported. See [UnifyingNumbers]. + * For example, if the values are `[1, 2f, 3.0]`, then all values will be converted to [Double]. */ @PublishedApi internal fun createColumnGuessingType( @@ -202,6 +210,7 @@ internal fun createColumnGuessingType( nullable: Boolean? = null, listifyValues: Boolean = false, allColsMakesColGroup: Boolean = false, + unifyNumbers: Boolean = false, ): DataColumn = createColumnGuessingType( name = "", @@ -211,6 +220,7 @@ internal fun createColumnGuessingType( nullable = nullable, listifyValues = listifyValues, allColsMakesColGroup = allColsMakesColGroup, + unifyNumbers = unifyNumbers, ) /** @@ -226,6 +236,7 @@ internal fun createColumnGuessingType( nullable: Boolean? 
= null, listifyValues: Boolean = false, allColsMakesColGroup: Boolean = false, + unifyNumbers: Boolean = false, ): DataColumn { val type = when (suggestedType) { is TypeSuggestion.Infer, is TypeSuggestion.InferWithUpperbound -> @@ -234,11 +245,23 @@ internal fun createColumnGuessingType( upperBound = (suggestedType as? TypeSuggestion.InferWithUpperbound)?.upperbound, listifyValues = listifyValues, allColsMakesRow = allColsMakesColGroup, + unifyNumbers = unifyNumbers, ) is TypeSuggestion.Use -> suggestedType.type } + // only needs to be used when unifyNumbers == true + @Suppress("UNCHECKED_CAST") + fun getSafeNumberConverter(targetType: KType): (Any?) -> Any? { + val converter = createConverter( + from = typeOf(), + to = targetType, + ) as (Number) -> Number? + + return { value -> if (value != null && value is Number) converter(value) else value } + } + return when (type.classifier!! as KClass<*>) { // guessValueType can only return DataRow if all values are `AnyRow?` // or allColsMakesColGroup == true, and all values are `AnyCol` @@ -269,18 +292,29 @@ internal fun createColumnGuessingType( List::class -> { val nullable = type.isMarkedNullable var isListOfRows: Boolean? = null - val lists = values.map { - when (it) { + val subType = type.arguments.first().type!! // List -> T + + val needsNumberConversion = unifyNumbers && + subType.isSubtypeOf(typeOf()) && + !subType.isNothing + val numberConverter: (Any?) -> Any? 
by lazy { getSafeNumberConverter(subType) } + + val lists = values.map { value -> + when (value) { null -> if (nullable) null else emptyList() is List<*> -> { - if (isListOfRows != false && it.isNotEmpty()) isListOfRows = it.all { it is AnyRow } - it + if (isListOfRows != false && value.isNotEmpty()) isListOfRows = value.all { it is AnyRow } + + if (needsNumberConversion) value.map(numberConverter) else value } else -> { // if !detectType and suggestedType is a list, we wrap the values in lists - if (isListOfRows != false) isListOfRows = it is AnyRow - listOf(it) + if (isListOfRows != false) isListOfRows = value is AnyRow + + listOf( + if (needsNumberConversion) numberConverter(value) else value, + ) } } } @@ -303,10 +337,15 @@ internal fun createColumnGuessingType( } } - else -> + else -> { + val needsNumberConversion = unifyNumbers && + type.isSubtypeOf(typeOf()) && + !type.isNothing + val numberConverter by lazy { getSafeNumberConverter(type) } + DataColumn.createValueColumn( name = name, - values = values.asList(), + values = if (needsNumberConversion) values.map(numberConverter) as List else values.asList(), type = if (nullable != null) type.withNullability(nullable) else type, infer = when { // even though an exact type is suggested, @@ -318,6 +357,7 @@ internal fun createColumnGuessingType( }, defaultValue = defaultValue, ) + } } } @@ -332,6 +372,7 @@ internal fun createColumn(values: Iterable, suggestedType: KType, guessTy values = values, suggestedType = TypeSuggestion.create(suggestedType, guessType), allColsMakesColGroup = true, + unifyNumbers = false, ) /** Just for binary compatibility, since it's @PublishedApi. */ @@ -355,4 +396,48 @@ internal fun guessColumnType( allColsMakesColGroup = false, ) +/** Just for binary compatibility, since it's @PublishedApi. 
*/ +@Deprecated(GUESS_COLUMN_TYPE, level = DeprecationLevel.HIDDEN) +@PublishedApi +internal fun createColumnGuessingType( + values: Iterable, + suggestedType: TypeSuggestion = TypeSuggestion.Infer, + defaultValue: T? = null, + nullable: Boolean? = null, + listifyValues: Boolean = false, + allColsMakesColGroup: Boolean = false, +): DataColumn = + createColumnGuessingType( + values = values, + suggestedType = suggestedType, + defaultValue = defaultValue, + nullable = nullable, + listifyValues = listifyValues, + allColsMakesColGroup = allColsMakesColGroup, + unifyNumbers = false, + ) + +/** Just for binary compatibility, since it's @PublishedApi. */ +@Deprecated(GUESS_COLUMN_TYPE, level = DeprecationLevel.HIDDEN) +@PublishedApi +internal fun createColumnGuessingType( + name: String, + values: Iterable, + suggestedType: TypeSuggestion = TypeSuggestion.Infer, + defaultValue: T? = null, + nullable: Boolean? = null, + listifyValues: Boolean = false, + allColsMakesColGroup: Boolean = false, +): DataColumn = + createColumnGuessingType( + name = name, + values = values, + suggestedType = suggestedType, + defaultValue = defaultValue, + nullable = nullable, + listifyValues = listifyValues, + allColsMakesColGroup = allColsMakesColGroup, + unifyNumbers = false, + ) + // endregion diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt index 6a1547dcb9..e47f176d32 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt @@ -36,6 +36,7 @@ import org.jetbrains.kotlinx.dataframe.api.toDataFrame import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup import org.jetbrains.kotlinx.dataframe.columns.FrameColumn import org.jetbrains.kotlinx.dataframe.columns.TypeSuggestion +import org.jetbrains.kotlinx.dataframe.documentation.UnifyingNumbers import 
org.jetbrains.kotlinx.dataframe.impl.ColumnNameGenerator import org.jetbrains.kotlinx.dataframe.impl.DataCollectorBase import org.jetbrains.kotlinx.dataframe.impl.api.chunkedImpl @@ -78,8 +79,9 @@ internal interface AnyKeyValueProperty : KeyValueProperty { override val value: Any? } -internal fun readJson( +internal fun readJsonImpl( parsed: Any?, + unifyNumbers: Boolean, header: List, keyValuePaths: List = emptyList(), typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, @@ -89,12 +91,14 @@ internal fun readJson( when (parsed) { is JsonArray -> fromJsonListArrayAndValueColumns( records = parsed, + unifyNumbers = unifyNumbers, header = header, keyValuePaths = keyValuePaths, ) else -> fromJsonListArrayAndValueColumns( records = listOf(parsed), + unifyNumbers = unifyNumbers, keyValuePaths = keyValuePaths, ) } @@ -104,12 +108,14 @@ internal fun readJson( when (parsed) { is JsonArray -> fromJsonListAnyColumns( records = parsed, + unifyNumbers = unifyNumbers, header = header, keyValuePaths = keyValuePaths, ) else -> fromJsonListAnyColumns( records = listOf(parsed), + unifyNumbers = unifyNumbers, keyValuePaths = keyValuePaths, ) } @@ -123,6 +129,7 @@ internal fun readJson( * A.k.a. [TypeClashTactic.ANY_COLUMNS]. * * @param records List of json elements to be converted to a [DataFrame]. + * @param unifyNumbers Whether to [unify the numbers that are read][UnifyingNumbers]. * @param keyValuePaths List of [JsonPath]s where instead of a [ColumnGroup], a [FrameColumn]<[KeyValueProperty]> * will be created. * @param header Optional list of column names. If given, [records] will be read like an object with [header] being the keys. 
@@ -130,6 +137,7 @@ internal fun readJson( */ internal fun fromJsonListAnyColumns( records: List<*>, + unifyNumbers: Boolean, keyValuePaths: List = emptyList(), header: List = emptyList(), jsonPath: JsonPath = JsonPath(), @@ -185,6 +193,7 @@ internal fun fromJsonListAnyColumns( val parsed = fromJsonListAnyColumns( records = listOf(v), + unifyNumbers = unifyNumbers, keyValuePaths = keyValuePaths, jsonPath = jsonPath.replaceLastWildcardWithIndex(i), ) @@ -200,6 +209,7 @@ internal fun fromJsonListAnyColumns( is JsonArray -> { val parsed = fromJsonListAnyColumns( records = v, + unifyNumbers = unifyNumbers, keyValuePaths = keyValuePaths, jsonPath = jsonPath.replaceLastWildcardWithIndex(i).appendArrayWithWildcard(), ) @@ -229,10 +239,10 @@ internal fun fromJsonListAnyColumns( v.longOrNull != null -> collector.add(v.long) - v.doubleOrNull != null -> collector.add(v.double) - v.floatOrNull != null -> collector.add(v.float) + v.doubleOrNull != null -> collector.add(v.double) + else -> error("Malformed JSON element ${v::class}: $v") } } @@ -240,7 +250,7 @@ internal fun fromJsonListAnyColumns( else -> collector.add(v) } } - val column = collector.toColumn(VALUE_COLUMN_NAME) + val column = createColumnGuessingType(VALUE_COLUMN_NAME, collector.values, unifyNumbers = unifyNumbers) val res = if (nanIndices.isNotEmpty()) { fun DataColumn.updateNaNs(nanValue: C): DataColumn { var j = 0 @@ -281,6 +291,7 @@ internal fun fromJsonListAnyColumns( } val parsed = fromJsonListAnyColumns( records = values, + unifyNumbers = unifyNumbers, keyValuePaths = keyValuePaths, jsonPath = jsonPath.appendArrayWithWildcard(), ) @@ -320,6 +331,7 @@ internal fun fromJsonListAnyColumns( val map = record.mapValues { (key, value) -> val parsed = fromJsonListAnyColumns( records = listOf(value), + unifyNumbers = unifyNumbers, keyValuePaths = keyValuePaths, jsonPath = jsonPath.append(key), ) @@ -330,7 +342,7 @@ internal fun fromJsonListAnyColumns( } } val valueType = map.values.map { - 
guessValueType(sequenceOf(it)) + guessValueType(sequenceOf(it), unifyNumbers = unifyNumbers) }.commonType() valueTypes += valueType @@ -340,6 +352,7 @@ internal fun fromJsonListAnyColumns( createColumnGuessingType( values = map.values, suggestedType = TypeSuggestion.Use(valueType), + unifyNumbers = unifyNumbers, ).named(KeyValueProperty<*>::value.name), ) } @@ -393,6 +406,7 @@ internal fun fromJsonListAnyColumns( val parsed = fromJsonListAnyColumns( records = values, + unifyNumbers = unifyNumbers, keyValuePaths = keyValuePaths, jsonPath = jsonPath.append(colName), ) @@ -436,6 +450,7 @@ private fun AnyFrame.isSingleUnnamedColumn() = ncol == 1 && getColumn(0) is Unna * A.k.a. [TypeClashTactic.ARRAY_AND_VALUE_COLUMNS]. * * @param records List of json elements to be converted to a [DataFrame]. + * @param unifyNumbers Whether to [unify the numbers that are read][UnifyingNumbers]. * @param keyValuePaths List of [JsonPath]s where instead of a [ColumnGroup], a [FrameColumn]<[KeyValueProperty]> * will be created. * @param header Optional list of column names. If given, [records] will be read like an object with [header] being the keys. 
@@ -443,6 +458,7 @@ private fun AnyFrame.isSingleUnnamedColumn() = ncol == 1 && getColumn(0) is Unna */ internal fun fromJsonListArrayAndValueColumns( records: List<*>, + unifyNumbers: Boolean, keyValuePaths: List = emptyList(), header: List = emptyList(), jsonPath: JsonPath = JsonPath(), @@ -503,6 +519,7 @@ internal fun fromJsonListArrayAndValueColumns( val map = record.mapValues { (key, value) -> val parsed = fromJsonListArrayAndValueColumns( records = listOf(value), + unifyNumbers = unifyNumbers, keyValuePaths = keyValuePaths, jsonPath = jsonPath.append(key), ) @@ -514,7 +531,7 @@ internal fun fromJsonListArrayAndValueColumns( } val valueType = map.values - .map { guessValueType(sequenceOf(it)) } + .map { guessValueType(sequenceOf(it), unifyNumbers = unifyNumbers) } .commonType() dataFrameOf( @@ -522,6 +539,7 @@ internal fun fromJsonListArrayAndValueColumns( createColumnGuessingType( values = map.values, suggestedType = TypeSuggestion.Use(valueType), + unifyNumbers = unifyNumbers, ).named(KeyValueProperty<*>::value.name), ) } @@ -576,10 +594,10 @@ internal fun fromJsonListArrayAndValueColumns( v.longOrNull != null -> collector.add(v.long) - v.doubleOrNull != null -> collector.add(v.double) - v.floatOrNull != null -> collector.add(v.float) + v.doubleOrNull != null -> collector.add(v.double) + else -> error("Malformed JSON element ${v::class}: $v") } } @@ -587,7 +605,7 @@ internal fun fromJsonListArrayAndValueColumns( else -> collector.add(v) } } - val column = collector.toColumn(colName) + val column = createColumnGuessingType(colName, collector.values, unifyNumbers = unifyNumbers) val res = if (nanIndices.isNotEmpty()) { fun DataColumn.updateNaNs(nanValue: C): DataColumn { var j = 0 @@ -624,6 +642,7 @@ internal fun fromJsonListArrayAndValueColumns( } val parsed = fromJsonListArrayAndValueColumns( records = values, + unifyNumbers = unifyNumbers, keyValuePaths = keyValuePaths, jsonPath = jsonPath.appendArrayWithWildcard(), ) @@ -661,6 +680,7 @@ internal fun 
fromJsonListArrayAndValueColumns( val parsed = fromJsonListArrayAndValueColumns( records = values, + unifyNumbers = unifyNumbers, keyValuePaths = keyValuePaths, jsonPath = jsonPath.append(colName), ) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt index 54b4a799c4..eb5c1690f3 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt @@ -19,13 +19,15 @@ import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadDfMethod import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadJsonMethod import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup import org.jetbrains.kotlinx.dataframe.columns.FrameColumn +import org.jetbrains.kotlinx.dataframe.documentation.UnifyingNumbers import org.jetbrains.kotlinx.dataframe.impl.io.encodeDataFrameWithMetadata import org.jetbrains.kotlinx.dataframe.impl.io.encodeFrame import org.jetbrains.kotlinx.dataframe.impl.io.encodeRow -import org.jetbrains.kotlinx.dataframe.impl.io.readJson +import org.jetbrains.kotlinx.dataframe.impl.io.readJsonImpl import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.ANY_COLUMNS import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.ARRAY_AND_VALUE_COLUMNS +import org.jetbrains.kotlinx.dataframe.util.READ_JSON import java.io.File import java.io.InputStream import java.net.URL @@ -34,13 +36,22 @@ import kotlin.reflect.typeOf public class JSON( private val typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, private val keyValuePaths: List = emptyList(), + private val unifyNumbers: Boolean = true, ) : SupportedDataFrameFormat { + + @Deprecated(READ_JSON, level = DeprecationLevel.HIDDEN) + public constructor( + typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, + keyValuePaths: List = emptyList(), + ) : this(typeClashTactic, 
keyValuePaths, true) + override fun readDataFrame(stream: InputStream, header: List): AnyFrame = DataFrame.readJson( stream = stream, header = header, typeClashTactic = typeClashTactic, keyValuePaths = keyValuePaths, + unifyNumbers = unifyNumbers, ) override fun readDataFrame(file: File, header: List): AnyFrame = @@ -49,6 +60,7 @@ public class JSON( header = header, typeClashTactic = typeClashTactic, keyValuePaths = keyValuePaths, + unifyNumbers = unifyNumbers, ) override fun acceptsExtension(ext: String): Boolean = ext == "json" @@ -74,6 +86,11 @@ public class JSON( "typeClashTactic", typeOf(), "org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.${typeClashTactic.name}", + ) + .add( + "unifyNumbers", + typeOf(), + unifyNumbers.toString(), ), ) @@ -127,6 +144,7 @@ internal const val VALUE_COLUMN_NAME: String = "value" * will be created. * @param typeClashTactic How to handle type clashes when reading a JSON file. * @param header Optional list of column names. If given, the file will be read like an object with [header] being the keys. + * @param unifyNumbers Whether to [unify the numbers that are read][UnifyingNumbers]. `true` by default. * @return [DataFrame] from the given [file]. */ public fun DataFrame.Companion.readJson( @@ -134,7 +152,8 @@ public fun DataFrame.Companion.readJson( header: List = emptyList(), keyValuePaths: List = emptyList(), typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, -): AnyFrame = readJson(file.toURI().toURL(), header, keyValuePaths, typeClashTactic) + unifyNumbers: Boolean = true, +): AnyFrame = DataFrame.readJson(file.toURI().toURL(), header, keyValuePaths, typeClashTactic, unifyNumbers) /** * @param file Where to fetch the Json as [InputStream] to be converted to a [DataRow]. @@ -142,6 +161,7 @@ public fun DataFrame.Companion.readJson( * will be created. * @param typeClashTactic How to handle type clashes when reading a JSON file. * @param header Optional list of column names. 
If given, the file will be read like an object with [header] being the keys. + * @param unifyNumbers Whether to [unify the numbers that are read][UnifyingNumbers]. `true` by default. * @return [DataRow] from the given [file]. */ public fun DataRow.Companion.readJson( @@ -149,7 +169,8 @@ public fun DataRow.Companion.readJson( header: List = emptyList(), keyValuePaths: List = emptyList(), typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, -): AnyRow = DataFrame.readJson(file, header, keyValuePaths, typeClashTactic).single() + unifyNumbers: Boolean = true, +): AnyRow = DataFrame.readJson(file, header, keyValuePaths, typeClashTactic, unifyNumbers).single() /** * @param path URL or file path from where to fetch the Json as [InputStream] to be converted to a [DataFrame]. @@ -157,6 +178,7 @@ public fun DataRow.Companion.readJson( * will be created. * @param typeClashTactic How to handle type clashes when reading a JSON file. * @param header Optional list of column names. If given, the stream will be read like an object with [header] being the keys. + * @param unifyNumbers Whether to [unify the numbers that are read][UnifyingNumbers]. `true` by default. * @return [DataFrame] from the given [path]. */ @OptInRefine @@ -166,7 +188,8 @@ public fun DataFrame.Companion.readJson( header: List = emptyList(), keyValuePaths: List = emptyList(), typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, -): AnyFrame = readJson(asUrl(path), header, keyValuePaths, typeClashTactic) + unifyNumbers: Boolean = true, +): AnyFrame = DataFrame.readJson(asUrl(path), header, keyValuePaths, typeClashTactic, unifyNumbers) /** * @param path URL or file path from where to fetch the Json as [InputStream] to be converted to a [DataRow]. @@ -174,6 +197,7 @@ public fun DataFrame.Companion.readJson( * will be created. * @param typeClashTactic How to handle type clashes when reading a JSON file. * @param header Optional list of column names. 
If given, the stream will be read like an object with [header] being the keys. + * @param unifyNumbers Whether to [unify the numbers that are read][UnifyingNumbers]. `true` by default. * @return [DataRow] from the given [path]. */ public fun DataRow.Companion.readJson( @@ -181,7 +205,8 @@ public fun DataRow.Companion.readJson( header: List = emptyList(), keyValuePaths: List = emptyList(), typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, -): AnyRow = DataFrame.readJson(path, header, keyValuePaths, typeClashTactic).single() + unifyNumbers: Boolean = true, +): AnyRow = DataFrame.readJson(path, header, keyValuePaths, typeClashTactic, unifyNumbers).single() /** * @param url Where to fetch the Json as [InputStream] to be converted to a [DataFrame]. @@ -189,6 +214,7 @@ public fun DataRow.Companion.readJson( * will be created. * @param typeClashTactic How to handle type clashes when reading a JSON file. * @param header Optional list of column names. If given, the stream will be read like an object with [header] being the keys. + * @param unifyNumbers Whether to [unify the numbers that are read][UnifyingNumbers]. `true` by default. * @return [DataFrame] from the given [url]. */ public fun DataFrame.Companion.readJson( @@ -196,7 +222,8 @@ public fun DataFrame.Companion.readJson( header: List = emptyList(), keyValuePaths: List = emptyList(), typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, -): AnyFrame = catchHttpResponse(url) { readJson(it, header, keyValuePaths, typeClashTactic) } + unifyNumbers: Boolean = true, +): AnyFrame = catchHttpResponse(url) { DataFrame.readJson(it, header, keyValuePaths, typeClashTactic, unifyNumbers) } /** * @param url Where to fetch the Json as [InputStream] to be converted to a [DataRow]. @@ -204,6 +231,7 @@ public fun DataFrame.Companion.readJson( * will be created. * @param typeClashTactic How to handle type clashes when reading a JSON file. * @param header Optional list of column names. 
If given, the stream will be read like an object with [header] being the keys. + * @param unifyNumbers Whether to [unify the numbers that are read][UnifyingNumbers]. `true` by default. * @return [DataRow] from the given [url]. */ public fun DataRow.Companion.readJson( @@ -211,7 +239,8 @@ public fun DataRow.Companion.readJson( header: List = emptyList(), keyValuePaths: List = emptyList(), typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, -): AnyRow = DataFrame.readJson(url, header, keyValuePaths, typeClashTactic).single() + unifyNumbers: Boolean = true, +): AnyRow = DataFrame.readJson(url, header, keyValuePaths, typeClashTactic, unifyNumbers).single() /** * @param stream Json as [InputStream] to be converted to a [DataFrame]. @@ -219,6 +248,7 @@ public fun DataRow.Companion.readJson( * will be created. * @param typeClashTactic How to handle type clashes when reading a JSON file. * @param header Optional list of column names. If given, [stream] will be read like an object with [header] being the keys. + * @param unifyNumbers Whether to [unify the numbers that are read][UnifyingNumbers]. `true` by default. * @return [DataFrame] from the given [stream]. */ @OptIn(ExperimentalSerializationApi::class) @@ -227,7 +257,9 @@ public fun DataFrame.Companion.readJson( header: List = emptyList(), keyValuePaths: List = emptyList(), typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, -): AnyFrame = readJson(Json.decodeFromStream(stream), header, keyValuePaths, typeClashTactic) + unifyNumbers: Boolean = true, +): AnyFrame = + readJsonImpl(Json.decodeFromStream(stream), unifyNumbers, header, keyValuePaths, typeClashTactic) /** * @param stream Json as [InputStream] to be converted to a [DataRow]. @@ -235,6 +267,7 @@ public fun DataFrame.Companion.readJson( * will be created. * @param typeClashTactic How to handle type clashes when reading a JSON file. * @param header Optional list of column names. 
If given, [stream] will be read like an object with [header] being the keys. + * @param unifyNumbers Whether to [unify the numbers that are read][UnifyingNumbers]. `true` by default. * @return [DataRow] from the given [stream]. */ public fun DataRow.Companion.readJson( @@ -242,7 +275,8 @@ public fun DataRow.Companion.readJson( header: List = emptyList(), keyValuePaths: List = emptyList(), typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, -): AnyRow = DataFrame.readJson(stream, header, keyValuePaths, typeClashTactic).single() + unifyNumbers: Boolean = true, +): AnyRow = DataFrame.readJson(stream, header, keyValuePaths, typeClashTactic, unifyNumbers).single() /** * @param text Json as [String] to be converted to a [DataFrame]. @@ -250,6 +284,7 @@ public fun DataRow.Companion.readJson( * will be created. * @param typeClashTactic How to handle type clashes when reading a JSON file. * @param header Optional list of column names. If given, [text] will be read like an object with [header] being the keys. + * @param unifyNumbers Whether to [unify the numbers that are read][UnifyingNumbers]. `true` by default. * @return [DataFrame] from the given [text]. */ @Refine @@ -259,7 +294,8 @@ public fun DataFrame.Companion.readJsonStr( header: List = emptyList(), keyValuePaths: List = emptyList(), typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, -): AnyFrame = readJson(Json.parseToJsonElement(text), header, keyValuePaths, typeClashTactic) + unifyNumbers: Boolean = true, +): AnyFrame = readJsonImpl(Json.parseToJsonElement(text), unifyNumbers, header, keyValuePaths, typeClashTactic) /** * @param text Json as [String] to be converted to a [DataRow]. @@ -267,6 +303,7 @@ public fun DataFrame.Companion.readJsonStr( * will be created. * @param typeClashTactic How to handle type clashes when reading a JSON file. * @param header Optional list of column names. If given, [text] will be read like an object with [header] being the keys. 
+ * @param unifyNumbers Whether to [unify the numbers that are read][UnifyingNumbers]. `true` by default. * @return [DataRow] from the given [text]. */ @Refine @@ -276,7 +313,8 @@ public fun DataRow.Companion.readJsonStr( header: List = emptyList(), keyValuePaths: List = emptyList(), typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, -): AnyRow = DataFrame.readJsonStr(text, header, keyValuePaths, typeClashTactic).single() + unifyNumbers: Boolean = true, +): AnyRow = DataFrame.readJsonStr(text, header, keyValuePaths, typeClashTactic, unifyNumbers).single() public fun AnyFrame.toJson(prettyPrint: Boolean = false): String { val json = Json { @@ -296,7 +334,8 @@ public fun AnyFrame.toJson(prettyPrint: Boolean = false): String { * If null, all rows are included. * Applied for each frame column recursively * @param prettyPrint Specifies whether the output JSON should be formatted with indentation and line breaks. - * @param imageEncodingOptions The options for encoding images. The default is null, which indicates that the image is not encoded as Base64. + * @param customEncoders The options for encoding things like images. + * The default is empty list, which indicates that the image is not encoded as Base64. * * @return The DataFrame converted to a JSON string with metadata. */ @@ -393,3 +432,79 @@ public fun AnyRow.writeJson(path: String, prettyPrint: Boolean = false) { public fun AnyRow.writeJson(writer: Appendable, prettyPrint: Boolean = false) { writer.append(toJson(prettyPrint)) } + +// region deprecations + +/** Here for binary compatibility. */ +@Deprecated(message = READ_JSON, level = DeprecationLevel.HIDDEN) +public fun DataFrame.Companion.readJson( + file: File, + header: List = emptyList(), + keyValuePaths: List = emptyList(), + typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, +): AnyFrame = DataFrame.readJson(file, header, keyValuePaths, typeClashTactic, true) + +/** Here for binary compatibility. 
*/ +@Deprecated(message = READ_JSON, level = DeprecationLevel.HIDDEN) +public fun DataRow.Companion.readJson( + file: File, + header: List = emptyList(), + keyValuePaths: List = emptyList(), + typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, +): AnyRow = DataRow.readJson(file, header, keyValuePaths, typeClashTactic, true) + +/** Here for binary compatibility. */ +@Deprecated(message = READ_JSON, level = DeprecationLevel.HIDDEN) +public fun DataFrame.Companion.readJson( + stream: InputStream, + header: List = emptyList(), + keyValuePaths: List = emptyList(), + typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, +): AnyFrame = DataFrame.readJson(stream, header, keyValuePaths, typeClashTactic, true) + +/** Here for binary compatibility. */ +@Deprecated(message = READ_JSON, level = DeprecationLevel.HIDDEN) +public fun DataRow.Companion.readJson( + stream: InputStream, + header: List = emptyList(), + keyValuePaths: List = emptyList(), + typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, +): AnyRow = DataRow.readJson(stream, header, keyValuePaths, typeClashTactic, true) + +/** Here for binary compatibility. */ +@Deprecated(message = READ_JSON, level = DeprecationLevel.HIDDEN) +public fun DataFrame.Companion.readJson( + url: URL, + header: List = emptyList(), + keyValuePaths: List = emptyList(), + typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, +): AnyFrame = DataFrame.readJson(url, header, keyValuePaths, typeClashTactic, true) + +/** Here for binary compatibility. */ +@Deprecated(message = READ_JSON, level = DeprecationLevel.HIDDEN) +public fun DataRow.Companion.readJson( + url: URL, + header: List = emptyList(), + keyValuePaths: List = emptyList(), + typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, +): AnyRow = DataRow.readJson(url, header, keyValuePaths, typeClashTactic, true) + +/** Here for binary compatibility. 
*/ +@Deprecated(message = READ_JSON, level = DeprecationLevel.HIDDEN) +public fun DataFrame.Companion.readJsonStr( + @Language("json") text: String, + header: List = emptyList(), + keyValuePaths: List = emptyList(), + typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, +): AnyFrame = DataFrame.readJsonStr(text, header, keyValuePaths, typeClashTactic, true) + +/** Here for binary compatibility. */ +@Deprecated(message = READ_JSON, level = DeprecationLevel.HIDDEN) +public fun DataRow.Companion.readJsonStr( + @Language("json") text: String, + header: List = emptyList(), + keyValuePaths: List = emptyList(), + typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, +): AnyRow = DataRow.readJsonStr(text, header, keyValuePaths, typeClashTactic, true) + +// endregion diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt index 6539d2a584..11ab7599e5 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt @@ -60,6 +60,8 @@ internal const val IS_URL_IMPORT = "org.jetbrains.kotlinx.dataframe.io.isUrl" internal const val MINUS = "This minus overload will be removed in favor of `remove`. $MESSAGE_0_16" internal const val MINUS_REPLACE = "this.remove(columns)" +internal const val READ_JSON = "This function is just here for binary compatibility. $MESSAGE_0_16" + internal const val MOVE_TO_LEFT = "This `moveToLeft` overload will be removed in favor of `moveToStart`. 
$MESSAGE_0_16" internal const val MOVE_TO_LEFT_REPLACE = "this.moveToStart(columns)" diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/colsAtAnyDepth.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/colsAtAnyDepth.kt index 30160ff9b5..ed9109763d 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/colsAtAnyDepth.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/colsAtAnyDepth.kt @@ -18,8 +18,6 @@ import org.jetbrains.kotlinx.dataframe.samples.api.city import org.jetbrains.kotlinx.dataframe.samples.api.firstName import org.jetbrains.kotlinx.dataframe.samples.api.lastName import org.jetbrains.kotlinx.dataframe.samples.api.name -import org.jetbrains.kotlinx.dataframe.samples.api.secondName -import org.jetbrains.kotlinx.dataframe.samples.api.thirdName import org.jetbrains.kotlinx.dataframe.samples.api.weight import org.junit.Test import kotlin.reflect.typeOf diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt index 730978aad5..a54a448626 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt @@ -22,6 +22,7 @@ import org.jetbrains.kotlinx.dataframe.DataRow import org.jetbrains.kotlinx.dataframe.alsoDebug import org.jetbrains.kotlinx.dataframe.api.JsonPath import org.jetbrains.kotlinx.dataframe.api.allNulls +import org.jetbrains.kotlinx.dataframe.api.colsOf import org.jetbrains.kotlinx.dataframe.api.columnsCount import org.jetbrains.kotlinx.dataframe.api.convert import org.jetbrains.kotlinx.dataframe.api.dataFrameOf @@ -35,7 +36,6 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup import org.jetbrains.kotlinx.dataframe.columns.ColumnKind import org.jetbrains.kotlinx.dataframe.columns.FrameColumn import org.jetbrains.kotlinx.dataframe.columns.ValueColumn -import org.jetbrains.kotlinx.dataframe.get import 
org.jetbrains.kotlinx.dataframe.impl.io.SERIALIZATION_VERSION import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.COLUMNS import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.DATA @@ -45,8 +45,9 @@ import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.METADATA import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.NCOL import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.NROW import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.VERSION -import org.jetbrains.kotlinx.dataframe.impl.io.readJson +import org.jetbrains.kotlinx.dataframe.impl.io.readJsonImpl import org.jetbrains.kotlinx.dataframe.impl.nothingType +import org.jetbrains.kotlinx.dataframe.impl.nullableNothingType import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.ANY_COLUMNS import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.ARRAY_AND_VALUE_COLUMNS import org.jetbrains.kotlinx.dataframe.parseJsonStr @@ -54,6 +55,7 @@ import org.jetbrains.kotlinx.dataframe.testJson import org.jetbrains.kotlinx.dataframe.type import org.jetbrains.kotlinx.dataframe.values import org.junit.Test +import kotlin.Double import kotlin.reflect.typeOf @Suppress("ktlint:standard:argument-list-wrapping") @@ -122,7 +124,7 @@ class JsonTests { df.rowsCount() shouldBe 2 df["a"].type() shouldBe typeOf() df["b"].type() shouldBe typeOf>() - df["c"].type() shouldBe typeOf() + df["c"].type() shouldBe typeOf() } @Test @@ -140,7 +142,7 @@ class JsonTests { df.rowsCount() shouldBe 2 df["a"].type() shouldBe typeOf() df["b"].type() shouldBe typeOf>() - df["c"].type() shouldBe typeOf() + df["c"].type() shouldBe typeOf() } @Test @@ -199,7 +201,7 @@ class JsonTests { val df = DataFrame.readJsonStr(json).alsoDebug() df.columnsCount() shouldBe 1 df.rowsCount() shouldBe 3 - df["a"].type() shouldBe typeOf>() + df["a"].type() shouldBe typeOf>() df[1]["a"] shouldBe emptyList() } @@ -217,7 +219,7 @@ class JsonTests { val df = DataFrame.readJsonStr(json, 
typeClashTactic = ANY_COLUMNS).alsoDebug() df.columnsCount() shouldBe 1 df.rowsCount() shouldBe 3 - df["a"].type() shouldBe typeOf>() + df["a"].type() shouldBe typeOf>() df[1]["a"] shouldBe emptyList() } @@ -228,7 +230,7 @@ class JsonTests { """ [ {"a":[ {"b":2}, {"c":3} ]}, - {"a":[ {"b":4}, {"d":5} ]} + {"a":[ {"b":4.0}, {"d":5} ]} ] """.trimIndent() val df = DataFrame.readJsonStr(json).alsoDebug() @@ -238,7 +240,7 @@ class JsonTests { group[0].alsoDebug().let { it.columnsCount() shouldBe 3 it.rowsCount() shouldBe 2 - it["b"].type() shouldBe typeOf() + it["b"].type() shouldBe typeOf() it["c"].type() shouldBe typeOf() it["d"].type() shouldBe typeOf() it["b"].values.toList() shouldBe listOf(2, null) @@ -249,7 +251,7 @@ class JsonTests { group[1].alsoDebug().let { it.columnsCount() shouldBe 3 it.rowsCount() shouldBe 2 - it["b"].type() shouldBe typeOf() + it["b"].type() shouldBe typeOf() it["c"].type() shouldBe typeOf() it["d"].type() shouldBe typeOf() it["b"].values.toList() shouldBe listOf(4, null) @@ -295,6 +297,31 @@ class JsonTests { } } + @Test + fun `json and number unification`() { + @Language("json") + val json = + """ + [ + {"a":1}, + {"a":2.0}, + {"a":3}, + {"a":4.5} + ] + """.trimIndent() + val df1 = DataFrame.readJsonStr(json, unifyNumbers = true).alsoDebug() + df1.columnsCount() shouldBe 1 + df1.rowsCount() shouldBe 4 + df1["a"].type() shouldBe typeOf() + df1["a"].values.toList() shouldBe listOf(1.0, 2.0, 3.0, 4.5) + + val df2 = DataFrame.readJsonStr(json, unifyNumbers = false).alsoDebug() + df2.columnsCount() shouldBe 1 + df2.rowsCount() shouldBe 4 + df2["a"].type() shouldBe typeOf() + df2["a"].values.toList() shouldBe listOf(1, 2.0f, 3, 4.5f) + } + @Test fun `parse json with nested json array with mixed values`() { @Language("json") @@ -383,7 +410,7 @@ class JsonTests { ).alsoDebug("df:") val res = DataFrame.readJsonStr(df.toJson()).alsoDebug("res:") - res shouldBe df + res shouldBe df.convert { colsOf() }.toFloat() } @Test @@ -396,21 +423,17 @@ class 
JsonTests { val res = DataFrame.readJsonStr(df.toJson(), typeClashTactic = ANY_COLUMNS).alsoDebug("res:") - res shouldBe df - } - - @Test - fun `NaN double serialization`() { - val df = dataFrameOf("v")(1.1, Double.NaN) - df["v"].type() shouldBe typeOf() - DataFrame.readJsonStr(df.toJson()) shouldBe df + res shouldBe df.convert { colsOf() }.toFloat() } @Test fun `NaN double serialization Any`() { val df = dataFrameOf("v")(1.1, Double.NaN) df["v"].type() shouldBe typeOf() - DataFrame.readJsonStr(df.toJson(), typeClashTactic = ANY_COLUMNS) shouldBe df + + val df2 = DataFrame.readJsonStr(df.toJson(), typeClashTactic = ANY_COLUMNS) + df2["v"].type() shouldBe typeOf() + df2 shouldBe df.convert("v").toFloat() } @Test @@ -583,7 +606,7 @@ class JsonTests { val group = df["a"] as ColumnGroup<*> group.columnsCount() shouldBe 6 group["b"].type() shouldBe typeOf() - group["value"].type() shouldBe typeOf() + group["value"].type() shouldBe typeOf() group["value1"].type() shouldBe typeOf() group["array"].type() shouldBe nothingType(nullable = true) @@ -922,7 +945,7 @@ class JsonTests { it["b"].type() shouldBe typeOf() it["c"].type() shouldBe typeOf() - it["d"].type() shouldBe typeOf() + it["d"].type() shouldBe nullableNothingType it[0].toMap() shouldBe mapOf("b" to 1, "c" to null, "d" to null) it[1].toMap() shouldBe mapOf("b" to listOf(1, 2, 3), "c" to 2, "d" to null) @@ -1115,7 +1138,7 @@ class JsonTests { // https://github.com/Kotlin/kotlinx.serialization/issues/2511 val json = Json.decodeFromString("""[jetbrains, jetbrains-youtrack, youtrack, youtrack-api]""") shouldThrow { - readJson(json, emptyList()) + readJsonImpl(json, true, emptyList()) } } } diff --git a/docs/StardustDocs/topics/read.md b/docs/StardustDocs/topics/read.md index 7843c90a63..14082d09f4 100644 --- a/docs/StardustDocs/topics/read.md +++ b/docs/StardustDocs/topics/read.md @@ -310,8 +310,9 @@ DataFrame.readJson("https://covid.ourworldindata.org/data/owid-covid-data.json") ### Column type inference from JSON 
Type inference for JSON is much simpler than for CSV. -JSON string literals are always supposed to have String type. Number literals -take different `Number` kinds. Boolean literals are converted to `Boolean`. +JSON string literals always become a `String`. +Number literals are converted to a unified `Number` type which will fit all encountered numbers. +Boolean literals are converted to `Boolean`. Let's take a look at the following JSON: @@ -355,12 +356,12 @@ The corresponding [`DataFrame`](DataFrame.md) schema is: ```text A: String B: Int -C: Number +C: Double D: Boolean? ``` Column A has `String` type because all values are string literals, no implicit conversion is performed. Column C -has `Number` type because it's the least common type for `Int` and `Double`. +has the `Double` type because it's the smallest unified number type for `Int` and `Float`. ### JSON parsing options @@ -370,8 +371,8 @@ By default, if a type clash occurs when reading JSON, a new column group is crea any number of object properties: "value" will be set to the value of the JSON element if it's a primitive, else it will be `null`.\ -"array" will be set to the array of values if the json element is an array, else it will be `[]`.\ -If the json element is an object, then each property will spread out to its own column in the group, else these columns +"array" will be set to the array of values if the JSON element is an array, else it will be `[]`.\ +If the JSON element is an object, then each property will spread out to its own column in the group, else these columns will be `null`. In this case `typeClashTactic = JSON.TypeClashTactic.ARRAY_AND_VALUE_COLUMNS`. 
diff --git a/plugins/dataframe-gradle-plugin/src/main/kotlin/org/jetbrains/dataframe/gradle/GenerateDataSchemaTask.kt b/plugins/dataframe-gradle-plugin/src/main/kotlin/org/jetbrains/dataframe/gradle/GenerateDataSchemaTask.kt index 8dbfa3f996..dfec70c339 100644 --- a/plugins/dataframe-gradle-plugin/src/main/kotlin/org/jetbrains/dataframe/gradle/GenerateDataSchemaTask.kt +++ b/plugins/dataframe-gradle-plugin/src/main/kotlin/org/jetbrains/dataframe/gradle/GenerateDataSchemaTask.kt @@ -117,7 +117,11 @@ abstract class GenerateDataSchemaTask : DefaultTask() { val formats = listOf( CsvDeephaven(delimiter = csvOptions.delimiter), - JSON(typeClashTactic = jsonOptions.typeClashTactic, keyValuePaths = jsonOptions.keyValuePaths), + JSON( + typeClashTactic = jsonOptions.typeClashTactic, + keyValuePaths = jsonOptions.keyValuePaths, + unifyNumbers = jsonOptions.unifyNumbers, + ), Excel(), TsvDeephaven(), ArrowFeather(), diff --git a/plugins/dataframe-gradle-plugin/src/main/kotlin/org/jetbrains/dataframe/gradle/SchemaGeneratorExtension.kt b/plugins/dataframe-gradle-plugin/src/main/kotlin/org/jetbrains/dataframe/gradle/SchemaGeneratorExtension.kt index b9f3dc9c55..7f402be156 100644 --- a/plugins/dataframe-gradle-plugin/src/main/kotlin/org/jetbrains/dataframe/gradle/SchemaGeneratorExtension.kt +++ b/plugins/dataframe-gradle-plugin/src/main/kotlin/org/jetbrains/dataframe/gradle/SchemaGeneratorExtension.kt @@ -129,6 +129,7 @@ data class CsvOptionsDsl(var delimiter: Char = ',') : Serializable data class JsonOptionsDsl( var typeClashTactic: JSON.TypeClashTactic = JSON.TypeClashTactic.ARRAY_AND_VALUE_COLUMNS, var keyValuePaths: List = emptyList(), + var unifyNumbers: Boolean = true, ) : Serializable /** diff --git a/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/api/read.kt b/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/api/read.kt index 0b8d99a3d5..53610938f9 100644 --- 
a/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/api/read.kt +++ b/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/api/read.kt @@ -125,18 +125,20 @@ internal class ReadDelimStr : AbstractInterpreter() { internal class ReadJsonStr : AbstractInterpreter() { val Arguments.text: String by arg() val Arguments.typeClashTactic: JSON.TypeClashTactic by arg(defaultValue = Present(ARRAY_AND_VALUE_COLUMNS)) + val Arguments.unifyNumbers: Boolean by arg(defaultValue = Present(true)) override fun Arguments.interpret(): PluginDataFrameSchema { - return DataFrame.readJsonStr(text, typeClashTactic = typeClashTactic).schema().toPluginDataFrameSchema() + return DataFrame.readJsonStr(text, typeClashTactic = typeClashTactic, unifyNumbers = unifyNumbers).schema().toPluginDataFrameSchema() } } internal class DataRowReadJsonStr : AbstractInterpreter() { val Arguments.text: String by arg() val Arguments.typeClashTactic: JSON.TypeClashTactic by arg(defaultValue = Present(ARRAY_AND_VALUE_COLUMNS)) + val Arguments.unifyNumbers: Boolean by arg(defaultValue = Present(true)) override fun Arguments.interpret(): PluginDataFrameSchema { - return DataRow.readJsonStr(text, typeClashTactic = typeClashTactic).schema().toPluginDataFrameSchema() + return DataRow.readJsonStr(text, typeClashTactic = typeClashTactic, unifyNumbers = unifyNumbers).schema().toPluginDataFrameSchema() } } diff --git a/tests/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Read.kt b/tests/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Read.kt index ffb27ae5fd..d9e38391c2 100644 --- a/tests/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Read.kt +++ b/tests/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Read.kt @@ -62,7 +62,7 @@ class Read { val row = DataRow.readJson(file) // SampleEnd row.columnNames() shouldBe listOf("A", "B", "C", "D") - row.columnTypes() shouldBe listOf(typeOf(), typeOf(), typeOf(), typeOf()) + 
row.columnTypes() shouldBe listOf(typeOf<String>(), typeOf<Int>(), typeOf<Double>(), typeOf<Boolean?>()) } @Test