From 8ed6fe6ba61fd65dd83e7d3fe42179a9131f9af5 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Tue, 22 Apr 2025 20:31:27 +0200 Subject: [PATCH 01/15] wip extracting json module --- build.gradle.kts | 2 + core/build.gradle.kts | 2 - .../dataframe/annotations/ImportDataSchema.kt | 15 ++- .../kotlinx/dataframe/api/chunked.kt | 12 +- .../dataframe/codeGen/DefaultReadDfMethods.kt | 8 -- .../kotlinx/dataframe/columns/ColumnGroup.kt | 2 + .../documentation/UnifyingNumbers.kt | 5 +- .../dataframe/impl/ColumnDataCollector.kt | 2 +- .../jetbrains/kotlinx/dataframe/impl/Utils.kt | 9 ++ .../kotlinx/dataframe/impl/io/image.kt | 21 ++++ .../kotlinx/dataframe/impl/schema/Utils.kt | 3 + ...rains.kotlinx.dataframe.io.SupportedFormat | 1 - dataframe-csv/build.gradle.kts | 3 + dataframe-json/build.gradle.kts | 68 +++++++++++ .../kotlinx/dataframe/impl/io/BytesUtils.kt | 0 .../kotlinx/dataframe/impl/io/compression.kt | 0 .../kotlinx/dataframe/impl/io/readJson.kt | 115 ++++++++++++++---- .../kotlinx/dataframe/impl/io/writeJson.kt | 47 ++++++- .../jetbrains/kotlinx/dataframe/io/json.kt | 90 ++------------ ...rains.kotlinx.dataframe.io.SupportedFormat | 1 + dataframe-jupyter/build.gradle.kts | 2 + settings.gradle.kts | 2 + 22 files changed, 278 insertions(+), 132 deletions(-) create mode 100644 dataframe-json/build.gradle.kts rename {core => dataframe-json}/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/BytesUtils.kt (100%) rename {core => dataframe-json}/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/compression.kt (100%) rename {core => dataframe-json}/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt (89%) rename {core => dataframe-json}/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeJson.kt (90%) rename {core => dataframe-json}/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt (83%) create mode 100644 dataframe-json/src/main/resources/services/org.jetbrains.kotlinx.dataframe.io.SupportedFormat diff --git a/build.gradle.kts 
b/build.gradle.kts index 7cf636b40a..001df5bb44 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -54,6 +54,7 @@ dependencies { api(projects.dataframeExcel) api(projects.dataframeJdbc) api(projects.dataframeCsv) + api(projects.dataframeJson) // experimental, so not included by default: // api(projects.dataframeOpenapi) @@ -64,6 +65,7 @@ dependencies { kover(projects.dataframeOpenapi) kover(projects.dataframeJdbc) kover(projects.dataframeCsv) + kover(projects.dataframeJson) kover(projects.plugins.kotlinDataframe) kover(projects.dataframeJupyter) } diff --git a/core/build.gradle.kts b/core/build.gradle.kts index d15c043415..0e10747099 100644 --- a/core/build.gradle.kts +++ b/core/build.gradle.kts @@ -65,8 +65,6 @@ dependencies { api(libs.commonsCsv) implementation(libs.commonsIo) - implementation(libs.serialization.core) - implementation(libs.serialization.json) implementation(libs.fastDoubleParser) api(libs.kotlin.datetimeJvm) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/annotations/ImportDataSchema.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/annotations/ImportDataSchema.kt index d7f74eb415..107e5a3b6d 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/annotations/ImportDataSchema.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/annotations/ImportDataSchema.kt @@ -5,7 +5,6 @@ import org.jetbrains.kotlinx.dataframe.api.KeyValueProperty import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup import org.jetbrains.kotlinx.dataframe.columns.FrameColumn import org.jetbrains.kotlinx.dataframe.documentation.UnifyingNumbers -import org.jetbrains.kotlinx.dataframe.io.JSON /** * Annotation preprocessing will generate a DataSchema interface from the data at `path`. @@ -73,8 +72,11 @@ public annotation class JdbcOptions( ) public annotation class JsonOptions( - /** Allows the choice of how to handle type clashes when reading a JSON file. 
*/ - public val typeClashTactic: JSON.TypeClashTactic = JSON.TypeClashTactic.ARRAY_AND_VALUE_COLUMNS, + /** + * Allows the choice of how to handle type clashes when reading a JSON file. + * Must be either [TypeClashTactics.ARRAY_AND_VALUE_COLUMNS] or [TypeClashTactics.ANY_COLUMNS] + * */ + public val typeClashTactic: String = TypeClashTactics.ARRAY_AND_VALUE_COLUMNS, /** * List of [JsonPath]s where instead of a [ColumnGroup], a [FrameColumn]<[KeyValueProperty]> * will be created. @@ -85,4 +87,9 @@ public annotation class JsonOptions( public val keyValuePaths: Array = [], /** Whether to [unify the numbers that are read][UnifyingNumbers]. `true` by default. */ public val unifyNumbers: Boolean = true, -) +) { + public object TypeClashTactics { + public const val ARRAY_AND_VALUE_COLUMNS: String = "ARRAY_AND_VALUE_COLUMNS" + public const val ANY_COLUMNS: String = "ANY_COLUMNS" + } +} diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/chunked.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/chunked.kt index 421d1d64e6..9aa6ae504f 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/chunked.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/chunked.kt @@ -15,10 +15,14 @@ import org.jetbrains.kotlinx.dataframe.type * Creates a [FrameColumn] from [this] by splitting the dataframe into * smaller ones, with their number of rows at most [size]. 
*/ -public fun DataFrame.chunked(size: Int, name: String = "groups"): FrameColumn { - val startIndices = (0 until nrow step size) - return this.chunkedImpl(startIndices, name) -} +public fun DataFrame.chunked(size: Int, name: String = "groups"): FrameColumn = + chunked( + startIndices = 0 until nrow step size, + name = name, + ) + +public fun DataFrame.chunked(startIndices: Iterable, name: String = "groups"): FrameColumn = + chunkedImpl(startIndices, name) public fun DataColumn.chunked(size: Int): ValueColumn> { val values = toList().chunked(size) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/DefaultReadDfMethods.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/DefaultReadDfMethods.kt index 6ecb10a3cc..cc0a791828 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/DefaultReadDfMethods.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/DefaultReadDfMethods.kt @@ -23,7 +23,6 @@ private const val CAST = "cast" private const val VERIFY = "verify" // cast(true) is obscure, i think it's better to use named argument here private const val READ_CSV = "readCSV" private const val READ_TSV = "readTSV" -private const val READ_JSON = "readJson" private const val READ_JDBC = "readJdbc" public abstract class AbstractDefaultReadMethod( @@ -82,13 +81,6 @@ public abstract class AbstractDefaultReadMethod( override val additionalImports: List = listOf("import org.jetbrains.kotlinx.dataframe.io.$methodName") } -internal class DefaultReadJsonMethod(path: String?, arguments: MethodArguments) : - AbstractDefaultReadMethod( - path = path, - arguments = arguments, - methodName = READ_JSON, - ) - internal class DefaultReadCsvMethod(path: String?, arguments: MethodArguments) : AbstractDefaultReadMethod(path, arguments, READ_CSV) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnGroup.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnGroup.kt index 
bd1fe61347..734f5dd3c5 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnGroup.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnGroup.kt @@ -9,6 +9,8 @@ import org.jetbrains.kotlinx.dataframe.annotations.HasSchema import org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl import org.jetbrains.kotlinx.dataframe.api.asColumnGroup import org.jetbrains.kotlinx.dataframe.api.columnGroup +import org.jetbrains.kotlinx.dataframe.impl.schema.extractSchema +import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema import kotlin.reflect.KProperty /** diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/UnifyingNumbers.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/UnifyingNumbers.kt index dd149f2328..0e5ba62ff4 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/UnifyingNumbers.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/UnifyingNumbers.kt @@ -24,7 +24,7 @@ import org.jetbrains.kotlinx.dataframe.impl.UnifiedNumberTypeOptions * * At the bottom of the graph is [Nothing]. This can be interpreted as `null`. */ -internal interface UnifyingNumbers { +public interface UnifyingNumbers { /** * ``` @@ -47,5 +47,6 @@ internal interface UnifyingNumbers { * Nothing? 
* ``` */ - interface Graph + @ExcludeFromSources + private interface Graph } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/ColumnDataCollector.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/ColumnDataCollector.kt index 16423a28a5..ae0e60438c 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/ColumnDataCollector.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/ColumnDataCollector.kt @@ -14,7 +14,7 @@ import kotlin.reflect.full.isSubclassOf import kotlin.reflect.full.withNullability import kotlin.reflect.jvm.jvmErasure -internal interface DataCollector { +public interface DataCollector { public val data: List public val hasNulls: Boolean diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/Utils.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/Utils.kt index ce625f0351..902fe6c85d 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/Utils.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/Utils.kt @@ -150,6 +150,11 @@ internal fun catchSilent(body: () -> T): T? = internal fun Iterable>.commonType(nullable: Boolean, upperBound: KType? = null) = commonParents(this).createType(nullable, upperBound) +// helper overload for friend modules +@JvmName("commonTypeOverload") +internal fun commonType(types: Iterable, useStar: Boolean = true) = + types.commonType(useStar) + /** * Returns the common supertype of the given types. 
* @@ -276,6 +281,10 @@ internal fun DataFrame.splitByIndices(startIndices: Sequence): Seque } } +// helper overload for friend modules +@JvmName("splitByIndicesOverload") +internal fun splitByIndices(list: List, startIndices: Sequence) = list.splitByIndices(startIndices) + internal fun List.splitByIndices(startIndices: Sequence): Sequence> = (startIndices + size).zipWithNext { start, endExclusive -> subList(start, endExclusive) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/image.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/image.kt index 782288718c..0ef54308ac 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/image.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/image.kt @@ -58,3 +58,24 @@ internal fun BufferedImage.toByteArray(format: String = DEFAULT_IMG_FORMAT): Byt ImageIO.write(this, format, bos) bos.toByteArray() } + +// helper overload for friend modules +internal fun resizeKeepingAspectRatio( + image: BufferedImage, + maxSize: Int, + resultImageType: Int = BufferedImage.TYPE_INT_ARGB, + interpolation: Any = RenderingHints.VALUE_INTERPOLATION_NEAREST_NEIGHBOR, + renderingQuality: Any = RenderingHints.VALUE_RENDER_QUALITY, + antialiasing: Any = RenderingHints.VALUE_ANTIALIAS_ON, + observer: ImageObserver? 
= null, +) = image.resizeKeepingAspectRatio( + maxSize = maxSize, + resultImageType = resultImageType, + interpolation = interpolation, + renderingQuality = renderingQuality, + antialiasing = antialiasing, + observer = observer, +) + +// helper overload for friend modules +internal fun toByteArray(image: BufferedImage, format: String = DEFAULT_IMG_FORMAT) = image.toByteArray(format) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/schema/Utils.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/schema/Utils.kt index e1ffbf8ec9..caabacc179 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/schema/Utils.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/schema/Utils.kt @@ -29,6 +29,9 @@ import kotlin.reflect.typeOf internal fun AnyFrame.extractSchema(): DataFrameSchema = DataFrameSchemaImpl(columns().filter { it.name().isNotEmpty() }.associate { it.name() to it.extractSchema() }) +// helper overload for friend modules +internal fun intersectSchemas(schemas: Iterable): DataFrameSchema = schemas.intersectSchemas() + internal fun Iterable.intersectSchemas(): DataFrameSchema { val collectedTypes = mutableMapOf>() var first = true diff --git a/core/src/main/resources/META-INF/services/org.jetbrains.kotlinx.dataframe.io.SupportedFormat b/core/src/main/resources/META-INF/services/org.jetbrains.kotlinx.dataframe.io.SupportedFormat index 335b91190b..772df1f0cb 100644 --- a/core/src/main/resources/META-INF/services/org.jetbrains.kotlinx.dataframe.io.SupportedFormat +++ b/core/src/main/resources/META-INF/services/org.jetbrains.kotlinx.dataframe.io.SupportedFormat @@ -1,3 +1,2 @@ org.jetbrains.kotlinx.dataframe.io.CSV -org.jetbrains.kotlinx.dataframe.io.JSON org.jetbrains.kotlinx.dataframe.io.TSV diff --git a/dataframe-csv/build.gradle.kts b/dataframe-csv/build.gradle.kts index 77bd88e769..8f810b1250 100644 --- a/dataframe-csv/build.gradle.kts +++ b/dataframe-csv/build.gradle.kts @@ -34,6 +34,9 @@ dependencies 
{ implementation(libs.kotlinLogging) implementation(libs.kotlin.reflect) + // for writing json in csv + implementation(projects.dataframeJson) + testApi(projects.core) testImplementation(libs.kotlinx.benchmark.runtime) testImplementation(libs.junit) diff --git a/dataframe-json/build.gradle.kts b/dataframe-json/build.gradle.kts new file mode 100644 index 0000000000..2dfd4732da --- /dev/null +++ b/dataframe-json/build.gradle.kts @@ -0,0 +1,68 @@ +import org.jetbrains.kotlin.gradle.tasks.KotlinCompile + +plugins { + with(libs.plugins) { + alias(kotlin.jvm) + alias(publisher) + alias(serialization) + alias(kover) + alias(ktlint) + alias(kodex) + alias(buildconfig) + alias(binary.compatibility.validator) + + // generates keywords using the :generator module +// alias(keywordGenerator) + + // dependence on our own plugin +// alias(dataframe) + + // only mandatory if `kotlin.dataframe.add.ksp=false` in gradle.properties +// alias(ksp) + } +} + +group = "org.jetbrains.kotlinx" +version = "1.0.0-dev" + +repositories { + mavenLocal() + mavenCentral() +} + +dependencies { + api(projects.core) + implementation(libs.kotlin.stdlib) + implementation(libs.serialization.core) + implementation(libs.serialization.json) + implementation(libs.sl4j) + + testImplementation(libs.junit) + testImplementation(libs.kotestAssertions) { + exclude("org.jetbrains.kotlin", "kotlin-stdlib-jdk8") + } + testImplementation(libs.kotlin.scriptingJvm) + testImplementation(libs.jsoup) + testImplementation(libs.sl4jsimple) +} + +kotlin { + explicitApi() +} + +tasks.withType { + friendPaths.from(project(projects.core.path).projectDir) +} + +tasks.test { + useJUnitPlatform() +} + +kotlinPublications { + publication { + publicationName = "dataframeJson" + artifactId = project.name + description = "Kotlin DataFrame JSON integration." 
+ packageName = artifactId + } +} diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/BytesUtils.kt b/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/BytesUtils.kt similarity index 100% rename from core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/BytesUtils.kt rename to dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/BytesUtils.kt diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/compression.kt b/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/compression.kt similarity index 100% rename from core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/compression.kt rename to dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/compression.kt diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt b/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt similarity index 89% rename from core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt rename to dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt index e47f176d32..775481c337 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt +++ b/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt @@ -23,6 +23,7 @@ import org.jetbrains.kotlinx.dataframe.DataRow import org.jetbrains.kotlinx.dataframe.api.JsonPath import org.jetbrains.kotlinx.dataframe.api.KeyValueProperty import org.jetbrains.kotlinx.dataframe.api.cast +import org.jetbrains.kotlinx.dataframe.api.chunked import org.jetbrains.kotlinx.dataframe.api.columnOf import org.jetbrains.kotlinx.dataframe.api.concat import org.jetbrains.kotlinx.dataframe.api.dataFrameOf @@ -38,25 +39,16 @@ import org.jetbrains.kotlinx.dataframe.columns.FrameColumn import org.jetbrains.kotlinx.dataframe.columns.TypeSuggestion import 
org.jetbrains.kotlinx.dataframe.documentation.UnifyingNumbers import org.jetbrains.kotlinx.dataframe.impl.ColumnNameGenerator -import org.jetbrains.kotlinx.dataframe.impl.DataCollectorBase -import org.jetbrains.kotlinx.dataframe.impl.api.chunkedImpl +import org.jetbrains.kotlinx.dataframe.impl.DataCollector import org.jetbrains.kotlinx.dataframe.impl.asList -import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType -import org.jetbrains.kotlinx.dataframe.impl.commonType -import org.jetbrains.kotlinx.dataframe.impl.createDataCollector -import org.jetbrains.kotlinx.dataframe.impl.guessValueType import org.jetbrains.kotlinx.dataframe.impl.schema.DataFrameSchemaImpl -import org.jetbrains.kotlinx.dataframe.impl.schema.extractSchema -import org.jetbrains.kotlinx.dataframe.impl.schema.intersectSchemas -import org.jetbrains.kotlinx.dataframe.impl.splitByIndices import org.jetbrains.kotlinx.dataframe.io.ARRAY_COLUMN_NAME import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.ANY_COLUMNS import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.ARRAY_AND_VALUE_COLUMNS import org.jetbrains.kotlinx.dataframe.io.VALUE_COLUMN_NAME -import org.jetbrains.kotlinx.dataframe.ncol -import org.jetbrains.kotlinx.dataframe.nrow import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema +import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema import org.jetbrains.kotlinx.dataframe.type import org.jetbrains.kotlinx.dataframe.typeClass import org.jetbrains.kotlinx.dataframe.values @@ -179,7 +171,7 @@ internal fun fromJsonListAnyColumns( val columns: List = when { // Create one column of type Any? 
(or guessed primitive type) from all the records colType == AnyColType.ANY -> { - val collector: DataCollectorBase = + val collector: DataCollector = if (justPrimitives) { createDataCollector(records.size) // guess the type } else { @@ -250,7 +242,7 @@ internal fun fromJsonListAnyColumns( else -> collector.add(v) } } - val column = createColumnGuessingType(VALUE_COLUMN_NAME, collector.values, unifyNumbers = unifyNumbers) + val column = createColumnGuessingType(VALUE_COLUMN_NAME, collector.data, unifyNumbers = unifyNumbers) val res = if (nanIndices.isNotEmpty()) { fun DataColumn.updateNaNs(nanValue: C): DataColumn { var j = 0 @@ -313,7 +305,7 @@ internal fun fromJsonListAnyColumns( else -> parsed.unwrapUnnamedColumns() - .chunkedImpl( + .chunked( startIndices = startIndices, name = ARRAY_COLUMN_NAME, // will be erased ) @@ -367,7 +359,7 @@ internal fun fromJsonListAnyColumns( val valueColumnSchema = when { // in these cases we can safely combine the columns to get a single column schema valueColumns.all { it is ColumnGroup<*> } || valueColumns.all { it is FrameColumn<*> } -> - valueColumns.concat().extractSchema() + valueColumns.concat().toDataFrame().schema().columns.values.single() // to avoid listification, we create the value columns schema ourselves (https://github.com/Kotlin/dataframe/issues/184) else -> ColumnSchema.Value(valueTypes.commonType()) @@ -411,7 +403,7 @@ internal fun fromJsonListAnyColumns( jsonPath = jsonPath.append(colName), ) when { - parsed.ncol == 0 -> + parsed.columnsCount() == 0 -> DataColumn.createValueColumn( name = colName, values = arrayOfNulls(values.size).toList(), @@ -442,7 +434,7 @@ internal fun fromJsonListAnyColumns( } } -private fun AnyFrame.isSingleUnnamedColumn() = ncol == 1 && getColumn(0) is UnnamedColumn +private fun AnyFrame.isSingleUnnamedColumn() = columnsCount() == 1 && getColumn(0) is UnnamedColumn /** * Json to DataFrame converter that creates allows creates `value` and `array` accessors @@ -556,7 +548,7 @@ internal 
fun fromJsonListArrayAndValueColumns( name = VALUE_COLUMN_NAME, // will be erased unless at top-level groups = dataFrames, schema = lazy { - dataFrames.mapNotNull { it.takeIf { it.nrow > 0 }?.schema() }.intersectSchemas() + dataFrames.mapNotNull { it.takeIf { it.rowsCount() > 0 }?.schema() }.intersectSchemas() }, ), ), @@ -569,7 +561,7 @@ internal fun fromJsonListArrayAndValueColumns( when { // Collect primitive values from records into the `value` column if needed colName == valueColumn && (hasPrimitive || records.isEmpty()) -> { - val collector = createDataCollector(records.size) + val collector: DataCollector = createDataCollector(records.size) val nanIndices = mutableListOf() records.forEachIndexed { i, v -> when (v) { @@ -605,7 +597,7 @@ internal fun fromJsonListArrayAndValueColumns( else -> collector.add(v) } } - val column = createColumnGuessingType(colName, collector.values, unifyNumbers = unifyNumbers) + val column = createColumnGuessingType(colName, collector.data, unifyNumbers = unifyNumbers) val res = if (nanIndices.isNotEmpty()) { fun DataColumn.updateNaNs(nanValue: C): DataColumn { var j = 0 @@ -663,7 +655,7 @@ internal fun fromJsonListArrayAndValueColumns( ) } - else -> parsed.unwrapUnnamedColumns().chunkedImpl(startIndices, colName) + else -> parsed.unwrapUnnamedColumns().chunked(startIndices, colName) } UnnamedColumn(res) } @@ -685,7 +677,7 @@ internal fun fromJsonListArrayAndValueColumns( jsonPath = jsonPath.append(colName), ) when { - parsed.ncol == 0 -> + parsed.columnsCount() == 0 -> DataColumn.createValueColumn( name = colName, values = arrayOfNulls(values.size).toList(), @@ -722,3 +714,82 @@ internal fun fromJsonListArrayAndValueColumns( // but filtered values [1, { ... 
}, []] -> [1, null, null] // or arrays: [1, { ...}, []] -> [null, null, []] private class UnnamedColumn(val col: DataColumn) : DataColumn by col + +// region friend module error suppression + +@Suppress("INVISIBLE_REFERENCE") +private fun createDataCollector(initCapacity: Int = 0) = + org.jetbrains.kotlinx.dataframe.impl.createDataCollector(initCapacity) + +@Suppress("INVISIBLE_REFERENCE") +private fun createDataCollector(initCapacity: Int = 0, type: KType) = + org.jetbrains.kotlinx.dataframe.impl.createDataCollector(initCapacity, type) + +@Suppress("INVISIBLE_REFERENCE") +private fun createColumnGuessingType( + name: String, + values: Iterable, + suggestedType: TypeSuggestion = TypeSuggestion.Infer, + defaultValue: T? = null, + nullable: Boolean? = null, + listifyValues: Boolean = false, + allColsMakesColGroup: Boolean = false, + unifyNumbers: Boolean = false, +) = org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType( + name = name, + values = values, + suggestedType = suggestedType, + defaultValue = defaultValue, + nullable = nullable, + listifyValues = listifyValues, + allColsMakesColGroup = allColsMakesColGroup, + unifyNumbers = unifyNumbers, +) + +@Suppress("INVISIBLE_REFERENCE") +private fun createColumnGuessingType( + values: Iterable, + suggestedType: TypeSuggestion = TypeSuggestion.Infer, + defaultValue: T? = null, + nullable: Boolean? = null, + listifyValues: Boolean = false, + allColsMakesColGroup: Boolean = false, + unifyNumbers: Boolean = false, +) = org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType( + values = values, + suggestedType = suggestedType, + defaultValue = defaultValue, + nullable = nullable, + listifyValues = listifyValues, + allColsMakesColGroup = allColsMakesColGroup, + unifyNumbers = unifyNumbers, +) + +@Suppress("INVISIBLE_REFERENCE") +private fun guessValueType( + values: Sequence, + upperBound: KType? 
= null, + listifyValues: Boolean = false, + allColsMakesRow: Boolean = false, + unifyNumbers: Boolean = false, +) = org.jetbrains.kotlinx.dataframe.impl.guessValueType( + values = values, + upperBound = upperBound, + listifyValues = listifyValues, + allColsMakesRow = allColsMakesRow, + unifyNumbers = unifyNumbers, +) + +@Suppress("INVISIBLE_REFERENCE") +private fun List.splitByIndices(startIndices: Sequence) = + org.jetbrains.kotlinx.dataframe.impl.splitByIndices(list = this, startIndices = startIndices) + +@Suppress("INVISIBLE_REFERENCE") +private fun Iterable.commonType(useStar: Boolean = true) = + org.jetbrains.kotlinx.dataframe.impl.commonType(types = this, useStar) + +@Suppress("INVISIBLE_REFERENCE") +private fun Iterable.intersectSchemas() = + org.jetbrains.kotlinx.dataframe.impl.schema.intersectSchemas(schemas = this) + +// endregion diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeJson.kt b/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeJson.kt similarity index 90% rename from core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeJson.kt rename to dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeJson.kt index d7c069214f..833468e78c 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeJson.kt +++ b/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeJson.kt @@ -24,7 +24,6 @@ import org.jetbrains.kotlinx.dataframe.api.isList import org.jetbrains.kotlinx.dataframe.api.rows import org.jetbrains.kotlinx.dataframe.api.schema import org.jetbrains.kotlinx.dataframe.api.take -import org.jetbrains.kotlinx.dataframe.columns.CellKind import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup import org.jetbrains.kotlinx.dataframe.columns.ColumnKind import org.jetbrains.kotlinx.dataframe.columns.FrameColumn @@ -43,13 +42,12 @@ import org.jetbrains.kotlinx.dataframe.io.Base64ImageEncodingOptions import 
org.jetbrains.kotlinx.dataframe.io.CustomEncoder import org.jetbrains.kotlinx.dataframe.io.VALUE_COLUMN_NAME import org.jetbrains.kotlinx.dataframe.jupyter.KotlinNotebookPluginUtils -import org.jetbrains.kotlinx.dataframe.jupyter.KotlinNotebookPluginUtils.isDataframeConvertable import org.jetbrains.kotlinx.dataframe.name -import org.jetbrains.kotlinx.dataframe.ncol -import org.jetbrains.kotlinx.dataframe.nrow import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema import org.jetbrains.kotlinx.dataframe.typeClass +import java.awt.RenderingHints import java.awt.image.BufferedImage +import java.awt.image.ImageObserver import java.io.IOException // See docs/serialization_format.md for a description of @@ -155,8 +153,8 @@ internal fun encodeRowWithMetadata( }, ) } - put(NCOL, JsonPrimitive(col[index].ncol)) - put(NROW, JsonPrimitive(col[index].nrow)) + put(NCOL, JsonPrimitive(col[index].columnsCount())) + put(NROW, JsonPrimitive(col[index].rowsCount())) } } } @@ -415,3 +413,40 @@ internal fun encodeFrameNoDynamicNestedTables(df: AnyFrame, limit: Int): JsonObj encodeFrame(df.take(limit)), ) } + +// region friend module error suppression + +@Suppress("INVISIBLE_REFERENCE") +private object CellKind { + val DataFrameConvertable = org.jetbrains.kotlinx.dataframe.columns.CellKind.DataFrameConvertable +} + +@Suppress("INVISIBLE_REFERENCE") +private fun isDataframeConvertable(dataframeLike: Any?) = + KotlinNotebookPluginUtils.isDataframeConvertable(dataframeLike = dataframeLike) + +@Suppress("INVISIBLE_REFERENCE") +private fun BufferedImage.resizeKeepingAspectRatio( + maxSize: Int, + resultImageType: Int = BufferedImage.TYPE_INT_ARGB, + interpolation: Any = RenderingHints.VALUE_INTERPOLATION_NEAREST_NEIGHBOR, + renderingQuality: Any = RenderingHints.VALUE_RENDER_QUALITY, + antialiasing: Any = RenderingHints.VALUE_ANTIALIAS_ON, + observer: ImageObserver? 
= null, +) = org.jetbrains.kotlinx.dataframe.impl.io.resizeKeepingAspectRatio( + image = this, + maxSize = maxSize, + resultImageType = resultImageType, + interpolation = interpolation, + renderingQuality = renderingQuality, + antialiasing = antialiasing, + observer = observer, +) + +private const val DEFAULT_IMG_FORMAT: String = "png" + +@Suppress("INVISIBLE_REFERENCE") +private fun BufferedImage.toByteArray(format: String = DEFAULT_IMG_FORMAT) = + org.jetbrains.kotlinx.dataframe.impl.io.toByteArray(image = this, format = format) + +// endregion diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt b/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt similarity index 83% rename from core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt rename to dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt index 4e3ade50bc..cddc23ea7b 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt +++ b/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt @@ -12,8 +12,8 @@ import org.jetbrains.kotlinx.dataframe.DataRow import org.jetbrains.kotlinx.dataframe.api.JsonPath import org.jetbrains.kotlinx.dataframe.api.KeyValueProperty import org.jetbrains.kotlinx.dataframe.api.single +import org.jetbrains.kotlinx.dataframe.codeGen.AbstractDefaultReadMethod import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadDfMethod -import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadJsonMethod import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup import org.jetbrains.kotlinx.dataframe.columns.FrameColumn import org.jetbrains.kotlinx.dataframe.documentation.UnifyingNumbers @@ -24,7 +24,6 @@ import org.jetbrains.kotlinx.dataframe.impl.io.readJsonImpl import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.ANY_COLUMNS import 
org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.ARRAY_AND_VALUE_COLUMNS -import org.jetbrains.kotlinx.dataframe.util.READ_JSON import java.io.File import java.io.InputStream import java.net.URL @@ -36,12 +35,6 @@ public class JSON( private val unifyNumbers: Boolean = true, ) : SupportedDataFrameFormat { - @Deprecated(READ_JSON, level = DeprecationLevel.HIDDEN) - public constructor( - typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, - keyValuePaths: List = emptyList(), - ) : this(typeClashTactic, keyValuePaths, true) - override fun readDataFrame(stream: InputStream, header: List): AnyFrame = DataFrame.readJson( stream = stream, @@ -424,78 +417,11 @@ public fun AnyRow.writeJson(writer: Appendable, prettyPrint: Boolean = false) { writer.append(toJson(prettyPrint)) } -// region deprecations - -/** Here for binary compatibility. */ -@Deprecated(message = READ_JSON, level = DeprecationLevel.HIDDEN) -public fun DataFrame.Companion.readJson( - file: File, - header: List = emptyList(), - keyValuePaths: List = emptyList(), - typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, -): AnyFrame = DataFrame.readJson(file, header, keyValuePaths, typeClashTactic, true) - -/** Here for binary compatibility. */ -@Deprecated(message = READ_JSON, level = DeprecationLevel.HIDDEN) -public fun DataRow.Companion.readJson( - file: File, - header: List = emptyList(), - keyValuePaths: List = emptyList(), - typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, -): AnyRow = DataRow.readJson(file, header, keyValuePaths, typeClashTactic, true) - -/** Here for binary compatibility. */ -@Deprecated(message = READ_JSON, level = DeprecationLevel.HIDDEN) -public fun DataFrame.Companion.readJson( - stream: InputStream, - header: List = emptyList(), - keyValuePaths: List = emptyList(), - typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, -): AnyFrame = DataFrame.readJson(stream, header, keyValuePaths, typeClashTactic, true) - -/** Here for binary compatibility. 
*/ -@Deprecated(message = READ_JSON, level = DeprecationLevel.HIDDEN) -public fun DataRow.Companion.readJson( - stream: InputStream, - header: List = emptyList(), - keyValuePaths: List = emptyList(), - typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, -): AnyRow = DataRow.readJson(stream, header, keyValuePaths, typeClashTactic, true) - -/** Here for binary compatibility. */ -@Deprecated(message = READ_JSON, level = DeprecationLevel.HIDDEN) -public fun DataFrame.Companion.readJson( - url: URL, - header: List = emptyList(), - keyValuePaths: List = emptyList(), - typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, -): AnyFrame = DataFrame.readJson(url, header, keyValuePaths, typeClashTactic, true) +private const val READ_JSON = "readJson" -/** Here for binary compatibility. */ -@Deprecated(message = READ_JSON, level = DeprecationLevel.HIDDEN) -public fun DataRow.Companion.readJson( - url: URL, - header: List = emptyList(), - keyValuePaths: List = emptyList(), - typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, -): AnyRow = DataRow.readJson(url, header, keyValuePaths, typeClashTactic, true) - -/** Here for binary compatibility. */ -@Deprecated(message = READ_JSON, level = DeprecationLevel.HIDDEN) -public fun DataFrame.Companion.readJsonStr( - @Language("json") text: String, - header: List = emptyList(), - keyValuePaths: List = emptyList(), - typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, -): AnyFrame = DataFrame.readJsonStr(text, header, keyValuePaths, typeClashTactic, true) - -/** Here for binary compatibility. 
*/ -@Deprecated(message = READ_JSON, level = DeprecationLevel.HIDDEN) -public fun DataRow.Companion.readJsonStr( - @Language("json") text: String, - header: List = emptyList(), - keyValuePaths: List = emptyList(), - typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, -): AnyRow = DataRow.readJsonStr(text, header, keyValuePaths, typeClashTactic, true) - -// endregion +internal class DefaultReadJsonMethod(path: String?, arguments: MethodArguments) : + AbstractDefaultReadMethod( + path = path, + arguments = arguments, + methodName = READ_JSON, + ) diff --git a/dataframe-json/src/main/resources/services/org.jetbrains.kotlinx.dataframe.io.SupportedFormat b/dataframe-json/src/main/resources/services/org.jetbrains.kotlinx.dataframe.io.SupportedFormat new file mode 100644 index 0000000000..d1410d82fa --- /dev/null +++ b/dataframe-json/src/main/resources/services/org.jetbrains.kotlinx.dataframe.io.SupportedFormat @@ -0,0 +1 @@ +org.jetbrains.kotlinx.dataframe.io.JSON diff --git a/dataframe-jupyter/build.gradle.kts b/dataframe-jupyter/build.gradle.kts index 1246112bbb..b8146de22e 100644 --- a/dataframe-jupyter/build.gradle.kts +++ b/dataframe-jupyter/build.gradle.kts @@ -4,6 +4,8 @@ plugins { with(libs.plugins) { alias(kotlin.jvm) alias(publisher) + alias(kover) + alias(ktlint) alias(jupyter.api) alias(buildconfig) alias(binary.compatibility.validator) diff --git a/settings.gradle.kts b/settings.gradle.kts index 06b3591a3f..2de92285b6 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -12,6 +12,7 @@ include("plugins:symbol-processor") include("plugins:expressions-converter") include("plugins:kotlin-dataframe") include("tests") +include("dataframe-json") include("dataframe-arrow") include("dataframe-openapi") include("dataframe-excel") @@ -49,3 +50,4 @@ include("dataframe-openapi-generator") include("dataframe-geo") include("plugins:public-api-modifier") include("dataframe-compiler-plugin-core") + From 341b7c85a879164c2f63c7d204fafb38c8050474 Mon Sep 17 
00:00:00 2001 From: Jolan Rensen Date: Wed, 23 Apr 2025 00:06:27 +0200 Subject: [PATCH 02/15] fully decoupled json module, made it optional for csv and excel using reflection --- .../dataframe/annotations/ImportDataSchema.kt | 2 +- .../kotlinx/dataframe/impl/api/parse.kt | 42 +++++++++---------- .../kotlinx/dataframe/impl/io/image.kt | 2 + .../kotlinx/dataframe/impl/schema/Utils.kt | 1 + .../jetbrains/kotlinx/dataframe/io/common.kt | 6 +-- .../org/jetbrains/kotlinx/dataframe/io/csv.kt | 14 +++++-- dataframe-csv/build.gradle.kts | 4 +- .../kotlinx/dataframe/impl/io/jsonHelper.kt | 28 +++++++++++++ .../kotlinx/dataframe/impl/io/writeDelim.kt | 1 - .../kotlinx/dataframe/io/DelimCsvTsvTests.kt | 16 +++++++ dataframe-excel/build.gradle.kts | 1 + .../kotlinx/dataframe/io/jsonHelper.kt | 28 +++++++++++++ dataframe-json/build.gradle.kts | 13 +----- dataframe-jupyter/build.gradle.kts | 2 + .../kotlinx/dataframe/jupyter/Integration.kt | 1 + .../dataframe-gradle-plugin/build.gradle.kts | 1 + plugins/symbol-processor/build.gradle.kts | 1 + .../dataframe/ksp/DataSchemaGenerator.kt | 2 +- 18 files changed, 121 insertions(+), 44 deletions(-) create mode 100644 dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/jsonHelper.kt create mode 100644 dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/jsonHelper.kt diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/annotations/ImportDataSchema.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/annotations/ImportDataSchema.kt index 107e5a3b6d..3f7e0d576c 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/annotations/ImportDataSchema.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/annotations/ImportDataSchema.kt @@ -74,7 +74,7 @@ public annotation class JdbcOptions( public annotation class JsonOptions( /** * Allows the choice of how to handle type clashes when reading a JSON file. 
- * Must be either [TypeClashTactics.ARRAY_AND_VALUE_COLUMNS] or [TypeClashTactics.ANY_COLUMNS] + * Must be either [JsonOptions.TypeClashTactics.ARRAY_AND_VALUE_COLUMNS] or [JsonOptions.TypeClashTactics.ANY_COLUMNS] * */ public val typeClashTactic: String = TypeClashTactics.ARRAY_AND_VALUE_COLUMNS, /** diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt index d2da7201f7..54cca95277 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt @@ -10,11 +10,9 @@ import kotlinx.datetime.toKotlinLocalDate import kotlinx.datetime.toKotlinLocalDateTime import kotlinx.datetime.toKotlinLocalTime import org.jetbrains.kotlinx.dataframe.AnyFrame -import org.jetbrains.kotlinx.dataframe.AnyRow import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame -import org.jetbrains.kotlinx.dataframe.DataRow import org.jetbrains.kotlinx.dataframe.api.GlobalParserOptions import org.jetbrains.kotlinx.dataframe.api.ParserOptions import org.jetbrains.kotlinx.dataframe.api.asColumnGroup @@ -31,13 +29,14 @@ import org.jetbrains.kotlinx.dataframe.columns.TypeSuggestion import org.jetbrains.kotlinx.dataframe.columns.size import org.jetbrains.kotlinx.dataframe.exceptions.TypeConversionException import org.jetbrains.kotlinx.dataframe.hasNulls +import org.jetbrains.kotlinx.dataframe.impl.api.Parsers.resetToDefault +import org.jetbrains.kotlinx.dataframe.impl.api.Parsers.stringParser import org.jetbrains.kotlinx.dataframe.impl.canParse import org.jetbrains.kotlinx.dataframe.impl.catchSilent import org.jetbrains.kotlinx.dataframe.impl.createStarProjectedType import org.jetbrains.kotlinx.dataframe.impl.io.FastDoubleParser import org.jetbrains.kotlinx.dataframe.impl.javaDurationCanParse import 
org.jetbrains.kotlinx.dataframe.io.isUrl -import org.jetbrains.kotlinx.dataframe.io.readJsonStr import org.jetbrains.kotlinx.dataframe.values import java.math.BigDecimal import java.math.BigInteger @@ -404,24 +403,25 @@ internal object Parsers : GlobalParserOptions { stringParser { it.toBigIntegerOrNull() }, // BigDecimal stringParser { it.toBigDecimalOrNull() }, - // JSON array as DataFrame<*> - stringParser(catch = true) { - val trimmed = it.trim() - if (trimmed.startsWith("[") && trimmed.endsWith("]")) { - DataFrame.readJsonStr(it) - } else { - null - } - }, - // JSON object as DataRow<*> - stringParser(catch = true) { - val trimmed = it.trim() - if (trimmed.startsWith("{") && trimmed.endsWith("}")) { - DataRow.readJsonStr(it) - } else { - null - } - }, + + // JSON array as DataFrame<*> TODO +// stringParser(catch = true) { +// val trimmed = it.trim() +// if (trimmed.startsWith("[") && trimmed.endsWith("]")) { +// DataFrame.readJsonStr(it) +// } else { +// null +// } +// }, + // JSON object as DataRow<*> TODO +// stringParser(catch = true) { +// val trimmed = it.trim() +// if (trimmed.startsWith("{") && trimmed.endsWith("}")) { +// DataRow.readJsonStr(it) +// } else { +// null +// } +// }, // Char stringParser { it.singleOrNull() }, // No parser found, return as String diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/image.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/image.kt index 0ef54308ac..9de00d56a2 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/image.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/image.kt @@ -60,6 +60,7 @@ internal fun BufferedImage.toByteArray(format: String = DEFAULT_IMG_FORMAT): Byt } // helper overload for friend modules +@JvmName("resizeKeepingAspectRatioOverload") internal fun resizeKeepingAspectRatio( image: BufferedImage, maxSize: Int, @@ -78,4 +79,5 @@ internal fun resizeKeepingAspectRatio( ) // helper overload for friend modules 
+@JvmName("toByteArrayOverload") internal fun toByteArray(image: BufferedImage, format: String = DEFAULT_IMG_FORMAT) = image.toByteArray(format) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/schema/Utils.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/schema/Utils.kt index caabacc179..c0cdc57c7e 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/schema/Utils.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/schema/Utils.kt @@ -30,6 +30,7 @@ internal fun AnyFrame.extractSchema(): DataFrameSchema = DataFrameSchemaImpl(columns().filter { it.name().isNotEmpty() }.associate { it.name() to it.extractSchema() }) // helper overload for friend modules +@JvmName("intersectSchemasOverload") internal fun intersectSchemas(schemas: Iterable): DataFrameSchema = schemas.intersectSchemas() internal fun Iterable.intersectSchemas(): DataFrameSchema { diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/common.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/common.kt index 672a869ab4..f5021368c2 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/common.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/common.kt @@ -17,7 +17,7 @@ import java.net.URL * Opens a stream to [url] to create a [DataFrame] from it. * If the URL is a file URL, the file is read directly. * If the URL is an HTTP URL, it's also read directly, but if the server returns an error code, - * the error response is read as JSON and parsed as [DataFrame] too. + * the error response is read and parsed as [DataFrame] too. * * Public so it may be used in other modules. 
*/ @@ -32,8 +32,8 @@ public fun catchHttpResponse(url: URL, body: (InputStream) -> AnyFrame): AnyFram if (code != 200) { val response = connection.responseMessage try { - // attempt to read error response as JSON - return DataFrame.readJson(connection.errorStream) + // attempt to read error response as dataframe + return DataFrame.read(connection.errorStream).df } catch (_: Exception) { throw RuntimeException("Server returned HTTP response code: $code. Response: $response") } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt index 2e3fa55f80..fc370459af 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt @@ -430,9 +430,17 @@ public fun AnyFrame.writeCSV(writer: Appendable, format: CSVFormat = CSVFormat.D } forEach { val values = it.values.map { - when (it) { - is AnyRow -> it.toJson() - is AnyFrame -> it.toJson() + when (it) { // todo use compileOnly? + is AnyRow -> + error( + "Encountered a DataRow when writing CSV. This needs to be converted to JSON, which is not supported by `writeCSV` anymore. Please use `df.writeCsv()` instead.", + ) + + is AnyFrame -> + error( + "Encountered a DataFrame when writing CSV. This needs to be converted to JSON, which is not supported by `writeCSV` anymore. 
Please use `df.writeCsv()` instead.", + ) + else -> it } } diff --git a/dataframe-csv/build.gradle.kts b/dataframe-csv/build.gradle.kts index 8f810b1250..d557c69a99 100644 --- a/dataframe-csv/build.gradle.kts +++ b/dataframe-csv/build.gradle.kts @@ -34,13 +34,11 @@ dependencies { implementation(libs.kotlinLogging) implementation(libs.kotlin.reflect) - // for writing json in csv - implementation(projects.dataframeJson) - testApi(projects.core) testImplementation(libs.kotlinx.benchmark.runtime) testImplementation(libs.junit) testImplementation(libs.sl4jsimple) + testImplementation(projects.dataframeJson) testImplementation(libs.kotestAssertions) { exclude("org.jetbrains.kotlin", "kotlin-stdlib-jdk8") } diff --git a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/jsonHelper.kt b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/jsonHelper.kt new file mode 100644 index 0000000000..73e390817f --- /dev/null +++ b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/jsonHelper.kt @@ -0,0 +1,28 @@ +package org.jetbrains.kotlinx.dataframe.impl.io + +import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.AnyRow + +internal fun AnyFrame.toJson(prettyPrint: Boolean = false): String { + val jsonClass = try { + Class.forName("org.jetbrains.kotlinx.dataframe.io.JsonKt") + } catch (_: ClassNotFoundException) { + error( + "Encountered a DataFrame when writing to csv/tsv/delim. This needs to be converted to JSON, so the dataframe-json dependency is required.", + ) + } + return jsonClass.getMethod("toJson", AnyFrame::class.java, Boolean::class.java) + .invoke(null, this, prettyPrint) as String +} + +internal fun AnyRow.toJson(prettyPrint: Boolean = false): String { + val jsonClass = try { + Class.forName("org.jetbrains.kotlinx.dataframe.io.JsonKt") + } catch (_: ClassNotFoundException) { + error( + "Encountered a DataRow when writing to csv/tsv/delim. 
This needs to be converted to JSON, so the dataframe-json dependency is required.", + ) + } + return jsonClass.getMethod("toJson", AnyRow::class.java, Boolean::class.java) + .invoke(null, this, prettyPrint) as String +} diff --git a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeDelim.kt b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeDelim.kt index 6a7eb9b368..79d007365e 100644 --- a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeDelim.kt +++ b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeDelim.kt @@ -12,7 +12,6 @@ import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.CSV_DELIMITER import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.WRITER_WRITE import org.jetbrains.kotlinx.dataframe.io.AdjustCSVFormat import org.jetbrains.kotlinx.dataframe.io.QuoteMode -import org.jetbrains.kotlinx.dataframe.io.toJson import org.apache.commons.csv.QuoteMode as ApacheQuoteMode /** diff --git a/dataframe-csv/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/DelimCsvTsvTests.kt b/dataframe-csv/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/DelimCsvTsvTests.kt index f303e5028c..153e26553f 100644 --- a/dataframe-csv/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/DelimCsvTsvTests.kt +++ b/dataframe-csv/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/DelimCsvTsvTests.kt @@ -796,6 +796,22 @@ class DelimCsvTsvTests { } } + @Test + fun `json dependency test`() { + val df = dataFrameOf("firstName", "lastName")( + "John", "Doe", + "Jane", "Doe", + ).group { "firstName" and "lastName" }.into { "name" } + + df.toCsvStr(quote = '\'') shouldBe + """ + name + '{"firstName":"John","lastName":"Doe"}' + '{"firstName":"Jane","lastName":"Doe"}' + + """.trimIndent() + } + companion object { private val irisDataset = testCsv("irisDataset") private val simpleCsv = testCsv("testCSV") diff --git a/dataframe-excel/build.gradle.kts 
b/dataframe-excel/build.gradle.kts index e995d592b4..f3f74f6702 100644 --- a/dataframe-excel/build.gradle.kts +++ b/dataframe-excel/build.gradle.kts @@ -22,6 +22,7 @@ dependencies { implementation(libs.kotlin.datetimeJvm) testImplementation(libs.junit) + testImplementation(projects.dataframeJson) testImplementation(libs.kotestAssertions) { exclude("org.jetbrains.kotlin", "kotlin-stdlib-jdk8") } diff --git a/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/jsonHelper.kt b/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/jsonHelper.kt new file mode 100644 index 0000000000..96c0cd9528 --- /dev/null +++ b/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/jsonHelper.kt @@ -0,0 +1,28 @@ +package org.jetbrains.kotlinx.dataframe.io + +import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.AnyRow + +internal fun AnyFrame.toJson(prettyPrint: Boolean = false): String { + val jsonClass = try { + Class.forName("org.jetbrains.kotlinx.dataframe.io.JsonKt") + } catch (_: ClassNotFoundException) { + error( + "Encountered a DataFrame when writing to an Excel cell. This needs to be converted to JSON, so the dataframe-json dependency is required.", + ) + } + return jsonClass.getMethod("toJson", AnyFrame::class.java, Boolean::class.java) + .invoke(null, this, prettyPrint) as String +} + +internal fun AnyRow.toJson(prettyPrint: Boolean = false): String { + val jsonClass = try { + Class.forName("org.jetbrains.kotlinx.dataframe.io.JsonKt") + } catch (_: ClassNotFoundException) { + error( + "Encountered a DataRow when writing to an Excel cell. 
This needs to be converted to JSON, so the dataframe-json dependency is required.", + ) + } + return jsonClass.getMethod("toJson", AnyRow::class.java, Boolean::class.java) + .invoke(null, this, prettyPrint) as String +} diff --git a/dataframe-json/build.gradle.kts b/dataframe-json/build.gradle.kts index 2dfd4732da..40f1822cf1 100644 --- a/dataframe-json/build.gradle.kts +++ b/dataframe-json/build.gradle.kts @@ -10,20 +10,10 @@ plugins { alias(kodex) alias(buildconfig) alias(binary.compatibility.validator) - - // generates keywords using the :generator module -// alias(keywordGenerator) - - // dependence on our own plugin -// alias(dataframe) - - // only mandatory if `kotlin.dataframe.add.ksp=false` in gradle.properties -// alias(ksp) } } group = "org.jetbrains.kotlinx" -version = "1.0.0-dev" repositories { mavenLocal() @@ -32,6 +22,7 @@ repositories { dependencies { api(projects.core) + implementation(libs.kotlin.stdlib) implementation(libs.serialization.core) implementation(libs.serialization.json) @@ -62,7 +53,7 @@ kotlinPublications { publication { publicationName = "dataframeJson" artifactId = project.name - description = "Kotlin DataFrame JSON integration." 
+ description = "Kotlin DataFrame JSON integration" packageName = artifactId } } diff --git a/dataframe-jupyter/build.gradle.kts b/dataframe-jupyter/build.gradle.kts index b8146de22e..428e8763b4 100644 --- a/dataframe-jupyter/build.gradle.kts +++ b/dataframe-jupyter/build.gradle.kts @@ -20,6 +20,7 @@ repositories { dependencies { compileOnly(projects.core) + compileOnly(projects.dataframeJson) testImplementation(libs.junit) testImplementation(libs.serialization.json) @@ -28,6 +29,7 @@ dependencies { testImplementation(projects.dataframeCsv) testImplementation(projects.dataframeExcel) testImplementation(projects.dataframeJdbc) + testImplementation(projects.dataframeJson) // experimental testImplementation(projects.dataframeOpenapiGenerator) testImplementation(projects.dataframeOpenapi) diff --git a/dataframe-jupyter/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/Integration.kt b/dataframe-jupyter/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/Integration.kt index 85295f2c4a..48b92166e1 100644 --- a/dataframe-jupyter/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/Integration.kt +++ b/dataframe-jupyter/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/Integration.kt @@ -172,6 +172,7 @@ internal class Integration(private val notebook: Notebook, private val options: } dependencies( "org.jetbrains.kotlinx:dataframe-core:$version", + "org.jetbrains.kotlinx:dataframe-json:$version", "org.jetbrains.kotlinx:dataframe-csv:$version", "org.jetbrains.kotlinx:dataframe-excel:$version", "org.jetbrains.kotlinx:dataframe-jdbc:$version", diff --git a/plugins/dataframe-gradle-plugin/build.gradle.kts b/plugins/dataframe-gradle-plugin/build.gradle.kts index d03345773a..d87e3ffae9 100644 --- a/plugins/dataframe-gradle-plugin/build.gradle.kts +++ b/plugins/dataframe-gradle-plugin/build.gradle.kts @@ -21,6 +21,7 @@ dependencies { implementation(projects.dataframeExcel) implementation(projects.dataframeCsv) implementation(projects.dataframeJdbc) + 
implementation(projects.dataframeJson) // experimental implementation(projects.dataframeOpenapiGenerator) diff --git a/plugins/symbol-processor/build.gradle.kts b/plugins/symbol-processor/build.gradle.kts index b07e0f3400..db2a3f5d17 100644 --- a/plugins/symbol-processor/build.gradle.kts +++ b/plugins/symbol-processor/build.gradle.kts @@ -24,6 +24,7 @@ dependencies { implementation(projects.dataframeExcel) implementation(projects.dataframeCsv) implementation(projects.dataframeJdbc) + implementation(projects.dataframeJson) // experimental implementation(projects.dataframeOpenapiGenerator) diff --git a/plugins/symbol-processor/src/main/kotlin/org/jetbrains/dataframe/ksp/DataSchemaGenerator.kt b/plugins/symbol-processor/src/main/kotlin/org/jetbrains/dataframe/ksp/DataSchemaGenerator.kt index 6290183d66..877b553903 100644 --- a/plugins/symbol-processor/src/main/kotlin/org/jetbrains/dataframe/ksp/DataSchemaGenerator.kt +++ b/plugins/symbol-processor/src/main/kotlin/org/jetbrains/dataframe/ksp/DataSchemaGenerator.kt @@ -160,7 +160,7 @@ class DataSchemaGenerator( val formats = listOfNotNull( CsvDeephaven(delimiter = importStatement.csvOptions.delimiter), JSON( - typeClashTactic = importStatement.jsonOptions.typeClashTactic, + typeClashTactic = JSON.TypeClashTactic.valueOf(importStatement.jsonOptions.typeClashTactic), keyValuePaths = importStatement.jsonOptions.keyValuePaths.map(::JsonPath), ), Excel(), From 0f155ae1ff212117076b989ef9dda8ec371300c7 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Thu, 24 Apr 2025 16:54:28 +0200 Subject: [PATCH 03/15] added dataframe-json as excludable dependency to dataframe-csv and dataframe-excel --- dataframe-csv/build.gradle.kts | 4 +++ .../kotlinx/dataframe/impl/io/jsonHelper.kt | 28 ------------------- .../kotlinx/dataframe/impl/io/writeDelim.kt | 19 +++++++++++-- dataframe-excel/build.gradle.kts | 5 ++++ .../kotlinx/dataframe/io/jsonHelper.kt | 28 ------------------- .../jetbrains/kotlinx/dataframe/io/xlsx.kt | 20 +++++++++++-- 6 
files changed, 44 insertions(+), 60 deletions(-) delete mode 100644 dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/jsonHelper.kt delete mode 100644 dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/jsonHelper.kt diff --git a/dataframe-csv/build.gradle.kts b/dataframe-csv/build.gradle.kts index d557c69a99..449f9de757 100644 --- a/dataframe-csv/build.gradle.kts +++ b/dataframe-csv/build.gradle.kts @@ -25,6 +25,10 @@ repositories { dependencies { api(projects.core) + // for reading/writing JSON <-> DataFrame/DataRow in CSV/TSV/Delim + // can safely be excluded when working without JSON and only writing flat dataframes + api(projects.dataframeJson) + // for csv reading api(libs.deephavenCsv) // for csv writing diff --git a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/jsonHelper.kt b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/jsonHelper.kt deleted file mode 100644 index 73e390817f..0000000000 --- a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/jsonHelper.kt +++ /dev/null @@ -1,28 +0,0 @@ -package org.jetbrains.kotlinx.dataframe.impl.io - -import org.jetbrains.kotlinx.dataframe.AnyFrame -import org.jetbrains.kotlinx.dataframe.AnyRow - -internal fun AnyFrame.toJson(prettyPrint: Boolean = false): String { - val jsonClass = try { - Class.forName("org.jetbrains.kotlinx.dataframe.io.JsonKt") - } catch (_: ClassNotFoundException) { - error( - "Encountered a DataFrame when writing to csv/tsv/delim. 
This needs to be converted to JSON, so the dataframe-json dependency is required.", - ) - } - return jsonClass.getMethod("toJson", AnyFrame::class.java, Boolean::class.java) - .invoke(null, this, prettyPrint) as String -} - -internal fun AnyRow.toJson(prettyPrint: Boolean = false): String { - val jsonClass = try { - Class.forName("org.jetbrains.kotlinx.dataframe.io.JsonKt") - } catch (_: ClassNotFoundException) { - error( - "Encountered a DataRow when writing to csv/tsv/delim. This needs to be converted to JSON, so the dataframe-json dependency is required.", - ) - } - return jsonClass.getMethod("toJson", AnyRow::class.java, Boolean::class.java) - .invoke(null, this, prettyPrint) as String -} diff --git a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeDelim.kt b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeDelim.kt index 79d007365e..008fb5a19d 100644 --- a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeDelim.kt +++ b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeDelim.kt @@ -12,6 +12,7 @@ import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.CSV_DELIMITER import org.jetbrains.kotlinx.dataframe.documentation.DelimParams.WRITER_WRITE import org.jetbrains.kotlinx.dataframe.io.AdjustCSVFormat import org.jetbrains.kotlinx.dataframe.io.QuoteMode +import org.jetbrains.kotlinx.dataframe.io.toJson import org.apache.commons.csv.QuoteMode as ApacheQuoteMode /** @@ -57,8 +58,22 @@ internal fun writeDelimImpl( df.forEach { val values = it.values().map { when (it) { - is AnyRow -> it.toJson() - is AnyFrame -> it.toJson() + is AnyRow -> try { + it.toJson() + } catch (_: NoClassDefFoundError) { + error( + "Encountered a DataFrame when writing to csv/tsv/delim. 
This needs to be converted to JSON, so the dataframe-json dependency is required.", + ) + } + + is AnyFrame -> try { + it.toJson() + } catch (_: NoClassDefFoundError) { + error( + "Encountered a DataRow when writing to csv/tsv/delim. This needs to be converted to JSON, so the dataframe-json dependency is required.", + ) + } + else -> it } } diff --git a/dataframe-excel/build.gradle.kts b/dataframe-excel/build.gradle.kts index f3f74f6702..3f22df5909 100644 --- a/dataframe-excel/build.gradle.kts +++ b/dataframe-excel/build.gradle.kts @@ -17,6 +17,11 @@ repositories { dependencies { api(projects.core) api(libs.poi) + + // for writing DataFrame/DataRow -> JSON in Excel cells + // can safely be excluded when writing only flat dataframes + api(projects.dataframeJson) + implementation(libs.poi.ooxml) implementation(libs.kotlin.datetimeJvm) diff --git a/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/jsonHelper.kt b/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/jsonHelper.kt deleted file mode 100644 index 96c0cd9528..0000000000 --- a/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/jsonHelper.kt +++ /dev/null @@ -1,28 +0,0 @@ -package org.jetbrains.kotlinx.dataframe.io - -import org.jetbrains.kotlinx.dataframe.AnyFrame -import org.jetbrains.kotlinx.dataframe.AnyRow - -internal fun AnyFrame.toJson(prettyPrint: Boolean = false): String { - val jsonClass = try { - Class.forName("org.jetbrains.kotlinx.dataframe.io.JsonKt") - } catch (_: ClassNotFoundException) { - error( - "Encountered a DataFrame when writing to an Excel cell. 
This needs to be converted to JSON, so the dataframe-json dependency is required.", - ) - } - return jsonClass.getMethod("toJson", AnyFrame::class.java, Boolean::class.java) - .invoke(null, this, prettyPrint) as String -} - -internal fun AnyRow.toJson(prettyPrint: Boolean = false): String { - val jsonClass = try { - Class.forName("org.jetbrains.kotlinx.dataframe.io.JsonKt") - } catch (_: ClassNotFoundException) { - error( - "Encountered a DataRow when writing to an Excel cell. This needs to be converted to JSON, so the dataframe-json dependency is required.", - ) - } - return jsonClass.getMethod("toJson", AnyRow::class.java, Boolean::class.java) - .invoke(null, this, prettyPrint) as String -} diff --git a/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt b/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt index 95104f5ad0..4cc061cdce 100644 --- a/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt +++ b/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt @@ -668,9 +668,25 @@ public fun DataFrame.writeExcel( private fun Cell.setCellValueByGuessedType(any: Any) = when (any) { - is AnyRow -> this.setCellValue(any.toJson()) + is AnyRow -> this.setCellValue( + try { + any.toJson() + } catch (_: NoClassDefFoundError) { + error( + "Encountered a DataRow when writing to an Excel cell. This needs to be converted to JSON, so the dataframe-json dependency is required.", + ) + }, + ) - is AnyFrame -> this.setCellValue(any.toJson()) + is AnyFrame -> this.setCellValue( + try { + any.toJson() + } catch (_: NoClassDefFoundError) { + error( + "Encountered a DataFrame when writing to an Excel cell. 
This needs to be converted to JSON, so the dataframe-json dependency is required.", + ) + }, + ) is Number -> this.setCellValue(any.toDouble()) From fe617a4f98fbe486977033052789c2a6edbe87f3 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Fri, 25 Apr 2025 17:59:28 +0200 Subject: [PATCH 04/15] fixing tests --- core/build.gradle.kts | 3 + .../kotlinx/dataframe/impl/api/parse.kt | 90 +++++++++++++++---- .../org/jetbrains/kotlinx/dataframe/Utils.kt | 2 - dataframe-csv/build.gradle.kts | 1 - .../kotlinx/dataframe/impl/io/writeJson.kt | 2 +- ...rains.kotlinx.dataframe.io.SupportedFormat | 0 .../dataframe/io/ImageSerializationTests.kt | 5 +- .../jetbrains/kotlinx/dataframe/io/json.kt | 31 +++++-- 8 files changed, 103 insertions(+), 31 deletions(-) rename dataframe-json/src/main/resources/{ => META-INF}/services/org.jetbrains.kotlinx.dataframe.io.SupportedFormat (100%) rename {core => dataframe-json}/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ImageSerializationTests.kt (96%) rename {core => dataframe-json}/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt (97%) diff --git a/core/build.gradle.kts b/core/build.gradle.kts index 0e10747099..76d3a20afc 100644 --- a/core/build.gradle.kts +++ b/core/build.gradle.kts @@ -79,6 +79,9 @@ dependencies { testImplementation(libs.kotlin.scriptingJvm) testImplementation(libs.jsoup) testImplementation(libs.sl4jsimple) + testImplementation(projects.dataframeJson) + testImplementation(libs.serialization.core) + testImplementation(libs.serialization.json) // for samples.api testImplementation(projects.dataframeCsv) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt index 54cca95277..8aaa9a3232 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt @@ -1,5 +1,6 @@ package org.jetbrains.kotlinx.dataframe.impl.api 
+import io.github.oshai.kotlinlogging.KotlinLogging import kotlinx.datetime.Instant import kotlinx.datetime.LocalDate import kotlinx.datetime.LocalDateTime @@ -10,9 +11,11 @@ import kotlinx.datetime.toKotlinLocalDate import kotlinx.datetime.toKotlinLocalDateTime import kotlinx.datetime.toKotlinLocalTime import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.AnyRow import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow import org.jetbrains.kotlinx.dataframe.api.GlobalParserOptions import org.jetbrains.kotlinx.dataframe.api.ParserOptions import org.jetbrains.kotlinx.dataframe.api.asColumnGroup @@ -30,7 +33,6 @@ import org.jetbrains.kotlinx.dataframe.columns.size import org.jetbrains.kotlinx.dataframe.exceptions.TypeConversionException import org.jetbrains.kotlinx.dataframe.hasNulls import org.jetbrains.kotlinx.dataframe.impl.api.Parsers.resetToDefault -import org.jetbrains.kotlinx.dataframe.impl.api.Parsers.stringParser import org.jetbrains.kotlinx.dataframe.impl.canParse import org.jetbrains.kotlinx.dataframe.impl.catchSilent import org.jetbrains.kotlinx.dataframe.impl.createStarProjectedType @@ -60,6 +62,8 @@ import java.time.LocalDate as JavaLocalDate import java.time.LocalDateTime as JavaLocalDateTime import java.time.LocalTime as JavaLocalTime +private val logger = KotlinLogging.logger { } + internal interface StringParser { fun toConverter(options: ParserOptions?): TypeConverter @@ -210,7 +214,7 @@ internal object Parsers : GlobalParserOptions { .parseOrNull(this) ?.toInstantUsingOffset() } - // fallback on the java instant to catch things like "2022-01-23T04:29:60", a.k.a. leap seconds + // fallback on the java instant to catch things like "2022-01-23T04:29:60", a.k.a. leap seconds ?: toJavaInstantOrNull()?.toKotlinInstant() private fun String.toJavaInstantOrNull(): JavaInstant? 
= @@ -405,23 +409,73 @@ internal object Parsers : GlobalParserOptions { stringParser { it.toBigDecimalOrNull() }, // JSON array as DataFrame<*> TODO -// stringParser(catch = true) { -// val trimmed = it.trim() -// if (trimmed.startsWith("[") && trimmed.endsWith("]")) { -// DataFrame.readJsonStr(it) -// } else { -// null -// } -// }, + stringParser(catch = true) { + val trimmed = it.trim() + if (trimmed.startsWith("[") && trimmed.endsWith("]")) { + try { + val klass = Class.forName("org.jetbrains.kotlinx.dataframe.io.JsonKt") + val typeClashTactic = Class.forName("org.jetbrains.kotlinx.dataframe.io.JSON\$TypeClashTactic") + val readJsonStr = klass.getMethod( + "readJsonStr", + /* this = */DataFrame.Companion::class.java, + /* text = */ String::class.java, + /* header = */ List::class.java, + /* keyValuePaths = */ List::class.java, + /* typeClashTactic = */ typeClashTactic, + /* unifyNumbers = */ Boolean::class.java, + ) + + readJsonStr.invoke( + null, + /* this = */ DataFrame.Companion, + /* text = */ trimmed, + /* header = */ emptyList(), + /* keyValuePaths = */ emptyList(), + /* typeClashTactic = */ typeClashTactic.enumConstants[0], + /* unifyNumbers = */ true, + ) as AnyFrame + } catch (_: ClassNotFoundException) { + logger.warn { "parse() encountered a string that looks like a JSON array, but the dataframe-json dependency was not detected. Skipping for now." 
} + null + } + } else { + null + } + }, // JSON object as DataRow<*> TODO -// stringParser(catch = true) { -// val trimmed = it.trim() -// if (trimmed.startsWith("{") && trimmed.endsWith("}")) { -// DataRow.readJsonStr(it) -// } else { -// null -// } -// }, + stringParser(catch = true) { + val trimmed = it.trim() + if (trimmed.startsWith("{") && trimmed.endsWith("}")) { + try { + val klass = Class.forName("org.jetbrains.kotlinx.dataframe.io.JsonKt") + val typeClashTactic = Class.forName("org.jetbrains.kotlinx.dataframe.io.JSON\$TypeClashTactic") + val readJsonStr = klass.getMethod( + "readJsonStr", + /* this = */DataRow.Companion::class.java, + /* text = */ String::class.java, + /* header = */ List::class.java, + /* keyValuePaths = */ List::class.java, + /* typeClashTactic = */ typeClashTactic, + /* unifyNumbers = */ Boolean::class.java, + ) + + readJsonStr.invoke( + null, + /* this = */ DataRow.Companion, + /* text = */ trimmed, + /* header = */ emptyList(), + /* keyValuePaths = */ emptyList(), + /* typeClashTactic = */ typeClashTactic.enumConstants[0], + /* unifyNumbers = */ true, + ) as AnyRow + } catch (_: ClassNotFoundException) { + logger.warn { "parse() encountered a string that looks like a JSON object, but the dataframe-json dependency was not detected. Skipping for now." } + null + } + } else { + null + } + }, // Char stringParser { it.singleOrNull() }, // No parser found, return as String diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/Utils.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/Utils.kt index 3865c36343..3354334424 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/Utils.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/Utils.kt @@ -31,5 +31,3 @@ fun > T.alsoDebug(println: String? 
= null, rowsLimit: Int = 20) print(borders = true, title = true, columnTypes = true, valueLimit = -1, rowsLimit = rowsLimit) schema().print() } - -fun parseJsonStr(jsonStr: String): JsonObject = Json.parseToJsonElement(jsonStr).jsonObject diff --git a/dataframe-csv/build.gradle.kts b/dataframe-csv/build.gradle.kts index 449f9de757..c444ee4038 100644 --- a/dataframe-csv/build.gradle.kts +++ b/dataframe-csv/build.gradle.kts @@ -42,7 +42,6 @@ dependencies { testImplementation(libs.kotlinx.benchmark.runtime) testImplementation(libs.junit) testImplementation(libs.sl4jsimple) - testImplementation(projects.dataframeJson) testImplementation(libs.kotestAssertions) { exclude("org.jetbrains.kotlin", "kotlin-stdlib-jdk8") } diff --git a/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeJson.kt b/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeJson.kt index 833468e78c..5c168cba80 100644 --- a/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeJson.kt +++ b/dataframe-json/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeJson.kt @@ -426,7 +426,7 @@ private fun isDataframeConvertable(dataframeLike: Any?) 
= KotlinNotebookPluginUtils.isDataframeConvertable(dataframeLike = dataframeLike) @Suppress("INVISIBLE_REFERENCE") -private fun BufferedImage.resizeKeepingAspectRatio( +internal fun BufferedImage.resizeKeepingAspectRatio( maxSize: Int, resultImageType: Int = BufferedImage.TYPE_INT_ARGB, interpolation: Any = RenderingHints.VALUE_INTERPOLATION_NEAREST_NEIGHBOR, diff --git a/dataframe-json/src/main/resources/services/org.jetbrains.kotlinx.dataframe.io.SupportedFormat b/dataframe-json/src/main/resources/META-INF/services/org.jetbrains.kotlinx.dataframe.io.SupportedFormat similarity index 100% rename from dataframe-json/src/main/resources/services/org.jetbrains.kotlinx.dataframe.io.SupportedFormat rename to dataframe-json/src/main/resources/META-INF/services/org.jetbrains.kotlinx.dataframe.io.SupportedFormat diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ImageSerializationTests.kt b/dataframe-json/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ImageSerializationTests.kt similarity index 96% rename from core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ImageSerializationTests.kt rename to dataframe-json/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ImageSerializationTests.kt index fe119fc414..21f1750e12 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ImageSerializationTests.kt +++ b/dataframe-json/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ImageSerializationTests.kt @@ -13,8 +13,6 @@ import org.jetbrains.kotlinx.dataframe.impl.io.resizeKeepingAspectRatio import org.jetbrains.kotlinx.dataframe.io.Base64ImageEncodingOptions.Companion.ALL_OFF import org.jetbrains.kotlinx.dataframe.io.Base64ImageEncodingOptions.Companion.GZIP_ON import org.jetbrains.kotlinx.dataframe.io.Base64ImageEncodingOptions.Companion.LIMIT_SIZE_ON -import org.jetbrains.kotlinx.dataframe.parseJsonStr -import org.jetbrains.kotlinx.dataframe.testResource import org.junit.Test import org.junit.runner.RunWith import 
org.junit.runners.Parameterized @@ -25,6 +23,7 @@ import java.io.File import java.util.Base64 import java.util.zip.GZIPInputStream import javax.imageio.ImageIO +import kotlin.math.abs @RunWith(Parameterized::class) class ImageSerializationTests(private val encodingOptions: Base64ImageEncodingOptions?) { @@ -145,7 +144,7 @@ class ImageSerializationTests(private val encodingOptions: Base64ImageEncodingOp val g2 = (rgb2 shr 8) and 0xFF val b2 = rgb2 and 0xFF - val diff = kotlin.math.abs(r1 - r2) + kotlin.math.abs(g1 - g2) + kotlin.math.abs(b1 - b2) + val diff = abs(r1 - r2) + abs(g1 - g2) + abs(b1 - b2) // If the difference in color components exceed our allowance return false if (diff > allowedDelta) { diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt b/dataframe-json/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt similarity index 97% rename from core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt rename to dataframe-json/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt index baff3b703f..d91fd7ab1e 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt +++ b/dataframe-json/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt @@ -10,6 +10,7 @@ import io.kotest.matchers.types.instanceOf import io.kotest.matchers.types.shouldBeInstanceOf import kotlinx.serialization.json.Json import kotlinx.serialization.json.JsonElement +import kotlinx.serialization.json.JsonObject import kotlinx.serialization.json.boolean import kotlinx.serialization.json.int import kotlinx.serialization.json.jsonArray @@ -19,7 +20,6 @@ import org.intellij.lang.annotations.Language import org.jetbrains.kotlinx.dataframe.AnyFrame import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.DataRow -import org.jetbrains.kotlinx.dataframe.alsoDebug import org.jetbrains.kotlinx.dataframe.api.JsonPath import org.jetbrains.kotlinx.dataframe.api.allNulls import 
org.jetbrains.kotlinx.dataframe.api.colsOf @@ -29,6 +29,7 @@ import org.jetbrains.kotlinx.dataframe.api.dataFrameOf import org.jetbrains.kotlinx.dataframe.api.forEach import org.jetbrains.kotlinx.dataframe.api.getColumnGroup import org.jetbrains.kotlinx.dataframe.api.getFrameColumn +import org.jetbrains.kotlinx.dataframe.api.print import org.jetbrains.kotlinx.dataframe.api.schema import org.jetbrains.kotlinx.dataframe.api.toFloat import org.jetbrains.kotlinx.dataframe.api.toMap @@ -46,16 +47,14 @@ import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.NCOL import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.NROW import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.VERSION import org.jetbrains.kotlinx.dataframe.impl.io.readJsonImpl -import org.jetbrains.kotlinx.dataframe.impl.nothingType -import org.jetbrains.kotlinx.dataframe.impl.nullableNothingType import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.ANY_COLUMNS import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.ARRAY_AND_VALUE_COLUMNS -import org.jetbrains.kotlinx.dataframe.parseJsonStr -import org.jetbrains.kotlinx.dataframe.testJson import org.jetbrains.kotlinx.dataframe.type import org.jetbrains.kotlinx.dataframe.values import org.junit.Test +import java.net.URL import kotlin.Double +import kotlin.reflect.KType import kotlin.reflect.typeOf @Suppress("ktlint:standard:argument-list-wrapping") @@ -1121,7 +1120,7 @@ class JsonTests { @Test fun `serialize column with list of objects`() { val df = dataFrameOf("col")(Regex(".+").findAll("abc").toList()) - val json = shouldNotThrowAny { df.toJson() }!! + val json = shouldNotThrowAny { df.toJson() } val list = DataFrame.readJsonStr(json)["col"][0].shouldBeInstanceOf>() list[0].shouldBeInstanceOf() } @@ -1142,3 +1141,23 @@ class JsonTests { } } } + +fun testResource(resourcePath: String): URL = JsonTests::class.java.classLoader.getResource(resourcePath)!! 
+ +fun parseJsonStr(jsonStr: String): JsonObject = Json.parseToJsonElement(jsonStr).jsonObject + +fun testJson(jsonName: String) = testResource("$jsonName.json") + +/** + * Prints dataframe to console with borders, title, column types and schema + */ +fun > T.alsoDebug(println: String? = null, rowsLimit: Int = 20): T = + apply { + println?.let { println(it) } + print(borders = true, title = true, columnTypes = true, valueLimit = -1, rowsLimit = rowsLimit) + schema().print() + } + +internal val nothingType: KType = typeOf>().arguments.first().type!! +internal val nullableNothingType: KType = typeOf>().arguments.first().type!! +internal fun nothingType(nullable: Boolean): KType = if (nullable) nullableNothingType else nothingType From 993e6d3a03584cf46d883e14b9f9ee242ac1fba0 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Sat, 26 Apr 2025 12:56:05 +0200 Subject: [PATCH 05/15] moved reflection-based json parser to lazy value --- .../kotlinx/dataframe/columns/ColumnGroup.kt | 2 - .../jetbrains/kotlinx/dataframe/impl/Utils.kt | 3 +- .../kotlinx/dataframe/impl/api/parse.kt | 156 ++++++++++++------ .../org/jetbrains/kotlinx/dataframe/Utils.kt | 3 - .../jetbrains/kotlinx/dataframe/io/json.kt | 1 + settings.gradle.kts | 1 - 6 files changed, 105 insertions(+), 61 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnGroup.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnGroup.kt index 734f5dd3c5..bd1fe61347 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnGroup.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnGroup.kt @@ -9,8 +9,6 @@ import org.jetbrains.kotlinx.dataframe.annotations.HasSchema import org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl import org.jetbrains.kotlinx.dataframe.api.asColumnGroup import org.jetbrains.kotlinx.dataframe.api.columnGroup -import org.jetbrains.kotlinx.dataframe.impl.schema.extractSchema -import 
org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema import kotlin.reflect.KProperty /** diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/Utils.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/Utils.kt index 902fe6c85d..bbe4f92c67 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/Utils.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/Utils.kt @@ -152,8 +152,7 @@ internal fun Iterable>.commonType(nullable: Boolean, upperBound: KType // helper overload for friend modules @JvmName("commonTypeOverload") -internal fun commonType(types: Iterable, useStar: Boolean = true) = - types.commonType(useStar) +internal fun commonType(types: Iterable, useStar: Boolean = true) = types.commonType(useStar) /** * Returns the common supertype of the given types. diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt index 8aaa9a3232..4e82aa9f1f 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt @@ -26,7 +26,6 @@ import org.jetbrains.kotlinx.dataframe.api.isColumnGroup import org.jetbrains.kotlinx.dataframe.api.isFrameColumn import org.jetbrains.kotlinx.dataframe.api.isSubtypeOf import org.jetbrains.kotlinx.dataframe.api.map -import org.jetbrains.kotlinx.dataframe.api.parser import org.jetbrains.kotlinx.dataframe.api.to import org.jetbrains.kotlinx.dataframe.columns.TypeSuggestion import org.jetbrains.kotlinx.dataframe.columns.size @@ -214,7 +213,7 @@ internal object Parsers : GlobalParserOptions { .parseOrNull(this) ?.toInstantUsingOffset() } - // fallback on the java instant to catch things like "2022-01-23T04:29:60", a.k.a. leap seconds + // fallback on the java instant to catch things like "2022-01-23T04:29:60", a.k.a. 
leap seconds ?: toJavaInstantOrNull()?.toKotlinInstant() private fun String.toJavaInstantOrNull(): JavaInstant? = @@ -338,6 +337,94 @@ internal object Parsers : GlobalParserOptions { parser } + // TODO rewrite using parser service later https://github.com/Kotlin/dataframe/issues/962 + // null when dataframe-json is not present + private val readJsonStrAnyFrame: ((text: String) -> AnyFrame)? by lazy { + try { + val klass = Class.forName("org.jetbrains.kotlinx.dataframe.io.JsonKt") + val typeClashTactic = Class.forName("org.jetbrains.kotlinx.dataframe.io.JSON\$TypeClashTactic") + val readJsonStr = klass.getMethod( + "readJsonStr", + // this = + DataFrame.Companion::class.java, + // text = + String::class.java, + // header = + List::class.java, + // keyValuePaths = + List::class.java, + // typeClashTactic = + typeClashTactic, + // unifyNumbers = + Boolean::class.java, + ) + + return@lazy { text: String -> + readJsonStr.invoke( + null, + // this = + DataFrame.Companion, + // text = + text, + // header = + emptyList(), + // keyValuePaths = + emptyList(), + // typeClashTactic = + typeClashTactic.enumConstants[0], + // unifyNumbers = + true, + ) as AnyFrame + } + } catch (_: ClassNotFoundException) { + return@lazy null + } + } + + // TODO rewrite using parser service later https://github.com/Kotlin/dataframe/issues/962 + // null when dataframe-json is not present + private val readJsonStrAnyRow: ((text: String) -> AnyRow)? 
by lazy { + try { + val klass = Class.forName("org.jetbrains.kotlinx.dataframe.io.JsonKt") + val typeClashTactic = Class.forName("org.jetbrains.kotlinx.dataframe.io.JSON\$TypeClashTactic") + val readJsonStr = klass.getMethod( + "readJsonStr", + // this = + DataRow.Companion::class.java, + // text = + String::class.java, + // header = + List::class.java, + // keyValuePaths = + List::class.java, + // typeClashTactic = + typeClashTactic, + // unifyNumbers = + Boolean::class.java, + ) + + return@lazy { text: String -> + readJsonStr.invoke( + null, + // this = + DataRow.Companion, + // text = + text, + // header = + emptyList(), + // keyValuePaths = + emptyList(), + // typeClashTactic = + typeClashTactic.enumConstants[0], + // unifyNumbers = + true, + ) as AnyRow + } + } catch (_: ClassNotFoundException) { + return@lazy null + } + } + internal val parsersOrder = listOf( // Int stringParser { it.toIntOrNull() }, @@ -407,70 +494,33 @@ internal object Parsers : GlobalParserOptions { stringParser { it.toBigIntegerOrNull() }, // BigDecimal stringParser { it.toBigDecimalOrNull() }, - - // JSON array as DataFrame<*> TODO + // JSON array as DataFrame<*> stringParser(catch = true) { val trimmed = it.trim() if (trimmed.startsWith("[") && trimmed.endsWith("]")) { - try { - val klass = Class.forName("org.jetbrains.kotlinx.dataframe.io.JsonKt") - val typeClashTactic = Class.forName("org.jetbrains.kotlinx.dataframe.io.JSON\$TypeClashTactic") - val readJsonStr = klass.getMethod( - "readJsonStr", - /* this = */DataFrame.Companion::class.java, - /* text = */ String::class.java, - /* header = */ List::class.java, - /* keyValuePaths = */ List::class.java, - /* typeClashTactic = */ typeClashTactic, - /* unifyNumbers = */ Boolean::class.java, - ) - - readJsonStr.invoke( - null, - /* this = */ DataFrame.Companion, - /* text = */ trimmed, - /* header = */ emptyList(), - /* keyValuePaths = */ emptyList(), - /* typeClashTactic = */ typeClashTactic.enumConstants[0], - /* unifyNumbers = */ true, 
- ) as AnyFrame - } catch (_: ClassNotFoundException) { - logger.warn { "parse() encountered a string that looks like a JSON array, but the dataframe-json dependency was not detected. Skipping for now." } + if (readJsonStrAnyFrame == null) { + logger.warn { + "parse() encountered a string that looks like a JSON array, but the dataframe-json dependency was not detected. Skipping for now." + } null + } else { + readJsonStrAnyFrame!!(trimmed) } } else { null } }, - // JSON object as DataRow<*> TODO + // JSON object as DataRow<*> stringParser(catch = true) { val trimmed = it.trim() if (trimmed.startsWith("{") && trimmed.endsWith("}")) { - try { - val klass = Class.forName("org.jetbrains.kotlinx.dataframe.io.JsonKt") - val typeClashTactic = Class.forName("org.jetbrains.kotlinx.dataframe.io.JSON\$TypeClashTactic") - val readJsonStr = klass.getMethod( - "readJsonStr", - /* this = */DataRow.Companion::class.java, - /* text = */ String::class.java, - /* header = */ List::class.java, - /* keyValuePaths = */ List::class.java, - /* typeClashTactic = */ typeClashTactic, - /* unifyNumbers = */ Boolean::class.java, - ) - - readJsonStr.invoke( - null, - /* this = */ DataRow.Companion, - /* text = */ trimmed, - /* header = */ emptyList(), - /* keyValuePaths = */ emptyList(), - /* typeClashTactic = */ typeClashTactic.enumConstants[0], - /* unifyNumbers = */ true, - ) as AnyRow - } catch (_: ClassNotFoundException) { - logger.warn { "parse() encountered a string that looks like a JSON object, but the dataframe-json dependency was not detected. Skipping for now." } + if (readJsonStrAnyRow == null) { + logger.warn { + "parse() encountered a string that looks like a JSON object, but the dataframe-json dependency was not detected. Skipping for now." 
+ } null + } else { + readJsonStrAnyRow!!(trimmed) } } else { null diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/Utils.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/Utils.kt index 3354334424..9be2b8e52f 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/Utils.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/Utils.kt @@ -1,8 +1,5 @@ package org.jetbrains.kotlinx.dataframe -import kotlinx.serialization.json.Json -import kotlinx.serialization.json.JsonObject -import kotlinx.serialization.json.jsonObject import org.jetbrains.kotlinx.dataframe.api.print import org.jetbrains.kotlinx.dataframe.api.schema import org.jetbrains.kotlinx.dataframe.io.renderToString diff --git a/dataframe-json/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt b/dataframe-json/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt index d91fd7ab1e..fd3ded5fe0 100644 --- a/dataframe-json/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt +++ b/dataframe-json/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt @@ -1160,4 +1160,5 @@ fun > T.alsoDebug(println: String? = null, rowsLimit: Int = 20) internal val nothingType: KType = typeOf>().arguments.first().type!! internal val nullableNothingType: KType = typeOf>().arguments.first().type!! 
+ internal fun nothingType(nullable: Boolean): KType = if (nullable) nullableNothingType else nothingType diff --git a/settings.gradle.kts b/settings.gradle.kts index 2de92285b6..bade37d6ac 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -50,4 +50,3 @@ include("dataframe-openapi-generator") include("dataframe-geo") include("plugins:public-api-modifier") include("dataframe-compiler-plugin-core") - From 73e6f799a87ffdb1d38305040f5c6dd9aa5143cb Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Sat, 26 Apr 2025 13:42:50 +0200 Subject: [PATCH 06/15] removed buildconfig from non-core modules --- core/api/core.api | 120 +- .../dataframe/annotations/ImportDataSchema.kt | 15 +- .../kotlinx/dataframe/api/chunked.kt | 12 +- .../dataframe/codeGen/DefaultReadDfMethods.kt | 8 - .../documentation/UnifyingNumbers.kt | 26 +- .../dataframe/impl/ColumnDataCollector.kt | 2 +- .../jetbrains/kotlinx/dataframe/impl/Utils.kt | 8 + .../kotlinx/dataframe/impl/api/parse.kt | 112 +- .../kotlinx/dataframe/impl/io/BytesUtils.kt | 5 - .../kotlinx/dataframe/impl/io/compression.kt | 11 - .../kotlinx/dataframe/impl/io/image.kt | 23 + .../kotlinx/dataframe/impl/io/readJson.kt | 724 ----------- .../kotlinx/dataframe/impl/io/writeJson.kt | 417 ------ .../kotlinx/dataframe/impl/schema/Utils.kt | 4 + .../jetbrains/kotlinx/dataframe/io/common.kt | 6 +- .../org/jetbrains/kotlinx/dataframe/io/csv.kt | 14 +- .../jetbrains/kotlinx/dataframe/io/json.kt | 501 -------- .../org/jetbrains/kotlinx/dataframe/Utils.kt | 5 - .../dataframe/io/ImageSerializationTests.kt | 180 --- .../jetbrains/kotlinx/dataframe/io/json.kt | 1144 ----------------- .../kotlinx/dataframe/impl/io/writeDelim.kt | 18 +- .../kotlinx/dataframe/io/DelimCsvTsvTests.kt | 16 + dataframe-json/build.gradle.kts | 1 - dataframe-jupyter/build.gradle.kts | 1 - 24 files changed, 229 insertions(+), 3144 deletions(-) delete mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/BytesUtils.kt delete mode 
100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/compression.kt delete mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt delete mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeJson.kt delete mode 100644 core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt delete mode 100644 core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ImageSerializationTests.kt delete mode 100644 core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt diff --git a/core/api/core.api b/core/api/core.api index c91c04bde2..85aa42fa81 100644 --- a/core/api/core.api +++ b/core/api/core.api @@ -273,10 +273,16 @@ public abstract interface annotation class org/jetbrains/kotlinx/dataframe/annot public abstract interface annotation class org/jetbrains/kotlinx/dataframe/annotations/JsonOptions : java/lang/annotation/Annotation { public abstract fun keyValuePaths ()[Ljava/lang/String; - public abstract fun typeClashTactic ()Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic; + public abstract fun typeClashTactic ()Ljava/lang/String; public abstract fun unifyNumbers ()Z } +public final class org/jetbrains/kotlinx/dataframe/annotations/JsonOptions$TypeClashTactics { + public static final field ANY_COLUMNS Ljava/lang/String; + public static final field ARRAY_AND_VALUE_COLUMNS Ljava/lang/String; + public static final field INSTANCE Lorg/jetbrains/kotlinx/dataframe/annotations/JsonOptions$TypeClashTactics; +} + public abstract interface annotation class org/jetbrains/kotlinx/dataframe/annotations/Order : java/lang/annotation/Annotation { public abstract fun order ()I } @@ -681,8 +687,10 @@ public final class org/jetbrains/kotlinx/dataframe/api/ChunkedKt { public static final fun chunked (Lorg/jetbrains/kotlinx/dataframe/DataColumn;I)Lorg/jetbrains/kotlinx/dataframe/columns/FrameColumn; public 
static final fun chunked (Lorg/jetbrains/kotlinx/dataframe/DataColumn;I)Lorg/jetbrains/kotlinx/dataframe/columns/ValueColumn; public static final fun chunked (Lorg/jetbrains/kotlinx/dataframe/DataFrame;ILjava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/columns/FrameColumn; + public static final fun chunked (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/lang/Iterable;Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/columns/FrameColumn; public static final fun chunked (Lorg/jetbrains/kotlinx/dataframe/columns/ColumnGroup;I)Lorg/jetbrains/kotlinx/dataframe/columns/FrameColumn; public static synthetic fun chunked$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;ILjava/lang/String;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/columns/FrameColumn; + public static synthetic fun chunked$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/lang/Iterable;Ljava/lang/String;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/columns/FrameColumn; } public final class org/jetbrains/kotlinx/dataframe/api/CodeString { @@ -5320,6 +5328,9 @@ public abstract interface class org/jetbrains/kotlinx/dataframe/documentation/Ds public abstract interface class org/jetbrains/kotlinx/dataframe/documentation/DslGrammarTemplateColumnsSelectionDsl$UsageTemplateExample$ColumnSetName { } +public abstract interface class org/jetbrains/kotlinx/dataframe/documentation/UnifyingNumbers { +} + public final class org/jetbrains/kotlinx/dataframe/exceptions/CellConversionException : org/jetbrains/kotlinx/dataframe/exceptions/TypeConversionException { public fun (Ljava/lang/Object;Lkotlin/reflect/KType;Lkotlin/reflect/KType;Lorg/jetbrains/kotlinx/dataframe/columns/ColumnPath;Ljava/lang/Integer;Ljava/lang/Throwable;)V public fun getCause ()Ljava/lang/Throwable; @@ -5393,6 +5404,13 @@ public final class org/jetbrains/kotlinx/dataframe/impl/ColumnNameGenerator { public final fun getNames ()Ljava/util/List; } +public abstract interface class 
org/jetbrains/kotlinx/dataframe/impl/DataCollector { + public abstract fun add (Ljava/lang/Object;)V + public abstract fun getData ()Ljava/util/List; + public abstract fun getHasNulls ()Z + public abstract fun toColumn (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; +} + public final class org/jetbrains/kotlinx/dataframe/impl/DataFrameSize { public fun (II)V public final fun component1 ()I @@ -5718,22 +5736,6 @@ public final class org/jetbrains/kotlinx/dataframe/impl/schema/UtilsKt { public static final fun getSchema (Lkotlin/reflect/KClass;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema; } -public final class org/jetbrains/kotlinx/dataframe/io/Base64ImageEncodingOptions { - public static final field ALL_OFF I - public static final field Companion Lorg/jetbrains/kotlinx/dataframe/io/Base64ImageEncodingOptions$Companion; - public static final field GZIP_ON I - public static final field LIMIT_SIZE_ON I - public fun ()V - public fun (II)V - public synthetic fun (IIILkotlin/jvm/internal/DefaultConstructorMarker;)V - public final fun getImageSizeLimit ()I - public final fun isGzipOn ()Z - public final fun isLimitSizeOn ()Z -} - -public final class org/jetbrains/kotlinx/dataframe/io/Base64ImageEncodingOptions$Companion { -} - public final class org/jetbrains/kotlinx/dataframe/io/CSV : org/jetbrains/kotlinx/dataframe/io/SupportedDataFrameFormat { public fun ()V public fun (C)V @@ -5880,11 +5882,6 @@ public final class org/jetbrains/kotlinx/dataframe/io/CsvKt { public static synthetic fun writeCSV$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/lang/String;Lorg/apache/commons/csv/CSVFormat;ILjava/lang/Object;)V } -public abstract interface class org/jetbrains/kotlinx/dataframe/io/CustomEncoder { - public abstract fun canEncode (Ljava/lang/Object;)Z - public abstract fun encode (Ljava/lang/Object;)Lkotlinx/serialization/json/JsonElement; -} - public final class org/jetbrains/kotlinx/dataframe/io/DataFrameHtmlData { public static final 
field Companion Lorg/jetbrains/kotlinx/dataframe/io/DataFrameHtmlData$Companion; public fun ()V @@ -5989,85 +5986,6 @@ public final class org/jetbrains/kotlinx/dataframe/io/HtmlKt { public static synthetic fun toStaticHtml$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Lorg/jetbrains/kotlinx/dataframe/io/DisplayConfiguration;Lorg/jetbrains/kotlinx/dataframe/jupyter/CellRenderer;ZZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/io/DataFrameHtmlData; } -public final class org/jetbrains/kotlinx/dataframe/io/JSON : org/jetbrains/kotlinx/dataframe/io/SupportedDataFrameFormat { - public fun ()V - public synthetic fun (Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Ljava/util/List;)V - public synthetic fun (Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Ljava/util/List;ILkotlin/jvm/internal/DefaultConstructorMarker;)V - public fun (Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Ljava/util/List;Z)V - public synthetic fun (Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Ljava/util/List;ZILkotlin/jvm/internal/DefaultConstructorMarker;)V - public fun acceptsExtension (Ljava/lang/String;)Z - public fun acceptsSample (Lorg/jetbrains/kotlinx/dataframe/io/SupportedFormatSample;)Z - public fun createDefaultReadMethod (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/codeGen/DefaultReadDfMethod; - public fun getTestOrder ()I - public fun readDataFrame (Ljava/io/File;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public fun readDataFrame (Ljava/io/InputStream;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; -} - -public final class org/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic : java/lang/Enum { - public static final field ANY_COLUMNS Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic; - public static final field ARRAY_AND_VALUE_COLUMNS Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic; - public static fun getEntries ()Lkotlin/enums/EnumEntries; - public static fun valueOf 
(Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic; - public static fun values ()[Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic; -} - -public final class org/jetbrains/kotlinx/dataframe/io/JsonKt { - public static final synthetic fun readJson (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/File;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static final fun readJson (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/File;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static final synthetic fun readJson (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/InputStream;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static final fun readJson (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/InputStream;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static final fun readJson (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static final synthetic fun readJson (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/net/URL;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static final fun readJson (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/net/URL;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static final synthetic fun readJson 
(Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/io/File;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;)Lorg/jetbrains/kotlinx/dataframe/DataRow; - public static final fun readJson (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/io/File;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Z)Lorg/jetbrains/kotlinx/dataframe/DataRow; - public static final synthetic fun readJson (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/io/InputStream;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;)Lorg/jetbrains/kotlinx/dataframe/DataRow; - public static final fun readJson (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/io/InputStream;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Z)Lorg/jetbrains/kotlinx/dataframe/DataRow; - public static final fun readJson (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/lang/String;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Z)Lorg/jetbrains/kotlinx/dataframe/DataRow; - public static final synthetic fun readJson (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/net/URL;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;)Lorg/jetbrains/kotlinx/dataframe/DataRow; - public static final fun readJson (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/net/URL;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Z)Lorg/jetbrains/kotlinx/dataframe/DataRow; - public static synthetic fun readJson$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/File;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static synthetic fun readJson$default 
(Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/File;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static synthetic fun readJson$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/InputStream;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static synthetic fun readJson$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/InputStream;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static synthetic fun readJson$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static synthetic fun readJson$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/net/URL;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static synthetic fun readJson$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/net/URL;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static synthetic fun readJson$default (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/io/File;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow; - public static synthetic fun readJson$default 
(Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/io/File;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow; - public static synthetic fun readJson$default (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/io/InputStream;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow; - public static synthetic fun readJson$default (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/io/InputStream;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow; - public static synthetic fun readJson$default (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/lang/String;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow; - public static synthetic fun readJson$default (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/net/URL;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow; - public static synthetic fun readJson$default (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/net/URL;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow; - public static final synthetic fun readJsonStr (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static final fun readJsonStr 
(Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static final synthetic fun readJsonStr (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/lang/String;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;)Lorg/jetbrains/kotlinx/dataframe/DataRow; - public static final fun readJsonStr (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/lang/String;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Z)Lorg/jetbrains/kotlinx/dataframe/DataRow; - public static synthetic fun readJsonStr$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static synthetic fun readJsonStr$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static synthetic fun readJsonStr$default (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/lang/String;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow; - public static synthetic fun readJsonStr$default (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/lang/String;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow; - public static final fun toJson (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Z)Ljava/lang/String; - public static final fun toJson (Lorg/jetbrains/kotlinx/dataframe/DataRow;Z)Ljava/lang/String; - public 
static synthetic fun toJson$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;ZILjava/lang/Object;)Ljava/lang/String; - public static synthetic fun toJson$default (Lorg/jetbrains/kotlinx/dataframe/DataRow;ZILjava/lang/Object;)Ljava/lang/String; - public static final fun toJsonWithMetadata (Lorg/jetbrains/kotlinx/dataframe/DataFrame;ILjava/lang/Integer;ZLjava/util/List;)Ljava/lang/String; - public static synthetic fun toJsonWithMetadata$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;ILjava/lang/Integer;ZLjava/util/List;ILjava/lang/Object;)Ljava/lang/String; - public static final fun writeJson (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/io/File;Z)V - public static final fun writeJson (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/lang/Appendable;Z)V - public static final fun writeJson (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/lang/String;Z)V - public static final fun writeJson (Lorg/jetbrains/kotlinx/dataframe/DataRow;Ljava/io/File;Z)V - public static final fun writeJson (Lorg/jetbrains/kotlinx/dataframe/DataRow;Ljava/lang/Appendable;Z)V - public static final fun writeJson (Lorg/jetbrains/kotlinx/dataframe/DataRow;Ljava/lang/String;Z)V - public static synthetic fun writeJson$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/io/File;ZILjava/lang/Object;)V - public static synthetic fun writeJson$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/lang/Appendable;ZILjava/lang/Object;)V - public static synthetic fun writeJson$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/lang/String;ZILjava/lang/Object;)V - public static synthetic fun writeJson$default (Lorg/jetbrains/kotlinx/dataframe/DataRow;Ljava/io/File;ZILjava/lang/Object;)V - public static synthetic fun writeJson$default (Lorg/jetbrains/kotlinx/dataframe/DataRow;Ljava/lang/Appendable;ZILjava/lang/Object;)V - public static synthetic fun writeJson$default (Lorg/jetbrains/kotlinx/dataframe/DataRow;Ljava/lang/String;ZILjava/lang/Object;)V -} - public final class 
org/jetbrains/kotlinx/dataframe/io/MethodArguments { public static final field Companion Lorg/jetbrains/kotlinx/dataframe/io/MethodArguments$Companion; public fun ()V diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/annotations/ImportDataSchema.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/annotations/ImportDataSchema.kt index 0ec47b1f21..4095ab26cf 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/annotations/ImportDataSchema.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/annotations/ImportDataSchema.kt @@ -5,7 +5,6 @@ import org.jetbrains.kotlinx.dataframe.api.KeyValueProperty import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup import org.jetbrains.kotlinx.dataframe.columns.FrameColumn import org.jetbrains.kotlinx.dataframe.documentation.UnifyingNumbers -import org.jetbrains.kotlinx.dataframe.io.JSON /** * Annotation preprocessing will generate a DataSchema interface from the data at `path`. @@ -73,8 +72,11 @@ public annotation class JdbcOptions( ) public annotation class JsonOptions( - /** Allows the choice of how to handle type clashes when reading a JSON file. */ - public val typeClashTactic: JSON.TypeClashTactic = JSON.TypeClashTactic.ARRAY_AND_VALUE_COLUMNS, + /** + * Allows the choice of how to handle type clashes when reading a JSON file. + * Must be either [JsonOptions.TypeClashTactics.ARRAY_AND_VALUE_COLUMNS] or [JsonOptions.TypeClashTactics.ANY_COLUMNS] + * */ + public val typeClashTactic: String = TypeClashTactics.ARRAY_AND_VALUE_COLUMNS, /** * List of [JsonPath]s where instead of a [ColumnGroup], a [FrameColumn]<[KeyValueProperty]> * will be created. @@ -85,4 +87,9 @@ public annotation class JsonOptions( public val keyValuePaths: Array = [], /** Whether to [unify the numbers that are read][UnifyingNumbers]. `true` by default. 
*/ public val unifyNumbers: Boolean = true, -) +) { + public object TypeClashTactics { + public const val ARRAY_AND_VALUE_COLUMNS: String = "ARRAY_AND_VALUE_COLUMNS" + public const val ANY_COLUMNS: String = "ANY_COLUMNS" + } +} diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/chunked.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/chunked.kt index 421d1d64e6..9aa6ae504f 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/chunked.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/chunked.kt @@ -15,10 +15,14 @@ import org.jetbrains.kotlinx.dataframe.type * Creates a [FrameColumn] from [this] by splitting the dataframe into * smaller ones, with their number of rows at most [size]. */ -public fun DataFrame.chunked(size: Int, name: String = "groups"): FrameColumn { - val startIndices = (0 until nrow step size) - return this.chunkedImpl(startIndices, name) -} +public fun DataFrame.chunked(size: Int, name: String = "groups"): FrameColumn = + chunked( + startIndices = 0 until nrow step size, + name = name, + ) + +public fun DataFrame.chunked(startIndices: Iterable, name: String = "groups"): FrameColumn = + chunkedImpl(startIndices, name) public fun DataColumn.chunked(size: Int): ValueColumn> { val values = toList().chunked(size) diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/DefaultReadDfMethods.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/DefaultReadDfMethods.kt index 6ecb10a3cc..cc0a791828 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/DefaultReadDfMethods.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/DefaultReadDfMethods.kt @@ -23,7 +23,6 @@ private const val CAST = "cast" private const val VERIFY = "verify" // cast(true) is obscure, i think it's better to use 
named argument here private const val READ_CSV = "readCSV" private const val READ_TSV = "readTSV" -private const val READ_JSON = "readJson" private const val READ_JDBC = "readJdbc" public abstract class AbstractDefaultReadMethod( @@ -82,13 +81,6 @@ public abstract class AbstractDefaultReadMethod( override val additionalImports: List = listOf("import org.jetbrains.kotlinx.dataframe.io.$methodName") } -internal class DefaultReadJsonMethod(path: String?, arguments: MethodArguments) : - AbstractDefaultReadMethod( - path = path, - arguments = arguments, - methodName = READ_JSON, - ) - internal class DefaultReadCsvMethod(path: String?, arguments: MethodArguments) : AbstractDefaultReadMethod(path, arguments, READ_CSV) diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/UnifyingNumbers.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/UnifyingNumbers.kt index 26b1350fcc..14e3dd9019 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/UnifyingNumbers.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/UnifyingNumbers.kt @@ -42,28 +42,4 @@ import org.jetbrains.kotlinx.dataframe.impl.UnifiedNumberTypeOptions * * At the bottom of the graph is [Nothing]. This can be interpreted as `null`. */ -internal interface UnifyingNumbers { - - /** - * ``` - * (BigDecimal) - * / \ - * (BigInteger) \ - * / \ \ - * <~ ULong Long ~> Double .. - * .. | / | / | \.. - * \ | / | / | - * UInt Int Float - * .. | / | / \.. - * \ | / | / - * UShort Short - * | / | - * | / | - * UByte Byte - * \ / - * \ / - * Nothing? 
- * ``` - */ - interface Graph -} +public interface UnifyingNumbers diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/ColumnDataCollector.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/ColumnDataCollector.kt index 16423a28a5..ae0e60438c 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/ColumnDataCollector.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/ColumnDataCollector.kt @@ -14,7 +14,7 @@ import kotlin.reflect.full.isSubclassOf import kotlin.reflect.full.withNullability import kotlin.reflect.jvm.jvmErasure -internal interface DataCollector { +public interface DataCollector { public val data: List public val hasNulls: Boolean diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/Utils.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/Utils.kt index 48cde41f76..834e53bd34 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/Utils.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/Utils.kt @@ -150,6 +150,10 @@ internal fun catchSilent(body: () -> T): T? = internal fun Iterable>.commonType(nullable: Boolean, upperBound: KType? = null) = commonParents(this).createType(nullable, upperBound) +// helper overload for friend modules +@JvmName("commonTypeOverload") +internal fun commonType(types: Iterable, useStar: Boolean = true) = types.commonType(useStar) + /** * Returns the common supertype of the given types. 
* @@ -276,6 +280,10 @@ internal fun DataFrame.splitByIndices(startIndices: Sequence): Seque } } +// helper overload for friend modules +@JvmName("splitByIndicesOverload") +internal fun splitByIndices(list: List, startIndices: Sequence) = list.splitByIndices(startIndices) + internal fun List.splitByIndices(startIndices: Sequence): Sequence> = (startIndices + size).zipWithNext { start, endExclusive -> subList(start, endExclusive) diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt index d2da7201f7..4e82aa9f1f 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt @@ -1,5 +1,6 @@ package org.jetbrains.kotlinx.dataframe.impl.api +import io.github.oshai.kotlinlogging.KotlinLogging import kotlinx.datetime.Instant import kotlinx.datetime.LocalDate import kotlinx.datetime.LocalDateTime @@ -25,19 +26,18 @@ import org.jetbrains.kotlinx.dataframe.api.isColumnGroup import org.jetbrains.kotlinx.dataframe.api.isFrameColumn import org.jetbrains.kotlinx.dataframe.api.isSubtypeOf import org.jetbrains.kotlinx.dataframe.api.map -import org.jetbrains.kotlinx.dataframe.api.parser import org.jetbrains.kotlinx.dataframe.api.to import org.jetbrains.kotlinx.dataframe.columns.TypeSuggestion import org.jetbrains.kotlinx.dataframe.columns.size import org.jetbrains.kotlinx.dataframe.exceptions.TypeConversionException import org.jetbrains.kotlinx.dataframe.hasNulls +import org.jetbrains.kotlinx.dataframe.impl.api.Parsers.resetToDefault import org.jetbrains.kotlinx.dataframe.impl.canParse import org.jetbrains.kotlinx.dataframe.impl.catchSilent import org.jetbrains.kotlinx.dataframe.impl.createStarProjectedType import org.jetbrains.kotlinx.dataframe.impl.io.FastDoubleParser import 
org.jetbrains.kotlinx.dataframe.impl.javaDurationCanParse import org.jetbrains.kotlinx.dataframe.io.isUrl -import org.jetbrains.kotlinx.dataframe.io.readJsonStr import org.jetbrains.kotlinx.dataframe.values import java.math.BigDecimal import java.math.BigInteger @@ -61,6 +61,8 @@ import java.time.LocalDate as JavaLocalDate import java.time.LocalDateTime as JavaLocalDateTime import java.time.LocalTime as JavaLocalTime +private val logger = KotlinLogging.logger { } + internal interface StringParser { fun toConverter(options: ParserOptions?): TypeConverter @@ -335,6 +337,94 @@ internal object Parsers : GlobalParserOptions { parser } + // TODO rewrite using parser service later https://github.com/Kotlin/dataframe/issues/962 + // null when dataframe-json is not present + private val readJsonStrAnyFrame: ((text: String) -> AnyFrame)? by lazy { + try { + val klass = Class.forName("org.jetbrains.kotlinx.dataframe.io.JsonKt") + val typeClashTactic = Class.forName("org.jetbrains.kotlinx.dataframe.io.JSON\$TypeClashTactic") + val readJsonStr = klass.getMethod( + "readJsonStr", + // this = + DataFrame.Companion::class.java, + // text = + String::class.java, + // header = + List::class.java, + // keyValuePaths = + List::class.java, + // typeClashTactic = + typeClashTactic, + // unifyNumbers = + Boolean::class.java, + ) + + return@lazy { text: String -> + readJsonStr.invoke( + null, + // this = + DataFrame.Companion, + // text = + text, + // header = + emptyList(), + // keyValuePaths = + emptyList(), + // typeClashTactic = + typeClashTactic.enumConstants[0], + // unifyNumbers = + true, + ) as AnyFrame + } + } catch (_: ClassNotFoundException) { + return@lazy null + } + } + + // TODO rewrite using parser service later https://github.com/Kotlin/dataframe/issues/962 + // null when dataframe-json is not present + private val readJsonStrAnyRow: ((text: String) -> AnyRow)? 
by lazy { + try { + val klass = Class.forName("org.jetbrains.kotlinx.dataframe.io.JsonKt") + val typeClashTactic = Class.forName("org.jetbrains.kotlinx.dataframe.io.JSON\$TypeClashTactic") + val readJsonStr = klass.getMethod( + "readJsonStr", + // this = + DataRow.Companion::class.java, + // text = + String::class.java, + // header = + List::class.java, + // keyValuePaths = + List::class.java, + // typeClashTactic = + typeClashTactic, + // unifyNumbers = + Boolean::class.java, + ) + + return@lazy { text: String -> + readJsonStr.invoke( + null, + // this = + DataRow.Companion, + // text = + text, + // header = + emptyList(), + // keyValuePaths = + emptyList(), + // typeClashTactic = + typeClashTactic.enumConstants[0], + // unifyNumbers = + true, + ) as AnyRow + } + } catch (_: ClassNotFoundException) { + return@lazy null + } + } + internal val parsersOrder = listOf( // Int stringParser { it.toIntOrNull() }, @@ -408,7 +498,14 @@ internal object Parsers : GlobalParserOptions { stringParser(catch = true) { val trimmed = it.trim() if (trimmed.startsWith("[") && trimmed.endsWith("]")) { - DataFrame.readJsonStr(it) + if (readJsonStrAnyFrame == null) { + logger.warn { + "parse() encountered a string that looks like a JSON array, but the dataframe-json dependency was not detected. Skipping for now." + } + null + } else { + readJsonStrAnyFrame!!(trimmed) + } } else { null } @@ -417,7 +514,14 @@ internal object Parsers : GlobalParserOptions { stringParser(catch = true) { val trimmed = it.trim() if (trimmed.startsWith("{") && trimmed.endsWith("}")) { - DataRow.readJsonStr(it) + if (readJsonStrAnyRow == null) { + logger.warn { + "parse() encountered a string that looks like a JSON object, but the dataframe-json dependency was not detected. Skipping for now." 
+ } + null + } else { + readJsonStrAnyRow!!(trimmed) + } } else { null } diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/BytesUtils.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/BytesUtils.kt deleted file mode 100644 index 9994ba56c7..0000000000 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/BytesUtils.kt +++ /dev/null @@ -1,5 +0,0 @@ -package org.jetbrains.kotlinx.dataframe.impl.io - -import java.util.Base64 - -internal fun ByteArray.toBase64(): String = Base64.getEncoder().encodeToString(this) diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/compression.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/compression.kt deleted file mode 100644 index 8d95b811ec..0000000000 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/compression.kt +++ /dev/null @@ -1,11 +0,0 @@ -package org.jetbrains.kotlinx.dataframe.impl.io - -import java.io.ByteArrayOutputStream -import java.util.zip.GZIPOutputStream - -internal fun ByteArray.encodeGzip(): ByteArray { - val bos = ByteArrayOutputStream() - GZIPOutputStream(bos).use { it.write(this) } - - return bos.toByteArray() -} diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/image.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/image.kt index 782288718c..9de00d56a2 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/image.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/image.kt @@ -58,3 +58,26 @@ internal fun BufferedImage.toByteArray(format: String = DEFAULT_IMG_FORMAT): Byt ImageIO.write(this, format, bos) bos.toByteArray() } + +// helper overload for friend modules +@JvmName("resizeKeepingAspectRatioOverload") +internal fun resizeKeepingAspectRatio( 
+ image: BufferedImage, + maxSize: Int, + resultImageType: Int = BufferedImage.TYPE_INT_ARGB, + interpolation: Any = RenderingHints.VALUE_INTERPOLATION_NEAREST_NEIGHBOR, + renderingQuality: Any = RenderingHints.VALUE_RENDER_QUALITY, + antialiasing: Any = RenderingHints.VALUE_ANTIALIAS_ON, + observer: ImageObserver? = null, +) = image.resizeKeepingAspectRatio( + maxSize = maxSize, + resultImageType = resultImageType, + interpolation = interpolation, + renderingQuality = renderingQuality, + antialiasing = antialiasing, + observer = observer, +) + +// helper overload for friend modules +@JvmName("toByteArrayOverload") +internal fun toByteArray(image: BufferedImage, format: String = DEFAULT_IMG_FORMAT) = image.toByteArray(format) diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt deleted file mode 100644 index e47f176d32..0000000000 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readJson.kt +++ /dev/null @@ -1,724 +0,0 @@ -package org.jetbrains.kotlinx.dataframe.impl.io - -import kotlinx.serialization.json.JsonArray -import kotlinx.serialization.json.JsonNull -import kotlinx.serialization.json.JsonObject -import kotlinx.serialization.json.JsonPrimitive -import kotlinx.serialization.json.boolean -import kotlinx.serialization.json.booleanOrNull -import kotlinx.serialization.json.double -import kotlinx.serialization.json.doubleOrNull -import kotlinx.serialization.json.float -import kotlinx.serialization.json.floatOrNull -import kotlinx.serialization.json.int -import kotlinx.serialization.json.intOrNull -import kotlinx.serialization.json.jsonArray -import kotlinx.serialization.json.long -import kotlinx.serialization.json.longOrNull -import org.jetbrains.kotlinx.dataframe.AnyCol -import org.jetbrains.kotlinx.dataframe.AnyFrame -import org.jetbrains.kotlinx.dataframe.DataColumn -import 
org.jetbrains.kotlinx.dataframe.DataFrame -import org.jetbrains.kotlinx.dataframe.DataRow -import org.jetbrains.kotlinx.dataframe.api.JsonPath -import org.jetbrains.kotlinx.dataframe.api.KeyValueProperty -import org.jetbrains.kotlinx.dataframe.api.cast -import org.jetbrains.kotlinx.dataframe.api.columnOf -import org.jetbrains.kotlinx.dataframe.api.concat -import org.jetbrains.kotlinx.dataframe.api.dataFrameOf -import org.jetbrains.kotlinx.dataframe.api.firstOrNull -import org.jetbrains.kotlinx.dataframe.api.getColumn -import org.jetbrains.kotlinx.dataframe.api.mapIndexed -import org.jetbrains.kotlinx.dataframe.api.named -import org.jetbrains.kotlinx.dataframe.api.schema -import org.jetbrains.kotlinx.dataframe.api.splitInto -import org.jetbrains.kotlinx.dataframe.api.toDataFrame -import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup -import org.jetbrains.kotlinx.dataframe.columns.FrameColumn -import org.jetbrains.kotlinx.dataframe.columns.TypeSuggestion -import org.jetbrains.kotlinx.dataframe.documentation.UnifyingNumbers -import org.jetbrains.kotlinx.dataframe.impl.ColumnNameGenerator -import org.jetbrains.kotlinx.dataframe.impl.DataCollectorBase -import org.jetbrains.kotlinx.dataframe.impl.api.chunkedImpl -import org.jetbrains.kotlinx.dataframe.impl.asList -import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType -import org.jetbrains.kotlinx.dataframe.impl.commonType -import org.jetbrains.kotlinx.dataframe.impl.createDataCollector -import org.jetbrains.kotlinx.dataframe.impl.guessValueType -import org.jetbrains.kotlinx.dataframe.impl.schema.DataFrameSchemaImpl -import org.jetbrains.kotlinx.dataframe.impl.schema.extractSchema -import org.jetbrains.kotlinx.dataframe.impl.schema.intersectSchemas -import org.jetbrains.kotlinx.dataframe.impl.splitByIndices -import org.jetbrains.kotlinx.dataframe.io.ARRAY_COLUMN_NAME -import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic -import 
org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.ANY_COLUMNS -import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.ARRAY_AND_VALUE_COLUMNS -import org.jetbrains.kotlinx.dataframe.io.VALUE_COLUMN_NAME -import org.jetbrains.kotlinx.dataframe.ncol -import org.jetbrains.kotlinx.dataframe.nrow -import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema -import org.jetbrains.kotlinx.dataframe.type -import org.jetbrains.kotlinx.dataframe.typeClass -import org.jetbrains.kotlinx.dataframe.values -import kotlin.reflect.KType -import kotlin.reflect.KTypeProjection -import kotlin.reflect.full.createType -import kotlin.reflect.typeOf - -private fun DataFrame.unwrapUnnamedColumns() = dataFrameOf(columns().map { it.unwrapUnnamedColumn() }) - -private fun AnyCol.unwrapUnnamedColumn() = if (this is UnnamedColumn) col else this - -private enum class AnyColType { - ANY, - ARRAYS, - OBJECTS, -} - -internal interface AnyKeyValueProperty : KeyValueProperty { - override val value: Any? -} - -internal fun readJsonImpl( - parsed: Any?, - unifyNumbers: Boolean, - header: List, - keyValuePaths: List = emptyList(), - typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, -): DataFrame<*> { - val df: AnyFrame = when (typeClashTactic) { - ARRAY_AND_VALUE_COLUMNS -> { - when (parsed) { - is JsonArray -> fromJsonListArrayAndValueColumns( - records = parsed, - unifyNumbers = unifyNumbers, - header = header, - keyValuePaths = keyValuePaths, - ) - - else -> fromJsonListArrayAndValueColumns( - records = listOf(parsed), - unifyNumbers = unifyNumbers, - keyValuePaths = keyValuePaths, - ) - } - } - - ANY_COLUMNS -> { - when (parsed) { - is JsonArray -> fromJsonListAnyColumns( - records = parsed, - unifyNumbers = unifyNumbers, - header = header, - keyValuePaths = keyValuePaths, - ) - - else -> fromJsonListAnyColumns( - records = listOf(parsed), - unifyNumbers = unifyNumbers, - keyValuePaths = keyValuePaths, - ) - } - } - } - return df.unwrapUnnamedColumns() -} - -/** - * Json to 
DataFrame converter that creates [Any] columns. - * A.k.a. [TypeClashTactic.ANY_COLUMNS]. - * - * @param records List of json elements to be converted to a [DataFrame]. - * @param unifyNumbers Whether to [unify the numbers that are read][UnifyingNumbers]. - * @param keyValuePaths List of [JsonPath]s where instead of a [ColumnGroup], a [FrameColumn]<[KeyValueProperty]> - * will be created. - * @param header Optional list of column names. If given, [records] will be read like an object with [header] being the keys. - * @return [DataFrame] from the given [records]. - */ -internal fun fromJsonListAnyColumns( - records: List<*>, - unifyNumbers: Boolean, - keyValuePaths: List = emptyList(), - header: List = emptyList(), - jsonPath: JsonPath = JsonPath(), -): AnyFrame { - var hasPrimitive = false - var hasArray = false - var hasObject = false - - // list element type can be JsonObject, JsonArray or primitive - val nameGenerator = ColumnNameGenerator() - records.forEach { record -> - when (record) { - is JsonObject -> { - hasObject = true - record.entries.forEach { nameGenerator.addIfAbsent(it.key) } - } - - is JsonArray -> hasArray = true - - is JsonNull, null -> Unit - - is JsonPrimitive -> hasPrimitive = true - } - } - - val colType = when { - hasArray && !hasPrimitive && !hasObject -> AnyColType.ARRAYS - hasObject && !hasPrimitive && !hasArray -> AnyColType.OBJECTS - else -> AnyColType.ANY - } - val justPrimitives = hasPrimitive && !hasArray && !hasObject - val isKeyValue = keyValuePaths.any { jsonPath.matches(it) } - - if (isKeyValue && colType != AnyColType.OBJECTS) { - error("Key value path $jsonPath does not match objects.") - } - - @Suppress("KotlinConstantConditions") - val columns: List = when { - // Create one column of type Any? 
(or guessed primitive type) from all the records - colType == AnyColType.ANY -> { - val collector: DataCollectorBase = - if (justPrimitives) { - createDataCollector(records.size) // guess the type - } else { - createDataCollector(records.size, typeOf()) // use Any? - } - - val nanIndices = mutableListOf() - records.forEachIndexed { i, v -> - when (v) { - is JsonObject -> { - val parsed = - fromJsonListAnyColumns( - records = listOf(v), - unifyNumbers = unifyNumbers, - keyValuePaths = keyValuePaths, - jsonPath = jsonPath.replaceLastWildcardWithIndex(i), - ) - collector.add( - if (parsed.isSingleUnnamedColumn()) { - (parsed.getColumn(0) as UnnamedColumn).col.values.first() - } else { - parsed.firstOrNull() ?: DataRow.empty - }, - ) - } - - is JsonArray -> { - val parsed = fromJsonListAnyColumns( - records = v, - unifyNumbers = unifyNumbers, - keyValuePaths = keyValuePaths, - jsonPath = jsonPath.replaceLastWildcardWithIndex(i).appendArrayWithWildcard(), - ) - collector.add( - if (parsed.isSingleUnnamedColumn()) { - (parsed.getColumn(0) as UnnamedColumn).col.values.asList() - } else { - parsed.unwrapUnnamedColumns() - }, - ) - } - - is JsonNull -> collector.add(null) - - is JsonPrimitive -> { - when { - v.content == "NaN" -> { - nanIndices.add(i) - collector.add(null) - } - - v.isString -> collector.add(v.content) - - v.booleanOrNull != null -> collector.add(v.boolean) - - v.intOrNull != null -> collector.add(v.int) - - v.longOrNull != null -> collector.add(v.long) - - v.floatOrNull != null -> collector.add(v.float) - - v.doubleOrNull != null -> collector.add(v.double) - - else -> error("Malformed JSON element ${v::class}: $v") - } - } - - else -> collector.add(v) - } - } - val column = createColumnGuessingType(VALUE_COLUMN_NAME, collector.values, unifyNumbers = unifyNumbers) - val res = if (nanIndices.isNotEmpty()) { - fun DataColumn.updateNaNs(nanValue: C): DataColumn { - var j = 0 - var nextNanIndex = nanIndices[j] - return mapIndexed(column.type) { i, v -> - if (i 
== nextNanIndex) { - j++ - nextNanIndex = if (j < nanIndices.size) nanIndices[j] else -1 - nanValue - } else { - v - } - } - } - when (column.typeClass) { - Double::class -> column.cast().updateNaNs(Double.NaN) - Float::class -> column.cast().updateNaNs(Float.NaN) - String::class -> column.cast().updateNaNs("NaN") - else -> column - } - } else { - column - } - listOf(UnnamedColumn(res)) - } - - // Create one column of type FrameColumn, or List<> from all the records if they are all arrays - colType == AnyColType.ARRAYS -> { - val values = mutableListOf() - val startIndices = ArrayList() - records.forEach { - startIndices.add(values.size) - when (it) { - is JsonArray -> values.addAll(it) - is JsonNull, null -> Unit - else -> error("Expected JsonArray, got $it") - } - } - val parsed = fromJsonListAnyColumns( - records = values, - unifyNumbers = unifyNumbers, - keyValuePaths = keyValuePaths, - jsonPath = jsonPath.appendArrayWithWildcard(), - ) - - val res = when { - parsed.isSingleUnnamedColumn() -> { - val col = (parsed.getColumn(0) as UnnamedColumn).col - val elementType = col.type - val columnValues = col.values - .asList() - .splitByIndices(startIndices.asSequence()) - .toList() - DataColumn.createValueColumn( - name = ARRAY_COLUMN_NAME, - values = columnValues, - type = List::class.createType(listOf(KTypeProjection.invariant(elementType))), - ) - } - - else -> - parsed.unwrapUnnamedColumns() - .chunkedImpl( - startIndices = startIndices, - name = ARRAY_COLUMN_NAME, // will be erased - ) - } - listOf(UnnamedColumn(res)) - } - - // Create one column of type FrameColumn - colType == AnyColType.OBJECTS && isKeyValue -> { - // collect the value types to make sure Value columns with lists and other values aren't all turned into lists - val valueTypes = mutableSetOf() - val dataFrames = records.map { record -> - when (record) { - is JsonObject -> { - val map = record.mapValues { (key, value) -> - val parsed = fromJsonListAnyColumns( - records = listOf(value), - 
unifyNumbers = unifyNumbers, - keyValuePaths = keyValuePaths, - jsonPath = jsonPath.append(key), - ) - if (parsed.isSingleUnnamedColumn()) { - (parsed.getColumn(0) as UnnamedColumn).col.values.first() - } else { - parsed.unwrapUnnamedColumns().firstOrNull() - } - } - val valueType = map.values.map { - guessValueType(sequenceOf(it), unifyNumbers = unifyNumbers) - }.commonType() - - valueTypes += valueType - - dataFrameOf( - columnOf(*map.keys.toTypedArray()).named(KeyValueProperty<*>::key.name), - createColumnGuessingType( - values = map.values, - suggestedType = TypeSuggestion.Use(valueType), - unifyNumbers = unifyNumbers, - ).named(KeyValueProperty<*>::value.name), - ) - } - - is JsonNull, null -> DataFrame.emptyOf() - - else -> error("Expected JsonObject, got $record") - } - } - - val valueColumns = dataFrames.map { it[KeyValueProperty<*>::value.name] } - val valueColumnSchema = when { - // in these cases we can safely combine the columns to get a single column schema - valueColumns.all { it is ColumnGroup<*> } || valueColumns.all { it is FrameColumn<*> } -> - valueColumns.concat().extractSchema() - - // to avoid listification, we create the value columns schema ourselves (https://github.com/Kotlin/dataframe/issues/184) - else -> ColumnSchema.Value(valueTypes.commonType()) - } - - listOf( - UnnamedColumn( - DataColumn.createFrameColumn( - name = VALUE_COLUMN_NAME, // will be erased unless at top-level - groups = dataFrames, - schema = lazy { - DataFrameSchemaImpl( - columns = mapOf( - KeyValueProperty<*>::key.name to ColumnSchema.Value(typeOf()), - KeyValueProperty<*>::value.name to valueColumnSchema, - ), - ) - }, - ), - ), - ) - } - - // Create multiple columns from all the records if they are all objects, merging the objects in essence - colType == AnyColType.OBJECTS && !isKeyValue -> { - nameGenerator.names.map { colName -> - val values = ArrayList(records.size) - - records.forEach { - when (it) { - is JsonObject -> values.add(it[colName]) - is JsonNull, null 
-> values.add(null) - else -> error("Expected JsonObject, got $it") - } - } - - val parsed = fromJsonListAnyColumns( - records = values, - unifyNumbers = unifyNumbers, - keyValuePaths = keyValuePaths, - jsonPath = jsonPath.append(colName), - ) - when { - parsed.ncol == 0 -> - DataColumn.createValueColumn( - name = colName, - values = arrayOfNulls(values.size).toList(), - type = typeOf(), - ) - - parsed.isSingleUnnamedColumn() -> - (parsed.getColumn(0) as UnnamedColumn).col.rename(colName) - - else -> - DataColumn.createColumnGroup(colName, parsed.unwrapUnnamedColumns()) as AnyCol - } - } - } - - else -> error("") - } - - return when { - columns.isEmpty() -> DataFrame.empty(records.size) - - columns.size == 1 && hasArray && header.isNotEmpty() && columns[0].typeClass == List::class -> - columns[0] - .cast>() - .splitInto(*header.toTypedArray()) - - else -> columns.toDataFrame() - } -} - -private fun AnyFrame.isSingleUnnamedColumn() = ncol == 1 && getColumn(0) is UnnamedColumn - -/** - * Json to DataFrame converter that creates allows creates `value` and `array` accessors - * instead of [Any] columns. - * A.k.a. [TypeClashTactic.ARRAY_AND_VALUE_COLUMNS]. - * - * @param records List of json elements to be converted to a [DataFrame]. - * @param unifyNumbers Whether to [unify the numbers that are read][UnifyingNumbers]. - * @param keyValuePaths List of [JsonPath]s where instead of a [ColumnGroup], a [FrameColumn]<[KeyValueProperty]> - * will be created. - * @param header Optional list of column names. If given, [records] will be read like an object with [header] being the keys. - * @return [DataFrame] from the given [records]. 
- */ -internal fun fromJsonListArrayAndValueColumns( - records: List<*>, - unifyNumbers: Boolean, - keyValuePaths: List = emptyList(), - header: List = emptyList(), - jsonPath: JsonPath = JsonPath(), -): AnyFrame { - var hasPrimitive = false - var hasArray = false - val isKeyValue = keyValuePaths.any { jsonPath.matches(it) } - - // list element type can be JsonObject, JsonArray or primitive - // So first, we gather all properties of objects to merge including "array" and "value" if needed - // so the resulting type of a property with instances 123, ["abc"], and { "a": 1, "b": 2 } will be - // { array: List, value: Int?, a: Int?, b: Int? } - // and instances will look like - // { "array": [], "value": 123, "a": null, "b": null } - - val nameGenerator = ColumnNameGenerator() - records.forEach { record -> - when (record) { - is JsonObject -> record.entries.forEach { - nameGenerator.addIfAbsent(it.key) - } - - is JsonArray -> hasArray = true - - is JsonNull, null -> Unit - - is JsonPrimitive -> hasPrimitive = true - } - } - if (records.all { it == null || it is JsonNull }) hasPrimitive = true - - // Add a value column to the collected names if needed - val valueColumn = if (hasPrimitive || records.isEmpty()) { - nameGenerator.addUnique(VALUE_COLUMN_NAME) - } else { - null - } - - // Add an array column to the collected names if needed - val arrayColumn = if (hasArray) { - nameGenerator.addUnique(ARRAY_COLUMN_NAME) - } else { - null - } - - // only properties that consist of just objects (or are empty) can be merged to key/value FrameColumns - if (isKeyValue && (hasPrimitive || hasArray)) { - error("Key value path $jsonPath does not match objects.") - } - - // Create columns from the collected names - val columns: List = when { - // instead of using the names, generate a single key/value frame column - isKeyValue -> { - val dataFrames = records.map { record -> - when (record) { - is JsonObject -> { - val map = record.mapValues { (key, value) -> - val parsed = 
fromJsonListArrayAndValueColumns( - records = listOf(value), - unifyNumbers = unifyNumbers, - keyValuePaths = keyValuePaths, - jsonPath = jsonPath.append(key), - ) - if (parsed.isSingleUnnamedColumn()) { - (parsed.getColumn(0) as UnnamedColumn).col.values.first() - } else { - parsed.unwrapUnnamedColumns().firstOrNull() - } - } - val valueType = - map.values - .map { guessValueType(sequenceOf(it), unifyNumbers = unifyNumbers) } - .commonType() - - dataFrameOf( - columnOf(*map.keys.toTypedArray()).named(KeyValueProperty<*>::key.name), - createColumnGuessingType( - values = map.values, - suggestedType = TypeSuggestion.Use(valueType), - unifyNumbers = unifyNumbers, - ).named(KeyValueProperty<*>::value.name), - ) - } - - is JsonNull, null -> DataFrame.emptyOf() - - else -> error("Expected JsonObject, got $record") - } - } - - listOf( - UnnamedColumn( - DataColumn.createFrameColumn( - name = VALUE_COLUMN_NAME, // will be erased unless at top-level - groups = dataFrames, - schema = lazy { - dataFrames.mapNotNull { it.takeIf { it.nrow > 0 }?.schema() }.intersectSchemas() - }, - ), - ), - ) - } - - // generate columns using the collected names - else -> - nameGenerator.names.map { colName -> - when { - // Collect primitive values from records into the `value` column if needed - colName == valueColumn && (hasPrimitive || records.isEmpty()) -> { - val collector = createDataCollector(records.size) - val nanIndices = mutableListOf() - records.forEachIndexed { i, v -> - when (v) { - is JsonObject -> collector.add(null) - - is JsonArray -> collector.add(null) - - is JsonNull -> collector.add(null) - - is JsonPrimitive -> { - when { - v.content == "NaN" -> { - nanIndices.add(i) - collector.add(null) - } - - v.isString -> collector.add(v.content) - - v.booleanOrNull != null -> collector.add(v.boolean) - - v.intOrNull != null -> collector.add(v.int) - - v.longOrNull != null -> collector.add(v.long) - - v.floatOrNull != null -> collector.add(v.float) - - v.doubleOrNull != null -> 
collector.add(v.double) - - else -> error("Malformed JSON element ${v::class}: $v") - } - } - - else -> collector.add(v) - } - } - val column = createColumnGuessingType(colName, collector.values, unifyNumbers = unifyNumbers) - val res = if (nanIndices.isNotEmpty()) { - fun DataColumn.updateNaNs(nanValue: C): DataColumn { - var j = 0 - var nextNanIndex = nanIndices[j] - return mapIndexed(column.type) { i, v -> - if (i == nextNanIndex) { - j++ - nextNanIndex = if (j < nanIndices.size) nanIndices[j] else -1 - nanValue - } else { - v - } - } - } - when (column.typeClass) { - Double::class -> column.cast().updateNaNs(Double.NaN) - Float::class -> column.cast().updateNaNs(Float.NaN) - String::class -> column.cast().updateNaNs("NaN") - else -> column - } - } else { - column - } - UnnamedColumn(res) - } - - // Collect arrays from records into the `array` column if needed - colName == arrayColumn && hasArray -> { - val values = mutableListOf() - val startIndices = ArrayList() - records.forEach { - startIndices.add(values.size) - if (it is JsonArray) values.addAll(it.jsonArray) - } - val parsed = fromJsonListArrayAndValueColumns( - records = values, - unifyNumbers = unifyNumbers, - keyValuePaths = keyValuePaths, - jsonPath = jsonPath.appendArrayWithWildcard(), - ) - - val res = when { - parsed.isSingleUnnamedColumn() -> { - val col = (parsed.getColumn(0) as UnnamedColumn).col - val elementType = col.type - val columnValues = - col.values - .asList() - .splitByIndices(startIndices.asSequence()) - .toList() - DataColumn.createValueColumn( - name = colName, - values = columnValues, - type = List::class.createType(listOf(KTypeProjection.invariant(elementType))), - ) - } - - else -> parsed.unwrapUnnamedColumns().chunkedImpl(startIndices, colName) - } - UnnamedColumn(res) - } - - // Collect the current column name as property from the objects in records - else -> { - val values = ArrayList(records.size) - records.forEach { - when (it) { - is JsonObject -> values.add(it[colName]) - 
else -> values.add(null) - } - } - - val parsed = fromJsonListArrayAndValueColumns( - records = values, - unifyNumbers = unifyNumbers, - keyValuePaths = keyValuePaths, - jsonPath = jsonPath.append(colName), - ) - when { - parsed.ncol == 0 -> - DataColumn.createValueColumn( - name = colName, - values = arrayOfNulls(values.size).toList(), - type = typeOf(), - ) - - parsed.isSingleUnnamedColumn() -> - (parsed.getColumn(0) as UnnamedColumn).col.rename(colName) - - else -> - DataColumn.createColumnGroup(colName, parsed.unwrapUnnamedColumns()) as AnyCol - } - } - } - } - } - - return when { - columns.isEmpty() -> - DataFrame.empty(records.size) - - columns.size == 1 && hasArray && header.isNotEmpty() && columns[0].typeClass == List::class -> - columns[0] - .cast>() - .splitInto(*header.toTypedArray()) - - else -> - columns.toDataFrame() - } -} - -// we need it to check if AnyFrame created by recursive call has single unnamed column, -// unnamed column means this column is not created from field of a record [{"value": 1}, {"value": 2}], -// but filtered values [1, { ... 
}, []] -> [1, null, null] -// or arrays: [1, { ...}, []] -> [null, null, []] -private class UnnamedColumn(val col: DataColumn) : DataColumn by col diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeJson.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeJson.kt deleted file mode 100644 index d7c069214f..0000000000 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeJson.kt +++ /dev/null @@ -1,417 +0,0 @@ -@file:OptIn(ExperimentalSerializationApi::class) - -package org.jetbrains.kotlinx.dataframe.impl.io - -import kotlinx.serialization.ExperimentalSerializationApi -import kotlinx.serialization.json.Json -import kotlinx.serialization.json.JsonArray -import kotlinx.serialization.json.JsonElement -import kotlinx.serialization.json.JsonObject -import kotlinx.serialization.json.JsonPrimitive -import kotlinx.serialization.json.addAll -import kotlinx.serialization.json.buildJsonArray -import kotlinx.serialization.json.buildJsonObject -import kotlinx.serialization.json.encodeToJsonElement -import kotlinx.serialization.json.put -import kotlinx.serialization.json.putJsonArray -import kotlinx.serialization.json.putJsonObject -import org.jetbrains.kotlinx.dataframe.AnyCol -import org.jetbrains.kotlinx.dataframe.AnyFrame -import org.jetbrains.kotlinx.dataframe.ColumnsContainer -import org.jetbrains.kotlinx.dataframe.DataColumn -import org.jetbrains.kotlinx.dataframe.api.indices -import org.jetbrains.kotlinx.dataframe.api.isList -import org.jetbrains.kotlinx.dataframe.api.rows -import org.jetbrains.kotlinx.dataframe.api.schema -import org.jetbrains.kotlinx.dataframe.api.take -import org.jetbrains.kotlinx.dataframe.columns.CellKind -import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup -import org.jetbrains.kotlinx.dataframe.columns.ColumnKind -import org.jetbrains.kotlinx.dataframe.columns.FrameColumn -import 
org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.COLUMNS -import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.DATA -import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.KIND -import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.KOTLIN_DATAFRAME -import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.METADATA -import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.NCOL -import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.NROW -import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.TYPE -import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.TYPES -import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.VERSION -import org.jetbrains.kotlinx.dataframe.io.ARRAY_COLUMN_NAME -import org.jetbrains.kotlinx.dataframe.io.Base64ImageEncodingOptions -import org.jetbrains.kotlinx.dataframe.io.CustomEncoder -import org.jetbrains.kotlinx.dataframe.io.VALUE_COLUMN_NAME -import org.jetbrains.kotlinx.dataframe.jupyter.KotlinNotebookPluginUtils -import org.jetbrains.kotlinx.dataframe.jupyter.KotlinNotebookPluginUtils.isDataframeConvertable -import org.jetbrains.kotlinx.dataframe.name -import org.jetbrains.kotlinx.dataframe.ncol -import org.jetbrains.kotlinx.dataframe.nrow -import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema -import org.jetbrains.kotlinx.dataframe.typeClass -import java.awt.image.BufferedImage -import java.io.IOException - -// See docs/serialization_format.md for a description of -// serialization versions and format. 
-internal const val SERIALIZATION_VERSION = "2.1.1" - -internal object SerializationKeys { - const val DATA = "data" - const val METADATA = "metadata" - const val KIND = "kind" - const val NCOL = "ncol" - const val NROW = "nrow" - const val VERSION = "\$version" - const val COLUMNS = "columns" - const val KOTLIN_DATAFRAME = "kotlin_dataframe" - const val TYPE = "type" - const val TYPES = "types" -} - -private val valueTypes = - setOf(Boolean::class, Double::class, Int::class, Float::class, Long::class, Short::class, Byte::class) - -@OptIn(ExperimentalSerializationApi::class) -private fun convert(value: Any?): JsonElement = - when (value) { - is JsonElement -> value - is Number -> JsonPrimitive(value) - is String -> JsonPrimitive(value) - is Char -> JsonPrimitive(value.toString()) - is Boolean -> JsonPrimitive(value) - null -> JsonPrimitive(null) - else -> JsonPrimitive(value.toString()) - } - -internal fun encodeRow(frame: ColumnsContainer<*>, index: Int): JsonObject { - val values: Map = frame.columns().associate { col -> - col.name to when { - col is ColumnGroup<*> -> encodeRow(col, index) - - col is FrameColumn<*> -> encodeFrame(col[index]) - - col.isList() -> { - col[index]?.let { - JsonArray((it as List<*>).map { value -> convert(value) }) - } ?: JsonPrimitive(null) - } - - col.typeClass in valueTypes -> { - val v = col[index] - convert(v) - } - - else -> JsonPrimitive(col[index]?.toString()) - } - } - - if (values.isEmpty()) return buildJsonObject { } - return JsonObject(values) -} - -internal fun encodeRowWithMetadata( - frame: ColumnsContainer<*>, - index: Int, - rowLimit: Int? = null, - customEncoders: List = emptyList(), -): JsonElement? 
{ - val values: List> = frame.columns().map { col -> - when (col) { - is ColumnGroup<*> -> { - val schema = col.schema() - buildJsonObject { - put(DATA, encodeRowWithMetadata(col, index, rowLimit, customEncoders) ?: JsonPrimitive(null)) - putJsonObject(METADATA) { - put(KIND, JsonPrimitive(ColumnKind.Group.toString())) - put(COLUMNS, Json.encodeToJsonElement(schema.columns.keys)) - putJsonArray(TYPES) { - addAll( - schema.columns.values.map { columnSchema -> - createJsonTypeDescriptor(columnSchema) - }, - ) - } - } - } - } - - is FrameColumn<*> -> { - val data = if (rowLimit == null) { - encodeFrameWithMetadata(col[index], null, customEncoders) - } else { - encodeFrameWithMetadata(col[index].take(rowLimit), rowLimit, customEncoders) - } - val schema = col.schema.value - buildJsonObject { - put(DATA, data) - putJsonObject(METADATA) { - put(KIND, JsonPrimitive(ColumnKind.Frame.toString())) - put(COLUMNS, Json.encodeToJsonElement(schema.columns.keys)) - putJsonArray(TYPES) { - addAll( - schema.columns.values.map { columnSchema -> - createJsonTypeDescriptor(columnSchema) - }, - ) - } - put(NCOL, JsonPrimitive(col[index].ncol)) - put(NROW, JsonPrimitive(col[index].nrow)) - } - } - } - - else -> encodeValue(col, index, customEncoders) - }.let { col.name to it } - } - if (values.isEmpty()) return null - return JsonObject(values.toMap()) -} - -internal fun encodeValue(col: AnyCol, index: Int, customEncoders: List = emptyList()): JsonElement { - val matchingEncoder = customEncoders.firstOrNull { it.canEncode(col[index]) } - - return when { - matchingEncoder != null -> matchingEncoder.encode(col[index]) - - col.isList() -> col[index]?.let { list -> - val values = (list as List<*>).map { convert(it) } - JsonArray(values) - } ?: JsonArray(emptyList()) - - col.typeClass in valueTypes -> convert(col[index]) - - else -> JsonPrimitive(col[index]?.toString()) - } -} - -internal class DataframeConvertableEncoder( - private val encoders: List, - private val rowLimit: Int? 
= null, -) : CustomEncoder { - override fun canEncode(input: Any?): Boolean = isDataframeConvertable(input) - - override fun encode(input: Any?): JsonElement = - input?.let { - val data = encodeFrameWithMetadata( - KotlinNotebookPluginUtils.convertToDataFrame(input), - rowLimit, - encoders, - ) - buildJsonObject { - put(DATA, data) - putJsonObject(METADATA) { - put(KIND, JsonPrimitive(CellKind.DataFrameConvertable.toString())) - } - } - } ?: JsonPrimitive(null) -} - -internal class BufferedImageEncoder(private val options: Base64ImageEncodingOptions) : CustomEncoder { - override fun canEncode(input: Any?): Boolean = input is BufferedImage - - override fun encode(input: Any?): JsonElement = - JsonPrimitive( - input?.let { image -> encodeBufferedImageAsBase64(image as BufferedImage, options) } ?: "", - ) - - private fun encodeBufferedImageAsBase64( - image: BufferedImage, - imageEncodingOptions: Base64ImageEncodingOptions = Base64ImageEncodingOptions(), - ): String = - try { - val preparedImage = if (imageEncodingOptions.isLimitSizeOn) { - image.resizeKeepingAspectRatio(imageEncodingOptions.imageSizeLimit) - } else { - image - } - - val bytes = if (imageEncodingOptions.isGzipOn) { - preparedImage.toByteArray().encodeGzip() - } else { - preparedImage.toByteArray() - } - - bytes.toBase64() - } catch (_: IOException) { - "" - } -} - -private fun createJsonTypeDescriptor(columnSchema: ColumnSchema): JsonObject = - JsonObject( - mutableMapOf(KIND to JsonPrimitive(columnSchema.kind.toString())).also { - if (columnSchema.kind == ColumnKind.Value) { - it[TYPE] = JsonPrimitive(columnSchema.type.toString()) - } - }, - ) - -internal fun encodeFrameWithMetadata( - frame: AnyFrame, - rowLimit: Int? 
= null, - customEncoders: List = emptyList(), -): JsonArray { - val valueColumn = frame.extractValueColumn() - val arrayColumn = frame.extractArrayColumn() - - val arraysAreFrames = arrayColumn?.kind() == ColumnKind.Frame - - val data = frame.indices().map { rowIndex -> - valueColumn?.get(rowIndex) - ?: arrayColumn?.get(rowIndex)?.let { - if (arraysAreFrames) { - encodeFrameWithMetadata( - it as AnyFrame, - rowLimit, - customEncoders, - ) - } else { - null - } - } - ?: encodeRowWithMetadata(frame, rowIndex, rowLimit, customEncoders) - } - - return buildJsonArray { addAll(data.map { convert(it) }) } -} - -internal fun AnyFrame.extractValueColumn(): DataColumn<*>? { - val allColumns = columns() - - return allColumns.filter { it.name.startsWith(VALUE_COLUMN_NAME) } - .takeIf { isPossibleToFindUnnamedColumns } - ?.maxByOrNull { it.name } - ?.let { valueCol -> - // check that value in this column is not null only when other values are null - if (valueCol.kind() != ColumnKind.Value) { - null - } else { - // check that value in this column is not null only when other values are null - val isValidValueColumn = rows().all { row -> - if (valueCol[row] != null) { - allColumns.all { col -> - if (col.name != valueCol.name) { - col[row] == null - } else { - true - } - } - } else { - true - } - } - if (isValidValueColumn) { - valueCol - } else { - null - } - } - } -} - -// If there is only 1 column, then `isValidValueColumn` always true. -// But at the same time, we shouldn't treat dataFrameOf("value")(1,2,3) like an unnamed column -// because it was created by the user. -internal val AnyFrame.isPossibleToFindUnnamedColumns: Boolean - get() = columns().size != 1 - -internal fun AnyFrame.extractArrayColumn(): DataColumn<*>? 
{ - val allColumns = columns() - - return columns().filter { it.name.startsWith(ARRAY_COLUMN_NAME) } - .takeIf { isPossibleToFindUnnamedColumns } - ?.maxByOrNull { it.name } - ?.let { arrayCol -> - if (arrayCol.kind() == ColumnKind.Group) { - null - } else { - // check that value in this column is not null only when other values are null - val isValidArrayColumn = rows().all { row -> - if (arrayCol[row] != null) { - allColumns.all { col -> - if (col.name != arrayCol.name) { - col[row] == null - } else { - true - } - } - } else { - true - } - } - if (isValidArrayColumn) { - arrayCol - } else { - null - } - } - } -} - -internal fun encodeFrame(frame: AnyFrame): JsonArray { - val valueColumn = frame.extractValueColumn() - val arrayColumn = frame.extractArrayColumn() - - val arraysAreFrames = arrayColumn?.kind() == ColumnKind.Frame - - val data = frame.indices().map { rowIndex -> - when { - valueColumn != null -> valueColumn[rowIndex] - - arrayColumn != null -> arrayColumn[rowIndex]?.let { - if (arraysAreFrames) { - encodeFrame(it as AnyFrame) - } else { - null - } - } - - else -> encodeRow(frame, rowIndex) - } - } - - return buildJsonArray { addAll(data.map { convert(it) }) } -} - -internal fun encodeDataFrameWithMetadata( - frame: AnyFrame, - rowLimit: Int, - nestedRowLimit: Int? 
= null, - customEncoders: List = emptyList(), -): JsonObject = - buildJsonObject { - put(VERSION, JsonPrimitive(SERIALIZATION_VERSION)) - putJsonObject(METADATA) { - putJsonArray(COLUMNS) { addAll(frame.columnNames().map { JsonPrimitive(it) }) } - putJsonArray(TYPES) { - addAll( - frame.schema().columns.values.map { colSchema -> - createJsonTypeDescriptor(colSchema) - }, - ) - } - put(NROW, JsonPrimitive(frame.rowsCount())) - put(NCOL, JsonPrimitive(frame.columnsCount())) - } - put( - KOTLIN_DATAFRAME, - encodeFrameWithMetadata( - frame = frame.take(rowLimit), - rowLimit = nestedRowLimit, - customEncoders = customEncoders, - ), - ) - } - -@OptIn(ExperimentalSerializationApi::class) -internal fun encodeFrameNoDynamicNestedTables(df: AnyFrame, limit: Int): JsonObject = - buildJsonObject { - put(NROW, df.rowsCount()) - put(NCOL, df.columnsCount()) - putJsonArray(COLUMNS) { addAll(df.columnNames()) } - put( - KOTLIN_DATAFRAME, - encodeFrame(df.take(limit)), - ) - } diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/schema/Utils.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/schema/Utils.kt index e1ffbf8ec9..c0cdc57c7e 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/schema/Utils.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/schema/Utils.kt @@ -29,6 +29,10 @@ import kotlin.reflect.typeOf internal fun AnyFrame.extractSchema(): DataFrameSchema = DataFrameSchemaImpl(columns().filter { it.name().isNotEmpty() }.associate { it.name() to it.extractSchema() }) +// helper overload for friend modules +@JvmName("intersectSchemasOverload") +internal fun intersectSchemas(schemas: Iterable): DataFrameSchema = schemas.intersectSchemas() + internal fun Iterable.intersectSchemas(): DataFrameSchema { val collectedTypes = mutableMapOf>() var first = true diff --git 
a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/common.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/common.kt index 672a869ab4..f5021368c2 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/common.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/common.kt @@ -17,7 +17,7 @@ import java.net.URL * Opens a stream to [url] to create a [DataFrame] from it. * If the URL is a file URL, the file is read directly. * If the URL is an HTTP URL, it's also read directly, but if the server returns an error code, - * the error response is read as JSON and parsed as [DataFrame] too. + * the error response is read and parsed as [DataFrame] too. * * Public so it may be used in other modules. */ @@ -32,8 +32,8 @@ public fun catchHttpResponse(url: URL, body: (InputStream) -> AnyFrame): AnyFram if (code != 200) { val response = connection.responseMessage try { - // attempt to read error response as JSON - return DataFrame.readJson(connection.errorStream) + // attempt to read error response as dataframe + return DataFrame.read(connection.errorStream).df } catch (_: Exception) { throw RuntimeException("Server returned HTTP response code: $code. Response: $response") } diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt index 2e3fa55f80..fc370459af 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt @@ -430,9 +430,17 @@ public fun AnyFrame.writeCSV(writer: Appendable, format: CSVFormat = CSVFormat.D } forEach { val values = it.values.map { - when (it) { - is AnyRow -> it.toJson() - is AnyFrame -> it.toJson() + when (it) { // todo use compileOnly? 
+ is AnyRow -> + error( + "Encountered a DataRow when writing CSV. This needs to be converted to JSON, which is not supported by `writeCSV` anymore. Please use `df.writeCsv()` instead.", + ) + + is AnyFrame -> + error( + "Encountered a DataFrame when writing CSV. This needs to be converted to JSON, which is not supported by `writeCSV` anymore. Please use `df.writeCsv()` instead.", + ) + else -> it } } diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt deleted file mode 100644 index 4e3ade50bc..0000000000 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt +++ /dev/null @@ -1,501 +0,0 @@ -package org.jetbrains.kotlinx.dataframe.io - -import kotlinx.serialization.ExperimentalSerializationApi -import kotlinx.serialization.json.Json -import kotlinx.serialization.json.JsonElement -import kotlinx.serialization.json.decodeFromStream -import org.intellij.lang.annotations.Language -import org.jetbrains.kotlinx.dataframe.AnyFrame -import org.jetbrains.kotlinx.dataframe.AnyRow -import org.jetbrains.kotlinx.dataframe.DataFrame -import org.jetbrains.kotlinx.dataframe.DataRow -import org.jetbrains.kotlinx.dataframe.api.JsonPath -import org.jetbrains.kotlinx.dataframe.api.KeyValueProperty -import org.jetbrains.kotlinx.dataframe.api.single -import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadDfMethod -import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadJsonMethod -import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup -import org.jetbrains.kotlinx.dataframe.columns.FrameColumn -import org.jetbrains.kotlinx.dataframe.documentation.UnifyingNumbers -import org.jetbrains.kotlinx.dataframe.impl.io.encodeDataFrameWithMetadata -import org.jetbrains.kotlinx.dataframe.impl.io.encodeFrame -import org.jetbrains.kotlinx.dataframe.impl.io.encodeRow -import org.jetbrains.kotlinx.dataframe.impl.io.readJsonImpl -import 
org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic -import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.ANY_COLUMNS -import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.ARRAY_AND_VALUE_COLUMNS -import org.jetbrains.kotlinx.dataframe.util.READ_JSON -import java.io.File -import java.io.InputStream -import java.net.URL -import kotlin.reflect.typeOf - -public class JSON( - private val typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, - private val keyValuePaths: List = emptyList(), - private val unifyNumbers: Boolean = true, -) : SupportedDataFrameFormat { - - @Deprecated(READ_JSON, level = DeprecationLevel.HIDDEN) - public constructor( - typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, - keyValuePaths: List = emptyList(), - ) : this(typeClashTactic, keyValuePaths, true) - - override fun readDataFrame(stream: InputStream, header: List): AnyFrame = - DataFrame.readJson( - stream = stream, - header = header, - typeClashTactic = typeClashTactic, - keyValuePaths = keyValuePaths, - unifyNumbers = unifyNumbers, - ) - - override fun readDataFrame(file: File, header: List): AnyFrame = - DataFrame.readJson( - file = file, - header = header, - typeClashTactic = typeClashTactic, - keyValuePaths = keyValuePaths, - unifyNumbers = unifyNumbers, - ) - - override fun acceptsExtension(ext: String): Boolean = ext == "json" - - override fun acceptsSample(sample: SupportedFormatSample): Boolean = true // Extension is enough - - override val testOrder: Int = 10_000 - - override fun createDefaultReadMethod(pathRepresentation: String?): DefaultReadDfMethod = - DefaultReadJsonMethod( - path = pathRepresentation, - arguments = MethodArguments() - .add( - "keyValuePaths", - typeOf>(), - "listOf(${ - keyValuePaths.joinToString { - "org.jetbrains.kotlinx.dataframe.api.JsonPath(\"\"\"${it.path}\"\"\")" - } - })", - ) - .add( - "typeClashTactic", - typeOf(), - "org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.${typeClashTactic.name}", - ) - 
.add( - "unifyNumbers", - typeOf(), - unifyNumbers.toString(), - ), - ) - - /** - * Allows the choice of how to handle type clashes when reading a JSON file. - * Such as: - * ```json - * [ - * { "a": "text" }, - * { "a": { "b": 2 } }, - * { "a": [6, 7, 8] } - * ] - * ``` - * - * [ARRAY_AND_VALUE_COLUMNS] (default) will create a [DataFrame] looking like (including `null` and `[]` values): - * ``` - * ⌌----------------------------------------------⌍ - * | | a:{b:Int?, value:String?, array:List}| - * |--|-------------------------------------------| - * | 0| { b:null, value:"text", array:[] }| - * | 1| { b:2, value:null, array:[] }| - * | 2| { b:null, value:null, array:[6, 7, 8] }| - * ⌎----------------------------------------------⌏ - * ``` - * So, for the type clashing argument it will create a [ColumnGroup] with the properties `value`, `array`, - * and the unwrapped properties of the objects the property can be. - * - * [ANY_COLUMNS] will create a [DataFrame] looking like: - * ``` - * ⌌-------------⌍ - * | | a:Any| - * |--|----------| - * | 0| "text"| - * | 1| { b:2 }| - * | 2| [6, 7, 8]| - * ⌎-------------⌏ - * ``` - */ - public enum class TypeClashTactic { - ARRAY_AND_VALUE_COLUMNS, - ANY_COLUMNS, - } -} - -internal const val ARRAY_COLUMN_NAME: String = "array" -internal const val VALUE_COLUMN_NAME: String = "value" - -/** - * @param file Where to fetch the Json as [InputStream] to be converted to a [DataFrame]. - * @param keyValuePaths List of [JsonPath]s where instead of a [ColumnGroup], a [FrameColumn]<[KeyValueProperty]> - * will be created. - * @param typeClashTactic How to handle type clashes when reading a JSON file. - * @param header Optional list of column names. If given, the file will be read like an object with [header] being the keys. - * @param unifyNumbers Whether to [unify the numbers that are read][UnifyingNumbers]. `true` by default. - * @return [DataFrame] from the given [file]. 
- */ -public fun DataFrame.Companion.readJson( - file: File, - header: List = emptyList(), - keyValuePaths: List = emptyList(), - typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, - unifyNumbers: Boolean = true, -): AnyFrame = DataFrame.readJson(file.toURI().toURL(), header, keyValuePaths, typeClashTactic, unifyNumbers) - -/** - * @param file Where to fetch the Json as [InputStream] to be converted to a [DataRow]. - * @param keyValuePaths List of [JsonPath]s where instead of a [ColumnGroup], a [FrameColumn]<[KeyValueProperty]> - * will be created. - * @param typeClashTactic How to handle type clashes when reading a JSON file. - * @param header Optional list of column names. If given, the file will be read like an object with [header] being the keys. - * @param unifyNumbers Whether to [unify the numbers that are read][UnifyingNumbers]. `true` by default. - * @return [DataRow] from the given [file]. - */ -public fun DataRow.Companion.readJson( - file: File, - header: List = emptyList(), - keyValuePaths: List = emptyList(), - typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, - unifyNumbers: Boolean = true, -): AnyRow = DataFrame.readJson(file, header, keyValuePaths, typeClashTactic, unifyNumbers).single() - -/** - * @param path URL or file path from where to fetch the Json as [InputStream] to be converted to a [DataFrame]. - * @param keyValuePaths List of [JsonPath]s where instead of a [ColumnGroup], a [FrameColumn]<[KeyValueProperty]> - * will be created. - * @param typeClashTactic How to handle type clashes when reading a JSON file. - * @param header Optional list of column names. If given, the stream will be read like an object with [header] being the keys. - * @param unifyNumbers Whether to [unify the numbers that are read][UnifyingNumbers]. `true` by default. - * @return [DataFrame] from the given [path]. 
- */ -public fun DataFrame.Companion.readJson( - path: String, - header: List = emptyList(), - keyValuePaths: List = emptyList(), - typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, - unifyNumbers: Boolean = true, -): AnyFrame = DataFrame.readJson(asUrl(path), header, keyValuePaths, typeClashTactic, unifyNumbers) - -/** - * @param path URL or file path from where to fetch the Json as [InputStream] to be converted to a [DataRow]. - * @param keyValuePaths List of [JsonPath]s where instead of a [ColumnGroup], a [FrameColumn]<[KeyValueProperty]> - * will be created. - * @param typeClashTactic How to handle type clashes when reading a JSON file. - * @param header Optional list of column names. If given, the stream will be read like an object with [header] being the keys. - * @param unifyNumbers Whether to [unify the numbers that are read][UnifyingNumbers]. `true` by default. - * @return [DataRow] from the given [path]. - */ -public fun DataRow.Companion.readJson( - path: String, - header: List = emptyList(), - keyValuePaths: List = emptyList(), - typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, - unifyNumbers: Boolean = true, -): AnyRow = DataFrame.readJson(path, header, keyValuePaths, typeClashTactic, unifyNumbers).single() - -/** - * @param url Where to fetch the Json as [InputStream] to be converted to a [DataFrame]. - * @param keyValuePaths List of [JsonPath]s where instead of a [ColumnGroup], a [FrameColumn]<[KeyValueProperty]> - * will be created. - * @param typeClashTactic How to handle type clashes when reading a JSON file. - * @param header Optional list of column names. If given, the stream will be read like an object with [header] being the keys. - * @param unifyNumbers Whether to [unify the numbers that are read][UnifyingNumbers]. `true` by default. - * @return [DataFrame] from the given [url]. 
- */ -public fun DataFrame.Companion.readJson( - url: URL, - header: List = emptyList(), - keyValuePaths: List = emptyList(), - typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, - unifyNumbers: Boolean = true, -): AnyFrame = catchHttpResponse(url) { DataFrame.readJson(it, header, keyValuePaths, typeClashTactic, unifyNumbers) } - -/** - * @param url Where to fetch the Json as [InputStream] to be converted to a [DataRow]. - * @param keyValuePaths List of [JsonPath]s where instead of a [ColumnGroup], a [FrameColumn]<[KeyValueProperty]> - * will be created. - * @param typeClashTactic How to handle type clashes when reading a JSON file. - * @param header Optional list of column names. If given, the stream will be read like an object with [header] being the keys. - * @param unifyNumbers Whether to [unify the numbers that are read][UnifyingNumbers]. `true` by default. - * @return [DataRow] from the given [url]. - */ -public fun DataRow.Companion.readJson( - url: URL, - header: List = emptyList(), - keyValuePaths: List = emptyList(), - typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, - unifyNumbers: Boolean = true, -): AnyRow = DataFrame.readJson(url, header, keyValuePaths, typeClashTactic, unifyNumbers).single() - -/** - * @param stream Json as [InputStream] to be converted to a [DataFrame]. - * @param keyValuePaths List of [JsonPath]s where instead of a [ColumnGroup], a [FrameColumn]<[KeyValueProperty]> - * will be created. - * @param typeClashTactic How to handle type clashes when reading a JSON file. - * @param header Optional list of column names. If given, [stream] will be read like an object with [header] being the keys. - * @param unifyNumbers Whether to [unify the numbers that are read][UnifyingNumbers]. `true` by default. - * @return [DataFrame] from the given [stream]. 
- */ -@OptIn(ExperimentalSerializationApi::class) -public fun DataFrame.Companion.readJson( - stream: InputStream, - header: List = emptyList(), - keyValuePaths: List = emptyList(), - typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, - unifyNumbers: Boolean = true, -): AnyFrame = - readJsonImpl(Json.decodeFromStream(stream), unifyNumbers, header, keyValuePaths, typeClashTactic) - -/** - * @param stream Json as [InputStream] to be converted to a [DataRow]. - * @param keyValuePaths List of [JsonPath]s where instead of a [ColumnGroup], a [FrameColumn]<[KeyValueProperty]> - * will be created. - * @param typeClashTactic How to handle type clashes when reading a JSON file. - * @param header Optional list of column names. If given, [stream] will be read like an object with [header] being the keys. - * @param unifyNumbers Whether to [unify the numbers that are read][UnifyingNumbers]. `true` by default. - * @return [DataRow] from the given [stream]. - */ -public fun DataRow.Companion.readJson( - stream: InputStream, - header: List = emptyList(), - keyValuePaths: List = emptyList(), - typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, - unifyNumbers: Boolean = true, -): AnyRow = DataFrame.readJson(stream, header, keyValuePaths, typeClashTactic, unifyNumbers).single() - -/** - * @param text Json as [String] to be converted to a [DataFrame]. - * @param keyValuePaths List of [JsonPath]s where instead of a [ColumnGroup], a [FrameColumn]<[KeyValueProperty]> - * will be created. - * @param typeClashTactic How to handle type clashes when reading a JSON file. - * @param header Optional list of column names. If given, [text] will be read like an object with [header] being the keys. - * @param unifyNumbers Whether to [unify the numbers that are read][UnifyingNumbers]. `true` by default. - * @return [DataFrame] from the given [text]. 
- */ -public fun DataFrame.Companion.readJsonStr( - @Language("json") text: String, - header: List = emptyList(), - keyValuePaths: List = emptyList(), - typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, - unifyNumbers: Boolean = true, -): AnyFrame = readJsonImpl(Json.parseToJsonElement(text), unifyNumbers, header, keyValuePaths, typeClashTactic) - -/** - * @param text Json as [String] to be converted to a [DataRow]. - * @param keyValuePaths List of [JsonPath]s where instead of a [ColumnGroup], a [FrameColumn]<[KeyValueProperty]> - * will be created. - * @param typeClashTactic How to handle type clashes when reading a JSON file. - * @param header Optional list of column names. If given, [text] will be read like an object with [header] being the keys. - * @param unifyNumbers Whether to [unify the numbers that are read][UnifyingNumbers]. `true` by default. - * @return [DataRow] from the given [text]. - */ -public fun DataRow.Companion.readJsonStr( - @Language("json") text: String, - header: List = emptyList(), - keyValuePaths: List = emptyList(), - typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, - unifyNumbers: Boolean = true, -): AnyRow = DataFrame.readJsonStr(text, header, keyValuePaths, typeClashTactic, unifyNumbers).single() - -public fun AnyFrame.toJson(prettyPrint: Boolean = false): String { - val json = Json { - this.prettyPrint = prettyPrint - isLenient = true - allowSpecialFloatingPointValues = true - } - return json.encodeToString(JsonElement.serializer(), encodeFrame(this@toJson)) -} - -/** - * Converts the DataFrame to a JSON string representation with additional metadata about serialized data. - * It is heavily used to implement some integration features in Kotlin Notebook IntelliJ IDEA plugin. - * - * @param rowLimit The maximum number of top-level dataframe rows to include in the output JSON. - * @param nestedRowLimit The maximum number of nested frame rows to include in the output JSON. - * If null, all rows are included. 
- * Applied for each frame column recursively - * @param prettyPrint Specifies whether the output JSON should be formatted with indentation and line breaks. - * @param customEncoders The options for encoding things like images. - * The default is empty list, which indicates that the image is not encoded as Base64. - * - * @return The DataFrame converted to a JSON string with metadata. - */ -public fun AnyFrame.toJsonWithMetadata( - rowLimit: Int, - nestedRowLimit: Int? = null, - prettyPrint: Boolean = false, - customEncoders: List = emptyList(), -): String { - val json = Json { - this.prettyPrint = prettyPrint - isLenient = true - allowSpecialFloatingPointValues = true - } - return json.encodeToString( - JsonElement.serializer(), - encodeDataFrameWithMetadata(this@toJsonWithMetadata, rowLimit, nestedRowLimit, customEncoders), - ) -} - -/** - * Interface for defining a custom encoder. That applied to the value during dataframe JSON serialization - */ -public interface CustomEncoder { - /** - * Determines whether this encoder can encode the given input. - * - * @param input The input object to be checked for suitability. - * @return `true` if the input can be encoded, otherwise `false`. - */ - public fun canEncode(input: Any?): Boolean - - /** - * Encodes the provided input into a JSON element. - * - * @param input The input object to be encoded. - * @return A JsonElement representing the encoded input. - */ - public fun encode(input: Any?): JsonElement -} - -internal const val DEFAULT_IMG_SIZE = 600 - -/** - * Class representing the options for encoding images. - * - * @property imageSizeLimit The maximum size to which images should be resized. Defaults to the value of DEFAULT_IMG_SIZE. - * @property options Bitwise-OR of the [GZIP_ON] and [LIMIT_SIZE_ON] constants. Defaults to [GZIP_ON] or [LIMIT_SIZE_ON]. 
- */ -public class Base64ImageEncodingOptions( - public val imageSizeLimit: Int = DEFAULT_IMG_SIZE, - private val options: Int = GZIP_ON or LIMIT_SIZE_ON, -) { - public val isGzipOn: Boolean - get() = options and GZIP_ON == GZIP_ON - - public val isLimitSizeOn: Boolean - get() = options and LIMIT_SIZE_ON == LIMIT_SIZE_ON - - public companion object { - public const val ALL_OFF: Int = 0 - public const val GZIP_ON: Int = 1 // 2^0 - public const val LIMIT_SIZE_ON: Int = 2 // 2^1 - } -} - -public fun AnyRow.toJson(prettyPrint: Boolean = false): String { - val json = Json { - this.prettyPrint = prettyPrint - isLenient = true - allowSpecialFloatingPointValues = true - } - return json.encodeToString(JsonElement.serializer(), encodeRow(df(), index())) -} - -public fun AnyFrame.writeJson(file: File, prettyPrint: Boolean = false) { - file.writeText(toJson(prettyPrint)) -} - -public fun AnyFrame.writeJson(path: String, prettyPrint: Boolean = false): Unit = writeJson(File(path), prettyPrint) - -public fun AnyFrame.writeJson(writer: Appendable, prettyPrint: Boolean = false) { - writer.append(toJson(prettyPrint)) -} - -public fun AnyRow.writeJson(file: File, prettyPrint: Boolean = false) { - file.writeText(toJson(prettyPrint)) -} - -public fun AnyRow.writeJson(path: String, prettyPrint: Boolean = false) { - writeJson(File(path), prettyPrint) -} - -public fun AnyRow.writeJson(writer: Appendable, prettyPrint: Boolean = false) { - writer.append(toJson(prettyPrint)) -} - -// region deprecations - -/** Here for binary compatibility. */ -@Deprecated(message = READ_JSON, level = DeprecationLevel.HIDDEN) -public fun DataFrame.Companion.readJson( - file: File, - header: List = emptyList(), - keyValuePaths: List = emptyList(), - typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, -): AnyFrame = DataFrame.readJson(file, header, keyValuePaths, typeClashTactic, true) - -/** Here for binary compatibility. 
*/ -@Deprecated(message = READ_JSON, level = DeprecationLevel.HIDDEN) -public fun DataRow.Companion.readJson( - file: File, - header: List = emptyList(), - keyValuePaths: List = emptyList(), - typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, -): AnyRow = DataRow.readJson(file, header, keyValuePaths, typeClashTactic, true) - -/** Here for binary compatibility. */ -@Deprecated(message = READ_JSON, level = DeprecationLevel.HIDDEN) -public fun DataFrame.Companion.readJson( - stream: InputStream, - header: List = emptyList(), - keyValuePaths: List = emptyList(), - typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, -): AnyFrame = DataFrame.readJson(stream, header, keyValuePaths, typeClashTactic, true) - -/** Here for binary compatibility. */ -@Deprecated(message = READ_JSON, level = DeprecationLevel.HIDDEN) -public fun DataRow.Companion.readJson( - stream: InputStream, - header: List = emptyList(), - keyValuePaths: List = emptyList(), - typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, -): AnyRow = DataRow.readJson(stream, header, keyValuePaths, typeClashTactic, true) - -/** Here for binary compatibility. */ -@Deprecated(message = READ_JSON, level = DeprecationLevel.HIDDEN) -public fun DataFrame.Companion.readJson( - url: URL, - header: List = emptyList(), - keyValuePaths: List = emptyList(), - typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, -): AnyFrame = DataFrame.readJson(url, header, keyValuePaths, typeClashTactic, true) - -/** Here for binary compatibility. */ -@Deprecated(message = READ_JSON, level = DeprecationLevel.HIDDEN) -public fun DataRow.Companion.readJson( - url: URL, - header: List = emptyList(), - keyValuePaths: List = emptyList(), - typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, -): AnyRow = DataRow.readJson(url, header, keyValuePaths, typeClashTactic, true) - -/** Here for binary compatibility. 
*/ -@Deprecated(message = READ_JSON, level = DeprecationLevel.HIDDEN) -public fun DataFrame.Companion.readJsonStr( - @Language("json") text: String, - header: List = emptyList(), - keyValuePaths: List = emptyList(), - typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, -): AnyFrame = DataFrame.readJsonStr(text, header, keyValuePaths, typeClashTactic, true) - -/** Here for binary compatibility. */ -@Deprecated(message = READ_JSON, level = DeprecationLevel.HIDDEN) -public fun DataRow.Companion.readJsonStr( - @Language("json") text: String, - header: List = emptyList(), - keyValuePaths: List = emptyList(), - typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS, -): AnyRow = DataRow.readJsonStr(text, header, keyValuePaths, typeClashTactic, true) - -// endregion diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/Utils.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/Utils.kt index 3865c36343..9be2b8e52f 100644 --- a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/Utils.kt +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/Utils.kt @@ -1,8 +1,5 @@ package org.jetbrains.kotlinx.dataframe -import kotlinx.serialization.json.Json -import kotlinx.serialization.json.JsonObject -import kotlinx.serialization.json.jsonObject import org.jetbrains.kotlinx.dataframe.api.print import org.jetbrains.kotlinx.dataframe.api.schema import org.jetbrains.kotlinx.dataframe.io.renderToString @@ -31,5 +28,3 @@ fun > T.alsoDebug(println: String? 
= null, rowsLimit: Int = 20) print(borders = true, title = true, columnTypes = true, valueLimit = -1, rowsLimit = rowsLimit) schema().print() } - -fun parseJsonStr(jsonStr: String): JsonObject = Json.parseToJsonElement(jsonStr).jsonObject diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ImageSerializationTests.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ImageSerializationTests.kt deleted file mode 100644 index fe119fc414..0000000000 --- a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ImageSerializationTests.kt +++ /dev/null @@ -1,180 +0,0 @@ -package org.jetbrains.kotlinx.dataframe.io - -import io.kotest.matchers.shouldBe -import io.kotest.matchers.string.shouldContain -import kotlinx.serialization.json.JsonObject -import kotlinx.serialization.json.jsonArray -import kotlinx.serialization.json.jsonObject -import kotlinx.serialization.json.jsonPrimitive -import org.jetbrains.kotlinx.dataframe.api.dataFrameOf -import org.jetbrains.kotlinx.dataframe.impl.io.BufferedImageEncoder -import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.KOTLIN_DATAFRAME -import org.jetbrains.kotlinx.dataframe.impl.io.resizeKeepingAspectRatio -import org.jetbrains.kotlinx.dataframe.io.Base64ImageEncodingOptions.Companion.ALL_OFF -import org.jetbrains.kotlinx.dataframe.io.Base64ImageEncodingOptions.Companion.GZIP_ON -import org.jetbrains.kotlinx.dataframe.io.Base64ImageEncodingOptions.Companion.LIMIT_SIZE_ON -import org.jetbrains.kotlinx.dataframe.parseJsonStr -import org.jetbrains.kotlinx.dataframe.testResource -import org.junit.Test -import org.junit.runner.RunWith -import org.junit.runners.Parameterized -import java.awt.image.BufferedImage -import java.io.ByteArrayInputStream -import java.io.ByteArrayOutputStream -import java.io.File -import java.util.Base64 -import java.util.zip.GZIPInputStream -import javax.imageio.ImageIO - -@RunWith(Parameterized::class) -class 
ImageSerializationTests(private val encodingOptions: Base64ImageEncodingOptions?) { - @Test - fun `serialize images as base64`() { - val images = readImagesFromResources() - val json = encodeImagesAsJson(images, encodingOptions) - - if (encodingOptions == DISABLED) { - checkImagesEncodedAsToString(json, images.size) - return - } - - val decodedImages = decodeImagesFromJson(json, images.size, encodingOptions) - - for ((decodedImage, original) in decodedImages.zip(images)) { - val expectedImage = resizeIfNeeded(original, encodingOptions) - isImagesIdentical(decodedImage, expectedImage, 2) shouldBe true - } - } - - private fun readImagesFromResources(): List { - val dir = File(testResource("imgs").path) - - return dir.listFiles()?.map { file -> - try { - ImageIO.read(file) - } catch (ex: Exception) { - throw IllegalArgumentException("Error reading ${file.name}: ${ex.message}") - } - } ?: emptyList() - } - - private fun encodeImagesAsJson( - images: List, - encodingOptions: Base64ImageEncodingOptions?, - ): JsonObject { - val df = dataFrameOf(listOf("imgs"), images) - val jsonStr = df.toJsonWithMetadata( - 20, - nestedRowLimit = 20, - customEncoders = listOfNotNull(encodingOptions?.let { BufferedImageEncoder(encodingOptions) }), - ) - - return parseJsonStr(jsonStr) - } - - private fun checkImagesEncodedAsToString(json: JsonObject, numImgs: Int) { - for (i in 0.. { - val result = mutableListOf() - for (i in 0.. 
decompressGzip(Base64.getDecoder().decode(imgString)) - else -> Base64.getDecoder().decode(imgString) - } - - private fun decompressGzip(input: ByteArray): ByteArray = - ByteArrayOutputStream().use { byteArrayOutputStream -> - GZIPInputStream(input.inputStream()).use { inputStream -> - inputStream.copyTo(byteArrayOutputStream) - } - byteArrayOutputStream.toByteArray() - } - - private fun resizeIfNeeded(image: BufferedImage, encodingOptions: Base64ImageEncodingOptions): BufferedImage = - when { - !encodingOptions.isLimitSizeOn -> image - else -> image.resizeKeepingAspectRatio(encodingOptions.imageSizeLimit) - } - - private fun createImageFromBytes(bytes: ByteArray): BufferedImage { - val bais = ByteArrayInputStream(bytes) - return ImageIO.read(bais) - } - - private fun isImagesIdentical(img1: BufferedImage, img2: BufferedImage, allowedDelta: Int): Boolean { - // First check dimensions - if (img1.width != img2.width || img1.height != img2.height) { - return false - } - - // Then check each pixel - for (y in 0 until img1.height) { - for (x in 0 until img1.width) { - val rgb1 = img1.getRGB(x, y) - val rgb2 = img2.getRGB(x, y) - - val r1 = (rgb1 shr 16) and 0xFF - val g1 = (rgb1 shr 8) and 0xFF - val b1 = rgb1 and 0xFF - - val r2 = (rgb2 shr 16) and 0xFF - val g2 = (rgb2 shr 8) and 0xFF - val b2 = rgb2 and 0xFF - - val diff = kotlin.math.abs(r1 - r2) + kotlin.math.abs(g1 - g2) + kotlin.math.abs(b1 - b2) - - // If the difference in color components exceed our allowance return false - if (diff > allowedDelta) { - return false - } - } - } - - // If no exceeding difference was found, the images are identical within our allowedDelta - return true - } - - companion object { - private val DEFAULT = Base64ImageEncodingOptions() - private val GZIP_ON_RESIZE_OFF = Base64ImageEncodingOptions(options = GZIP_ON) - private val GZIP_OFF_RESIZE_OFF = Base64ImageEncodingOptions(options = ALL_OFF) - private val GZIP_ON_RESIZE_TO_700 = - Base64ImageEncodingOptions(imageSizeLimit = 700, 
options = GZIP_ON or LIMIT_SIZE_ON) - private val DISABLED = null - - @JvmStatic - @Parameterized.Parameters - fun imageEncodingOptionsToTest(): Collection = - listOf( - DEFAULT, - GZIP_ON_RESIZE_OFF, - GZIP_OFF_RESIZE_OFF, - GZIP_ON_RESIZE_TO_700, - null, - ) - } -} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt deleted file mode 100644 index baff3b703f..0000000000 --- a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt +++ /dev/null @@ -1,1144 +0,0 @@ -package org.jetbrains.kotlinx.dataframe.io - -import io.kotest.assertions.throwables.shouldNotThrowAny -import io.kotest.assertions.throwables.shouldThrow -import io.kotest.matchers.collections.shouldBeIn -import io.kotest.matchers.shouldBe -import io.kotest.matchers.string.shouldContain -import io.kotest.matchers.string.shouldNotContain -import io.kotest.matchers.types.instanceOf -import io.kotest.matchers.types.shouldBeInstanceOf -import kotlinx.serialization.json.Json -import kotlinx.serialization.json.JsonElement -import kotlinx.serialization.json.boolean -import kotlinx.serialization.json.int -import kotlinx.serialization.json.jsonArray -import kotlinx.serialization.json.jsonObject -import kotlinx.serialization.json.jsonPrimitive -import org.intellij.lang.annotations.Language -import org.jetbrains.kotlinx.dataframe.AnyFrame -import org.jetbrains.kotlinx.dataframe.DataFrame -import org.jetbrains.kotlinx.dataframe.DataRow -import org.jetbrains.kotlinx.dataframe.alsoDebug -import org.jetbrains.kotlinx.dataframe.api.JsonPath -import org.jetbrains.kotlinx.dataframe.api.allNulls -import org.jetbrains.kotlinx.dataframe.api.colsOf -import org.jetbrains.kotlinx.dataframe.api.columnsCount -import org.jetbrains.kotlinx.dataframe.api.convert -import org.jetbrains.kotlinx.dataframe.api.dataFrameOf -import org.jetbrains.kotlinx.dataframe.api.forEach -import 
org.jetbrains.kotlinx.dataframe.api.getColumnGroup -import org.jetbrains.kotlinx.dataframe.api.getFrameColumn -import org.jetbrains.kotlinx.dataframe.api.schema -import org.jetbrains.kotlinx.dataframe.api.toFloat -import org.jetbrains.kotlinx.dataframe.api.toMap -import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup -import org.jetbrains.kotlinx.dataframe.columns.ColumnKind -import org.jetbrains.kotlinx.dataframe.columns.FrameColumn -import org.jetbrains.kotlinx.dataframe.columns.ValueColumn -import org.jetbrains.kotlinx.dataframe.impl.io.SERIALIZATION_VERSION -import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.COLUMNS -import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.DATA -import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.KIND -import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.KOTLIN_DATAFRAME -import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.METADATA -import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.NCOL -import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.NROW -import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.VERSION -import org.jetbrains.kotlinx.dataframe.impl.io.readJsonImpl -import org.jetbrains.kotlinx.dataframe.impl.nothingType -import org.jetbrains.kotlinx.dataframe.impl.nullableNothingType -import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.ANY_COLUMNS -import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.ARRAY_AND_VALUE_COLUMNS -import org.jetbrains.kotlinx.dataframe.parseJsonStr -import org.jetbrains.kotlinx.dataframe.testJson -import org.jetbrains.kotlinx.dataframe.type -import org.jetbrains.kotlinx.dataframe.values -import org.junit.Test -import kotlin.Double -import kotlin.reflect.typeOf - -@Suppress("ktlint:standard:argument-list-wrapping") -class JsonTests { - - @Test - fun `parse json array with header`() { - @Language("json") - val json = - """ - [ - [1, "a"], - [2, "b"], - [3, "c"], - [4, "d"], - [5, 
"e"] - ] - """.trimIndent() - val df = DataFrame.readJsonStr(json, header = listOf("numbers", "letters")) - .alsoDebug() - - df.columnsCount() shouldBe 2 - df.rowsCount() shouldBe 5 - df["numbers"].type() shouldBe typeOf() - df["letters"].type() shouldBe typeOf() - df["numbers"].values() shouldBe listOf(1, 2, 3, 4, 5) - df["letters"].values() shouldBe listOf("a", "b", "c", "d", "e") - } - - @Test - fun `parse json array with header Any`() { - @Language("json") - val json = - """ - [ - [1, "a"], - [2, "b"], - [3, "c"], - [4, "d"], - [5, "e"] - ] - """.trimIndent() - val df = DataFrame.readJsonStr(json, header = listOf("numbers", "letters"), typeClashTactic = ANY_COLUMNS) - .alsoDebug() - - df.columnsCount() shouldBe 2 - df.rowsCount() shouldBe 5 - df["numbers"].type() shouldBe typeOf() - df["letters"].type() shouldBe typeOf() - df["numbers"].values() shouldBe listOf(1, 2, 3, 4, 5) - df["letters"].values() shouldBe listOf("a", "b", "c", "d", "e") - } - - @Test - fun parseJson1() { - @Language("json") - val json = - """ - [ - {"a":1, "b":"text"}, - {"a":2, "b":5, "c":4.5} - ] - """.trimIndent() - val df = DataFrame.readJsonStr(json).alsoDebug() - df.columnsCount() shouldBe 3 - df.rowsCount() shouldBe 2 - df["a"].type() shouldBe typeOf() - df["b"].type() shouldBe typeOf>() - df["c"].type() shouldBe typeOf() - } - - @Test - fun parseJson1Any() { - @Language("json") - val json = - """ - [ - {"a":1, "b":"text"}, - {"a":2, "b":5, "c":4.5} - ] - """.trimIndent() - val df = DataFrame.readJsonStr(json, typeClashTactic = ANY_COLUMNS).alsoDebug() - df.columnsCount() shouldBe 3 - df.rowsCount() shouldBe 2 - df["a"].type() shouldBe typeOf() - df["b"].type() shouldBe typeOf>() - df["c"].type() shouldBe typeOf() - } - - @Test - fun parseJson2() { - @Language("json") - val json = - """ - [ - {"a":"text"}, - {"a":{"b":2}}, - {"a":[6,7,8]} - ] - """.trimIndent() - val df = DataFrame.readJsonStr(json).alsoDebug() - df.columnsCount() shouldBe 1 - df.rowsCount() shouldBe 3 - val group = 
df["a"] as ColumnGroup<*> - group.columnsCount() shouldBe 3 - group["b"].type() shouldBe typeOf() - group["value"].type() shouldBe typeOf() - group["array"].type() shouldBe typeOf>() - } - - @Test - fun parseJson2Any() { - @Language("json") - val json = - """ - [ - {"a":"text"}, - {"a":{"b":2}}, - {"a":[6,7,8]} - ] - """.trimIndent() - val df = DataFrame.readJsonStr(json, typeClashTactic = ANY_COLUMNS).alsoDebug() - df.columnsCount() shouldBe 1 - df.rowsCount() shouldBe 3 - val a = df["a"] as ValueColumn<*> - a.type() shouldBe typeOf() - a[0] shouldBe "text" - (a[1] as DataRow<*>)["b"] shouldBe 2 - a[2] shouldBe listOf(6, 7, 8) - } - - @Test - fun parseJson3() { - @Language("json") - val json = - """ - [ - {"a":[3, 5]}, - {}, - {"a":[3.4, 5.6]} - ] - """.trimIndent() - val df = DataFrame.readJsonStr(json).alsoDebug() - df.columnsCount() shouldBe 1 - df.rowsCount() shouldBe 3 - df["a"].type() shouldBe typeOf>() - df[1]["a"] shouldBe emptyList() - } - - @Test - fun parseJson3Any() { - @Language("json") - val json = - """ - [ - {"a":[3, 5]}, - {}, - {"a":[3.4, 5.6]} - ] - """.trimIndent() - val df = DataFrame.readJsonStr(json, typeClashTactic = ANY_COLUMNS).alsoDebug() - df.columnsCount() shouldBe 1 - df.rowsCount() shouldBe 3 - df["a"].type() shouldBe typeOf>() - df[1]["a"] shouldBe emptyList() - } - - @Test - fun parseJson4() { - @Language("json") - val json = - """ - [ - {"a":[ {"b":2}, {"c":3} ]}, - {"a":[ {"b":4.0}, {"d":5} ]} - ] - """.trimIndent() - val df = DataFrame.readJsonStr(json).alsoDebug() - df.columnsCount() shouldBe 1 - df.rowsCount() shouldBe 2 - val group = df["a"] as FrameColumn<*> - group[0].alsoDebug().let { - it.columnsCount() shouldBe 3 - it.rowsCount() shouldBe 2 - it["b"].type() shouldBe typeOf() - it["c"].type() shouldBe typeOf() - it["d"].type() shouldBe typeOf() - it["b"].values.toList() shouldBe listOf(2, null) - it["c"].values.toList() shouldBe listOf(null, 3) - it["d"].values.toList() shouldBe listOf(null, null) - } - - 
group[1].alsoDebug().let { - it.columnsCount() shouldBe 3 - it.rowsCount() shouldBe 2 - it["b"].type() shouldBe typeOf() - it["c"].type() shouldBe typeOf() - it["d"].type() shouldBe typeOf() - it["b"].values.toList() shouldBe listOf(4, null) - it["c"].values.toList() shouldBe listOf(null, null) - it["d"].values.toList() shouldBe listOf(null, 5) - } - } - - @Test - fun parseJson4Any() { - @Language("json") - val json = - """ - [ - {"a":[ {"b":2}, {"c":3} ]}, - {"a":[ {"b":4}, {"d":5} ]} - ] - """.trimIndent() - val df = DataFrame.readJsonStr(json, typeClashTactic = ANY_COLUMNS).alsoDebug() - df.columnsCount() shouldBe 1 - df.rowsCount() shouldBe 2 - val group = df["a"] as FrameColumn<*> - group[0].alsoDebug().let { - it.columnsCount() shouldBe 3 - it.rowsCount() shouldBe 2 - it["b"].type() shouldBe typeOf() - it["c"].type() shouldBe typeOf() - it["d"].type() shouldBe typeOf() - it["b"].values.toList() shouldBe listOf(2, null) - it["c"].values.toList() shouldBe listOf(null, 3) - it["d"].values.toList() shouldBe listOf(null, null) - } - - group[1].alsoDebug().let { - it.columnsCount() shouldBe 3 - it.rowsCount() shouldBe 2 - it["b"].type() shouldBe typeOf() - it["c"].type() shouldBe typeOf() - it["d"].type() shouldBe typeOf() - it["b"].values.toList() shouldBe listOf(4, null) - it["c"].values.toList() shouldBe listOf(null, null) - it["d"].values.toList() shouldBe listOf(null, 5) - } - } - - @Test - fun `json and number unification`() { - @Language("json") - val json = - """ - [ - {"a":1}, - {"a":2.0}, - {"a":3}, - {"a":4.5} - ] - """.trimIndent() - val df1 = DataFrame.readJsonStr(json, unifyNumbers = true).alsoDebug() - df1.columnsCount() shouldBe 1 - df1.rowsCount() shouldBe 4 - df1["a"].type() shouldBe typeOf() - df1["a"].values.toList() shouldBe listOf(1.0, 2.0, 3.0, 4.5) - - val df2 = DataFrame.readJsonStr(json, unifyNumbers = false).alsoDebug() - df2.columnsCount() shouldBe 1 - df2.rowsCount() shouldBe 4 - df2["a"].type() shouldBe typeOf() - 
df2["a"].values.toList() shouldBe listOf(1, 2.0f, 3, 4.5f) - } - - @Test - fun `parse json with nested json array with mixed values`() { - @Language("json") - val json = - """ - [ - {"a":"text"}, - {"a":{"b":2}}, - {"a":[6, {"a": "b"}, [1, {"a" : "b"}],8]}, - {"a":[{"a": "b"}, {"a" : "c"}, {"a" : "d"}]} - ] - """.trimIndent() - val df = DataFrame.readJsonStr(json).alsoDebug() - df.columnsCount() shouldBe 1 - df.rowsCount() shouldBe 4 - val group = df["a"] as ColumnGroup<*> - group.columnsCount() shouldBe 3 - group["b"].type() shouldBe typeOf() - group["value"].type() shouldBe typeOf() - group["array"].type() shouldBe typeOf>() - val nestedDf = group.getFrameColumn("array")[2] - nestedDf["a"].type() shouldBe typeOf() - nestedDf["value"].type() shouldBe typeOf() - nestedDf["array"].type() shouldBe typeOf>() - group.getFrameColumn("array")[3].alsoDebug().let { - it.columnsCount() shouldBe 3 - it.rowsCount() shouldBe 3 - it["a"].type() shouldBe typeOf() - it["a"].values.toList() shouldBe listOf("b", "c", "d") - } - } - - @Test - fun `parse json with nested json array with mixed values Any`() { - @Language("json") - val json = - """ - [ - {"a":"text"}, - {"a":{"b":2}}, - {"a":[6, {"a": "b"}, [1, {"a" : "b"}],8]}, - {"a":[{"a": "b"}, {"a" : "c"}, {"a" : "d"}]} - ] - """.trimIndent() - val df = DataFrame.readJsonStr(json, typeClashTactic = ANY_COLUMNS).alsoDebug() - df.columnsCount() shouldBe 1 - df.rowsCount() shouldBe 4 - val a = df["a"] as ValueColumn<*> - a.type() shouldBe typeOf() - a[0] shouldBe "text" - (a[1] as DataRow<*>).let { - it.columnsCount() shouldBe 1 - it["b"] shouldBe 2 - } - (a[2] as List<*>).let { - it[0] shouldBe 6 - (it[1] as DataRow<*>).let { - it.columnsCount() shouldBe 1 - it["a"] shouldBe "b" - } - (it[2] as List<*>).let { - it[0] shouldBe 1 - (it[1] as DataRow<*>).let { - it.columnsCount() shouldBe 1 - it["a"] shouldBe "b" - } - } - it[3] shouldBe 8 - } - (a[3] as DataFrame<*>) - .alsoDebug() - .let { - it.columnsCount() shouldBe 1 - 
it.rowsCount() shouldBe 3 - it["a"].type() shouldBe typeOf() - it["a"].values.toList() shouldBe listOf("b", "c", "d") - } - } - - @Test - fun `write df with primitive types`() { - val df = dataFrameOf("colInt", "colDouble?", "colBoolean?")( - 1, 1.0, true, - 2, null, false, - 3, 3.0, null, - ).alsoDebug("df:") - - val res = DataFrame.readJsonStr(df.toJson()).alsoDebug("res:") - res shouldBe df.convert { colsOf() }.toFloat() - } - - @Test - fun `write df with primitive types Any`() { - val df = dataFrameOf("colInt", "colDouble?", "colBoolean?")( - 1, 1.0, true, - 2, null, false, - 3, 3.0, null, - ).alsoDebug("df:") - - val res = - DataFrame.readJsonStr(df.toJson(), typeClashTactic = ANY_COLUMNS).alsoDebug("res:") - res shouldBe df.convert { colsOf() }.toFloat() - } - - @Test - fun `NaN double serialization Any`() { - val df = dataFrameOf("v")(1.1, Double.NaN) - df["v"].type() shouldBe typeOf() - - val df2 = DataFrame.readJsonStr(df.toJson(), typeClashTactic = ANY_COLUMNS) - df2["v"].type() shouldBe typeOf() - df2 shouldBe df.convert("v").toFloat() - } - - @Test - fun `NaN float serialization`() { - val df = dataFrameOf("v")(1.1f, Float.NaN) - df["v"].type() shouldBe typeOf() - val actual = DataFrame.readJsonStr(df.toJson()).convert("v").toFloat() - actual shouldBe df - } - - @Test - fun `NaN float serialization Any`() { - val df = dataFrameOf("v")(1.1f, Float.NaN) - df["v"].type() shouldBe typeOf() - val actual = DataFrame.readJsonStr(df.toJson(), typeClashTactic = ANY_COLUMNS).convert("v").toFloat() - actual shouldBe df - } - - @Test - fun `NaN string serialization`() { - val df = dataFrameOf("v")("NaM", "NaN") - df["v"].type() shouldBe typeOf() - DataFrame.readJsonStr(df.toJson()) shouldBe df - } - - @Test - fun `NaN string serialization Any`() { - val df = dataFrameOf("v")("NaM", "NaN") - df["v"].type() shouldBe typeOf() - DataFrame.readJsonStr(df.toJson(), typeClashTactic = ANY_COLUMNS) shouldBe df - } - - @Test - fun `list serialization`() { - val df = 
dataFrameOf("a")(listOf(1, 2, 3)) - DataFrame.readJsonStr(df.toJson()) shouldBe df - } - - @Test - fun `list serialization Any`() { - val df = dataFrameOf("a")(listOf(1, 2, 3)) - DataFrame.readJsonStr(df.toJson(), typeClashTactic = ANY_COLUMNS) shouldBe df - } - - @Test - fun `list serialization with nulls`() { - val df = dataFrameOf("a")(listOf(1, 2, 3), null) - val text = df.toJson() - val df1 = DataFrame.readJsonStr(text) - df1["a"][1] shouldBe emptyList() - } - - @Test - fun `list serialization with nulls Any`() { - val df = dataFrameOf("a")(listOf(1, 2, 3), null) - val text = df.toJson() - val df1 = DataFrame.readJsonStr(text, typeClashTactic = ANY_COLUMNS) - df1["a"][1] shouldBe emptyList() - } - - @Test - fun `serialize column with name 'value'`() { - val df = dataFrameOf("a")(dataFrameOf("value")(1, 2, 3)) - - @Language("json") - val json = df.toJson() - json shouldContain "\"value\":1" - val df1 = DataFrame.readJsonStr(json) - df shouldBe df1 - } - - @Test - fun `literal json field named 'value'`() { - @Language("json") - val json = - """ - { - "data": { - "source": { - "value": "123" - } - } - } - """.trimIndent() - val df = DataFrame.readJsonStr(json) - df[0].getColumnGroup("data").getColumnGroup("source")["value"] shouldBe "123" - } - - @Test - fun `array json field named 'value'`() { - @Language("json") - val json = """{ "value": ["123"] }""" - - val df = DataFrame.readJsonStr(json).alsoDebug() - df[0]["value"] shouldBe listOf("123") - } - - @Test - fun `record json field named 'value'`() { - @Language("json") - val json = """{ "value": { "test" : "123" } }""" - - val df = DataFrame.readJsonStr(json) - df[0].getColumnGroup("value")["test"] shouldBe "123" - } - - @Test - fun `json field named 'array'`() { - @Language("json") - val json = - """ - { - "data": { - "source": { - "array": "123" - } - } - } - """.trimIndent() - - val df = DataFrame.readJsonStr(json) - df[0].getColumnGroup("data").getColumnGroup("source")["array"] shouldBe "123" - } - - @Test 
- fun `array json field named 'array'`() { - @Language("json") - val json = - """ - [{ - "a": { - "value": "text", - "array": [] - } - }, { - "a": { - "b": 2, - "array": [] - } - }, { - "a": { - "array": [6, 7, 8] - } - }] - """.trimIndent() - - val df = DataFrame.readJsonStr(json).alsoDebug() - val group = df.getColumnGroup("a") - group["array"].type() shouldBe typeOf>() - group["value"].type() shouldBe typeOf() - group["b"].type() shouldBe typeOf() - } - - @Test - fun `value field name clash`() { - @Language("json") - val json = - """ - [ - {"a":"text", "c": 1}, - {"a":{"b":2,"value":1.0, "array": null, "array1":12}}, - {"a":[6,7,8]}, - null - ] - """.trimIndent() - val df = DataFrame.readJsonStr(json).alsoDebug() - df.columnsCount() shouldBe 2 - df.rowsCount() shouldBe 4 - df["c"].type() shouldBe typeOf() - val group = df["a"] as ColumnGroup<*> - group.columnsCount() shouldBe 6 - group["b"].type() shouldBe typeOf() - group["value"].type() shouldBe typeOf() - group["value1"].type() shouldBe typeOf() - group["array"].type() shouldBe nothingType(nullable = true) - - val schema = df.schema().toString() - schema shouldContain "Nothing?" - schema shouldNotContain "Void?" 
- - group["array1"].type() shouldBe typeOf() - group["array2"].type() shouldBe typeOf>() - } - - @Test - fun `value field (no) name clash Any`() { - @Language("json") - val json = - """ - [ - {"a":"text", "c": 1}, - {"a":{"b":2,"value":1.0, "array": null, "array1":12}}, - {"a":[6,7,8]}, - null - ] - """.trimIndent() - val df = DataFrame.readJsonStr(json, typeClashTactic = ANY_COLUMNS).alsoDebug() - df.columnsCount() shouldBe 2 - df.rowsCount() shouldBe 4 - val c = df["c"] as ValueColumn<*> - c.type() shouldBe typeOf() - c[0] shouldBe 1 - c[1..3].allNulls() shouldBe true - val a = df["a"] as ValueColumn<*> - a.type() shouldBe typeOf() - a[0] shouldBe "text" - (a[1] as DataRow<*>).let { - it.columnsCount() shouldBe 4 - it["b"] shouldBe 2 - it["value"] shouldBe 1.0 - it["array"] shouldBe null - it["array1"] shouldBe 12 - } - a[2] shouldBe listOf(6, 7, 8) - a[3] shouldBe null - } - - @Test - fun `objects with null Any`() { - @Language("json") - val json = - """ - [ - {"a":{"b":1}}, - {"a":{"b":2}}, - {"a":{"b": null}}, - {"a": {}}, - {"a": null}, - {}, - null - ] - """.trimIndent() - val df = DataFrame.readJsonStr(json, typeClashTactic = ANY_COLUMNS).alsoDebug() - df.columnsCount() shouldBe 1 - df.rowsCount() shouldBe 7 - val a = df["a"] as ColumnGroup<*> - a.columnsCount() shouldBe 1 - a["b"].let { - it.type() shouldBe typeOf() - it[0] shouldBe 1 - it[1] shouldBe 2 - it[2..6].allNulls() shouldBe true - } - } - - @Test - fun `primitive arrays with null Any`() { - @Language("json") - val json = - """ - [ - {"a":[1,2,3]}, - {"a":[null]}, - {"a":[]}, - {"a": null}, - {}, - null - ] - """.trimIndent() - val df = DataFrame.readJsonStr(json, typeClashTactic = ANY_COLUMNS).alsoDebug() - df.columnsCount() shouldBe 1 - df.rowsCount() shouldBe 6 - val a = df["a"] as ValueColumn<*> - a.type shouldBe typeOf>() - a[0] shouldBe listOf(1, 2, 3) - a[1] shouldBe listOf(null) - a[2..5].forEach { - it shouldBe emptyList() - } - } - - @Test - fun `non-primitive arrays with null Any`() { - 
@Language("json") - val json = - """ - [ - {"a":[null, null]}, - {"a":[{"b" : 1},{"b": 2}]}, - {"a":[]}, - {"a": null}, - {}, - null - ] - """.trimIndent() - val df = DataFrame.readJsonStr(json, typeClashTactic = ANY_COLUMNS).alsoDebug() - df.columnsCount() shouldBe 1 - df.rowsCount() shouldBe 6 - val a = df["a"] as FrameColumn<*> - a[0].let { - it.columnsCount() shouldBe 1 - it.rowsCount() shouldBe 2 - it["b"].let { - it.type() shouldBe typeOf() - it[0] shouldBe null - it[1] shouldBe null - } - } - a[1].let { - it.columnsCount() shouldBe 1 - it.rowsCount() shouldBe 2 - it["b"].let { - it.type() shouldBe typeOf() - it[0] shouldBe 1 - it[1] shouldBe 2 - } - } - a[2..5].forEach { - it.columnsCount() shouldBe 0 - it.rowsCount() shouldBe 0 - } - } - - @Test - fun `Listification test Array Value`() { - @Language("json") - val json = - """ - [ - {"a":[1,2,3]}, - {"a":null}, - {"a":1} - ] - """.trimIndent() - val df = DataFrame.readJsonStr( - text = json, - typeClashTactic = ARRAY_AND_VALUE_COLUMNS, - keyValuePaths = listOf(JsonPath()), - ).alsoDebug() - } - - @Test - fun `Listification test Any column`() { - @Language("json") - val json = - """ - [ - {"a":[1,2,3]}, - {"a":null}, - {"a":1} - ] - """.trimIndent() - val df = DataFrame.readJsonStr( - text = json, - typeClashTactic = ANY_COLUMNS, - keyValuePaths = listOf(JsonPath()), - ).alsoDebug() - } - - @Test - fun `KeyValue property Array Value`() { - @Language("json") - val json = - """ - [ - {"a":{"b":1}}, - {"a":{"c": 2, "d": null, "b":[1, 2, 3]}}, - {"a":{}}, - {"a": null}, - {}, - null - ] - """.trimIndent() - - // before - val noKeyValue = DataFrame.readJsonStr(json, typeClashTactic = ARRAY_AND_VALUE_COLUMNS) - .alsoDebug() - -// ⌌-------------------------------------------------------⌍ -// | | a:{b:{value:Int?, array:List}, c:Int?, d:Any?}| -// |--|----------------------------------------------------| -// | 0| { b:{ value:1, array:[] }, c:null, d:null }| -// | 1| { b:{ value:null, array:[1, 2, 3] }, c:2, d:null }| 
-// | 2| { b:{ value:null, array:[] }, c:null, d:null }| -// | 3| { b:{ value:null, array:[] }, c:null, d:null }| -// | 4| { b:{ value:null, array:[] }, c:null, d:null }| -// | 5| { b:{ value:null, array:[] }, c:null, d:null }| -// ⌎-------------------------------------------------------⌏ - noKeyValue.columnsCount() shouldBe 1 - noKeyValue.rowsCount() shouldBe 6 - noKeyValue["a"].also { - it shouldBe instanceOf>() - it as ColumnGroup<*> - - it["b"].type() shouldBe typeOf>() - it["b"]["value"].type() shouldBe typeOf() - it["b"]["array"].type() shouldBe typeOf>() - it["c"].type() shouldBe typeOf() - it["d"].type() shouldBe nothingType(nullable = true) - - it[0].let { - (it["b"] as DataRow<*>).toMap() shouldBe mapOf("value" to 1, "array" to emptyList()) - it["c"] shouldBe null - it["d"] shouldBe null - } - it[1].let { - (it["b"] as DataRow<*>).toMap() shouldBe mapOf("value" to null, "array" to listOf(1, 2, 3)) - it["c"] shouldBe 2 - it["d"] shouldBe null - } - (it as ColumnGroup<*>)[2..5].forEach { - it.let { - (it["b"] as DataRow<*>).toMap() shouldBe mapOf("value" to null, "array" to emptyList()) - it["c"] shouldBe null - it["d"] shouldBe null - } - } - } - - // $["a"] should be read as keyValue - val keyValuePaths = listOf( - JsonPath().append("a"), - ) - - // after - val withKeyValue = - DataFrame.readJsonStr(json, keyValuePaths = keyValuePaths, typeClashTactic = ARRAY_AND_VALUE_COLUMNS) - .alsoDebug() - .also { - it["a"][1].let { it as AnyFrame }.alsoDebug() - } -// ⌌------------------------------⌍ -// | | a:[key:String, value:Any?]| -// |--|---------------------------| -// | 0| [1 x 2] { key:b, value:1 }| -// | 1| [3 x 2]| -> { key:c, value:2 } -// | 2| [0 x 2]| { key:d, value:null } -// | 3| [0 x 2]| { key:b, value:[1,2,3] } -// | 4| [0 x 2]| -// | 5| [0 x 2]| -// ⌎------------------------------⌏ - - withKeyValue.columnsCount() shouldBe 1 - withKeyValue.rowsCount() shouldBe 6 - withKeyValue["a"].also { - it shouldBe instanceOf>() - it as FrameColumn<*> - - 
it[0].let { - it.columnsCount() shouldBe 2 - it.rowsCount() shouldBe 1 - it["key"].let { - it.type() shouldBe typeOf() - it[0] shouldBe "b" - } - it["value"].let { - it.type() shouldBe typeOf() // tightened by values, but Int? is also valid of course - it[0] shouldBe 1 - } - } - it[1].let { - it.columnsCount() shouldBe 2 - it.rowsCount() shouldBe 3 - it["key"].let { - it.type() shouldBe typeOf() - it[0] shouldBe "c" - it[1] shouldBe "d" - } - it["value"].let { - it.type() shouldBe typeOf() - it[0] shouldBe 2 - it[1] shouldBe null - } - } - it[2..5].forEach { - it.columnsCount() shouldBe 2 - it.rowsCount() shouldBe 0 - - it["key"].type() shouldBe typeOf() - it["value"].type() shouldBeIn listOf(typeOf(), typeOf()) // no data, so Any(?) ValueColumn - } - } - } - - @Test - fun `KeyValue property Any`() { // TODO needs more tests - @Language("json") - val json = - """ - [ - {"a":{"b": 1}}, - {"a":{"c": 2, "d": null, "b":[1, 2, 3]}}, - {"a":{}}, - {"a": null}, - {}, - null - ] - """.trimIndent() - - // before - val noKeyValue = DataFrame.readJsonStr(json, typeClashTactic = ANY_COLUMNS) - .alsoDebug() - -// ⌌------------------------------⌍ -// | | a:{b:Any?, c:Int?, d:Any?}| -// |--|---------------------------| -// | 0| { b:1 }| -// | 1|{ b:[1,2,3], c:2, d: null }| -// | 2| { }| -// | 3| { }| -// | 4| { }| -// | 5| { }| -// ⌎------------------------------⌏ - noKeyValue.columnsCount() shouldBe 1 - noKeyValue.rowsCount() shouldBe 6 - noKeyValue["a"].also { - it shouldBe instanceOf>() - it as ColumnGroup<*> - - it["b"].type() shouldBe typeOf() - it["c"].type() shouldBe typeOf() - it["d"].type() shouldBe nullableNothingType - - it[0].toMap() shouldBe mapOf("b" to 1, "c" to null, "d" to null) - it[1].toMap() shouldBe mapOf("b" to listOf(1, 2, 3), "c" to 2, "d" to null) - (it as ColumnGroup<*>)[2..5].forEach { - it.toMap() shouldBe mapOf("b" to null, "c" to null, "d" to null) - } - } - - // $["a"] should be read as keyValue - val keyValuePaths = listOf( - 
JsonPath().append("a"), - ) - - // after - val withKeyValue = DataFrame.readJsonStr( - text = json, - keyValuePaths = keyValuePaths, - typeClashTactic = ANY_COLUMNS, - ).alsoDebug().also { - it["a"][1].let { it as AnyFrame }.alsoDebug() - } - -// ⌌------------------------------⌍ -// | | a:[key:String, value:Any?]| -// |--|---------------------------| -// | 0| [1 x 2] { key:b, value:1 }| -// | 1| [3 x 2]| -> { key:c, value:2 } -// | 2| [0 x 2]| { key:d, value:null } -// | 3| [0 x 2]| { key:b, value:[1,2,3] } -// | 4| [0 x 2]| -// | 5| [0 x 2]| -// ⌎------------------------------⌏ - withKeyValue.columnsCount() shouldBe 1 - withKeyValue.rowsCount() shouldBe 6 - withKeyValue["a"].also { - it shouldBe instanceOf>() - it as FrameColumn<*> - - it[0].let { - it.columnsCount() shouldBe 2 - it.rowsCount() shouldBe 1 - it["key"].let { - it.type() shouldBe typeOf() - it[0] shouldBe "b" - } - it["value"].let { - it.type() shouldBe typeOf() // tightened by values, but Int? is also valid of course - it[0] shouldBe 1 - } - } - it[1].let { - it.columnsCount() shouldBe 2 - it.rowsCount() shouldBe 3 - it["key"].let { - it.type() shouldBe typeOf() - it[0] shouldBe "c" - it[1] shouldBe "d" - } - it["value"].let { - it.type() shouldBe typeOf() - it[0] shouldBe 2 - it[1] shouldBe null - it[2] shouldBe listOf(1, 2, 3) - } - } - it[2..5].forEach { - it.columnsCount() shouldBe 2 - it.rowsCount() shouldBe 0 - - it["key"].type() shouldBe typeOf() - it["value"].type() shouldBeIn listOf(typeOf(), typeOf()) // no data, so Any(?) 
ValueColumn - } - } - } - - @Test - fun `nulls in columns should be encoded explicitly`() { - val df = dataFrameOf("a", "b")("1", null, "2", 12) - df.toJson() shouldContain "\"b\":null" -// df.toJson(canonical = true) shouldContain "\"b\":null" - } - - @Test - fun `json with metadata flat table`() { - @Language("json") - val data = - """ - [{"id":3602279,"node_id":"MDEwOlJlcG9zaXRvcnkzNjAyMjc5","name":"kotlin-web-demo","full_name":"JetBrains/kotlin-web-demo"}] - """.trimIndent() - val df = DataFrame.readJsonStr(data) - val jsonStr = df.toJsonWithMetadata(df.rowsCount()).trimIndent() - val json = parseJsonStr(jsonStr) - - json[VERSION]!!.jsonPrimitive.content shouldBe SERIALIZATION_VERSION - - val metadata = json[METADATA]!!.jsonObject - metadata[NROW]!!.jsonPrimitive.int shouldBe 1 - metadata[NCOL]!!.jsonPrimitive.int shouldBe 4 - val columns = metadata[COLUMNS]!!.jsonArray.map { it.jsonPrimitive.content } - columns shouldBe listOf("id", "node_id", "name", "full_name") - - val decodedData = json[KOTLIN_DATAFRAME]!!.jsonArray - val decodedDf = DataFrame.readJsonStr(decodedData.toString()) - decodedDf shouldBe df - } - - @Test - fun `json with metadata column group`() { - @Language("json") - val data = - """ - [{"permissions":{"admin":false,"maintain":false,"push":false,"triage":false,"pull":true}}] - """.trimIndent() - val df = DataFrame.readJsonStr(data) - val jsonStr = df.toJsonWithMetadata(df.rowsCount()).trimIndent() - val json = parseJsonStr(jsonStr) - - val row = json[KOTLIN_DATAFRAME]!!.jsonArray[0].jsonObject - - val permissions = row["permissions"]!!.jsonObject - val metadata = permissions[METADATA]!!.jsonObject - metadata[KIND]!!.jsonPrimitive.content shouldBe ColumnKind.Group.toString() - - val decodedData = permissions[DATA]!!.jsonObject - - decodedData["admin"]!!.jsonPrimitive.boolean shouldBe false - decodedData["maintain"]!!.jsonPrimitive.boolean shouldBe false - decodedData["push"]!!.jsonPrimitive.boolean shouldBe false - 
decodedData["triage"]!!.jsonPrimitive.boolean shouldBe false - decodedData["pull"]!!.jsonPrimitive.boolean shouldBe true - } - - @Test - fun `json with metadata frame column`() { - val df = DataFrame.readJson(testJson("repositories")) - val jsonStr = df.toJsonWithMetadata(df.rowsCount()).trimIndent() - val json = parseJsonStr(jsonStr) - val row = json[KOTLIN_DATAFRAME]!!.jsonArray[0].jsonObject - - val contributors = row["contributors"]!!.jsonObject - - val metadata = contributors[METADATA]!!.jsonObject - metadata[KIND]!!.jsonPrimitive.content shouldBe ColumnKind.Frame.toString() - metadata[NCOL]!!.jsonPrimitive.int shouldBe 8 - metadata[NROW]!!.jsonPrimitive.int shouldBe 29 - - val decodedData = contributors[DATA]!!.jsonArray - decodedData.size shouldBe 29 - - val decodedDf = DataFrame.readJsonStr(decodedData.toString()) - decodedDf shouldBe df[0]["contributors"] as AnyFrame - } - - @Test - fun `json with metadata test row limit`() { - val df = DataFrame.readJson(testJson("repositories")) - val nestedFrameRowLimit = 20 - val jsonStr = df.toJsonWithMetadata(df.rowsCount(), nestedFrameRowLimit).trimIndent() - val json = parseJsonStr(jsonStr) - val row = json[KOTLIN_DATAFRAME]!!.jsonArray[0].jsonObject - - val contributors = row["contributors"]!!.jsonObject - - val metadata = contributors[METADATA]!!.jsonObject - metadata[KIND]!!.jsonPrimitive.content shouldBe ColumnKind.Frame.toString() - metadata[NCOL]!!.jsonPrimitive.int shouldBe 8 - metadata[NROW]!!.jsonPrimitive.int shouldBe 29 - - val decodedData = contributors[DATA]!!.jsonArray - decodedData.size shouldBe nestedFrameRowLimit - } - - @Test - fun `serialize column with list of objects`() { - val df = dataFrameOf("col")(Regex(".+").findAll("abc").toList()) - val json = shouldNotThrowAny { df.toJson() }!! 
- val list = DataFrame.readJsonStr(json)["col"][0].shouldBeInstanceOf>() - list[0].shouldBeInstanceOf() - } - - @Test - fun `serialize column with list of primitives`() { - val df = dataFrameOf("col")(listOf(1, 2, 3)) - val json = df.toJson() - DataFrame.readJsonStr(json) shouldBe df - } - - @Test - fun `parse invalid literal`() { - // https://github.com/Kotlin/kotlinx.serialization/issues/2511 - val json = Json.decodeFromString("""[jetbrains, jetbrains-youtrack, youtrack, youtrack-api]""") - shouldThrow { - readJsonImpl(json, true, emptyList()) - } - } -} diff --git a/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeDelim.kt b/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeDelim.kt index 930118e737..86a5e5aab0 100644 --- a/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeDelim.kt +++ b/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeDelim.kt @@ -74,8 +74,22 @@ internal fun writeDelimImpl( df.forEach { val values = it.values().map { when (it) { - is AnyRow -> it.toJson() - is AnyFrame -> it.toJson() + is AnyRow -> try { + it.toJson() + } catch (_: NoClassDefFoundError) { + error( + "Encountered a DataFrame when writing to csv/tsv/delim. This needs to be converted to JSON, so the dataframe-json dependency is required.", + ) + } + + is AnyFrame -> try { + it.toJson() + } catch (_: NoClassDefFoundError) { + error( + "Encountered a DataRow when writing to csv/tsv/delim. 
This needs to be converted to JSON, so the dataframe-json dependency is required.", + ) + } + else -> it } } diff --git a/dataframe-csv/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/DelimCsvTsvTests.kt b/dataframe-csv/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/DelimCsvTsvTests.kt index f303e5028c..153e26553f 100644 --- a/dataframe-csv/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/DelimCsvTsvTests.kt +++ b/dataframe-csv/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/DelimCsvTsvTests.kt @@ -796,6 +796,22 @@ class DelimCsvTsvTests { } } + @Test + fun `json dependency test`() { + val df = dataFrameOf("firstName", "lastName")( + "John", "Doe", + "Jane", "Doe", + ).group { "firstName" and "lastName" }.into { "name" } + + df.toCsvStr(quote = '\'') shouldBe + """ + name + '{"firstName":"John","lastName":"Doe"}' + '{"firstName":"Jane","lastName":"Doe"}' + + """.trimIndent() + } + companion object { private val irisDataset = testCsv("irisDataset") private val simpleCsv = testCsv("testCSV") diff --git a/dataframe-json/build.gradle.kts b/dataframe-json/build.gradle.kts index 40f1822cf1..c359c6f9b9 100644 --- a/dataframe-json/build.gradle.kts +++ b/dataframe-json/build.gradle.kts @@ -8,7 +8,6 @@ plugins { alias(kover) alias(ktlint) alias(kodex) - alias(buildconfig) alias(binary.compatibility.validator) } } diff --git a/dataframe-jupyter/build.gradle.kts b/dataframe-jupyter/build.gradle.kts index 428e8763b4..8170f5c84f 100644 --- a/dataframe-jupyter/build.gradle.kts +++ b/dataframe-jupyter/build.gradle.kts @@ -7,7 +7,6 @@ plugins { alias(kover) alias(ktlint) alias(jupyter.api) - alias(buildconfig) alias(binary.compatibility.validator) } } From a7c5d2bfc4278c4236d163b7c6abe1da2d1c3d1b Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Sat, 26 Apr 2025 15:52:20 +0200 Subject: [PATCH 07/15] fixing tests for dataframe-json --- dataframe-json/build.gradle.kts | 4 +- 
.../dataframe/io/ImageSerializationTests.kt | 2 +- .../jetbrains/kotlinx/dataframe/io/json.kt | 40 +++++++++--------- .../src/test/resources/imgs/img1.jpg | Bin .../src/test/resources/imgs/img2.jpg | Bin .../src/test/resources/imgs/img3.jpg | Bin .../src/test/resources/repositories.json | 1 + 7 files changed, 22 insertions(+), 25 deletions(-) rename {core => dataframe-json}/src/test/resources/imgs/img1.jpg (100%) rename {core => dataframe-json}/src/test/resources/imgs/img2.jpg (100%) rename {core => dataframe-json}/src/test/resources/imgs/img3.jpg (100%) create mode 100644 dataframe-json/src/test/resources/repositories.json diff --git a/dataframe-json/build.gradle.kts b/dataframe-json/build.gradle.kts index c359c6f9b9..be083b175c 100644 --- a/dataframe-json/build.gradle.kts +++ b/dataframe-json/build.gradle.kts @@ -7,7 +7,6 @@ plugins { alias(serialization) alias(kover) alias(ktlint) - alias(kodex) alias(binary.compatibility.validator) } } @@ -27,12 +26,11 @@ dependencies { implementation(libs.serialization.json) implementation(libs.sl4j) + testImplementation(kotlin("test")) testImplementation(libs.junit) testImplementation(libs.kotestAssertions) { exclude("org.jetbrains.kotlin", "kotlin-stdlib-jdk8") } - testImplementation(libs.kotlin.scriptingJvm) - testImplementation(libs.jsoup) testImplementation(libs.sl4jsimple) } diff --git a/dataframe-json/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ImageSerializationTests.kt b/dataframe-json/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ImageSerializationTests.kt index 21f1750e12..9657dd0c44 100644 --- a/dataframe-json/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ImageSerializationTests.kt +++ b/dataframe-json/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ImageSerializationTests.kt @@ -13,7 +13,6 @@ import org.jetbrains.kotlinx.dataframe.impl.io.resizeKeepingAspectRatio import org.jetbrains.kotlinx.dataframe.io.Base64ImageEncodingOptions.Companion.ALL_OFF import 
org.jetbrains.kotlinx.dataframe.io.Base64ImageEncodingOptions.Companion.GZIP_ON import org.jetbrains.kotlinx.dataframe.io.Base64ImageEncodingOptions.Companion.LIMIT_SIZE_ON -import org.junit.Test import org.junit.runner.RunWith import org.junit.runners.Parameterized import java.awt.image.BufferedImage @@ -24,6 +23,7 @@ import java.util.Base64 import java.util.zip.GZIPInputStream import javax.imageio.ImageIO import kotlin.math.abs +import kotlin.test.Test @RunWith(Parameterized::class) class ImageSerializationTests(private val encodingOptions: Base64ImageEncodingOptions?) { diff --git a/dataframe-json/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt b/dataframe-json/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt index fd3ded5fe0..7978cf6430 100644 --- a/dataframe-json/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt +++ b/dataframe-json/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt @@ -49,13 +49,10 @@ import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.VERSION import org.jetbrains.kotlinx.dataframe.impl.io.readJsonImpl import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.ANY_COLUMNS import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.ARRAY_AND_VALUE_COLUMNS -import org.jetbrains.kotlinx.dataframe.type -import org.jetbrains.kotlinx.dataframe.values -import org.junit.Test import java.net.URL -import kotlin.Double import kotlin.reflect.KType import kotlin.reflect.typeOf +import kotlin.test.Test @Suppress("ktlint:standard:argument-list-wrapping") class JsonTests { @@ -242,9 +239,9 @@ class JsonTests { it["b"].type() shouldBe typeOf() it["c"].type() shouldBe typeOf() it["d"].type() shouldBe typeOf() - it["b"].values.toList() shouldBe listOf(2, null) - it["c"].values.toList() shouldBe listOf(null, 3) - it["d"].values.toList() shouldBe listOf(null, null) + it["b"].values().toList() shouldBe listOf(2, null) + it["c"].values().toList() shouldBe listOf(null, 3) + it["d"].values().toList() shouldBe 
listOf(null, null) } group[1].alsoDebug().let { @@ -253,9 +250,9 @@ class JsonTests { it["b"].type() shouldBe typeOf() it["c"].type() shouldBe typeOf() it["d"].type() shouldBe typeOf() - it["b"].values.toList() shouldBe listOf(4, null) - it["c"].values.toList() shouldBe listOf(null, null) - it["d"].values.toList() shouldBe listOf(null, 5) + it["b"].values().toList() shouldBe listOf(4, null) + it["c"].values().toList() shouldBe listOf(null, null) + it["d"].values().toList() shouldBe listOf(null, 5) } } @@ -279,9 +276,9 @@ class JsonTests { it["b"].type() shouldBe typeOf() it["c"].type() shouldBe typeOf() it["d"].type() shouldBe typeOf() - it["b"].values.toList() shouldBe listOf(2, null) - it["c"].values.toList() shouldBe listOf(null, 3) - it["d"].values.toList() shouldBe listOf(null, null) + it["b"].values().toList() shouldBe listOf(2, null) + it["c"].values().toList() shouldBe listOf(null, 3) + it["d"].values().toList() shouldBe listOf(null, null) } group[1].alsoDebug().let { @@ -290,9 +287,9 @@ class JsonTests { it["b"].type() shouldBe typeOf() it["c"].type() shouldBe typeOf() it["d"].type() shouldBe typeOf() - it["b"].values.toList() shouldBe listOf(4, null) - it["c"].values.toList() shouldBe listOf(null, null) - it["d"].values.toList() shouldBe listOf(null, 5) + it["b"].values().toList() shouldBe listOf(4, null) + it["c"].values().toList() shouldBe listOf(null, null) + it["d"].values().toList() shouldBe listOf(null, 5) } } @@ -312,13 +309,13 @@ class JsonTests { df1.columnsCount() shouldBe 1 df1.rowsCount() shouldBe 4 df1["a"].type() shouldBe typeOf() - df1["a"].values.toList() shouldBe listOf(1.0, 2.0, 3.0, 4.5) + df1["a"].values().toList() shouldBe listOf(1.0, 2.0, 3.0, 4.5) val df2 = DataFrame.readJsonStr(json, unifyNumbers = false).alsoDebug() df2.columnsCount() shouldBe 1 df2.rowsCount() shouldBe 4 df2["a"].type() shouldBe typeOf() - df2["a"].values.toList() shouldBe listOf(1, 2.0f, 3, 4.5f) + df2["a"].values().toList() shouldBe listOf(1, 2.0f, 3, 4.5f) } 
@Test @@ -349,7 +346,7 @@ class JsonTests { it.columnsCount() shouldBe 3 it.rowsCount() shouldBe 3 it["a"].type() shouldBe typeOf() - it["a"].values.toList() shouldBe listOf("b", "c", "d") + it["a"].values().toList() shouldBe listOf("b", "c", "d") } } @@ -396,7 +393,7 @@ class JsonTests { it.columnsCount() shouldBe 1 it.rowsCount() shouldBe 3 it["a"].type() shouldBe typeOf() - it["a"].values.toList() shouldBe listOf("b", "c", "d") + it["a"].values().toList() shouldBe listOf("b", "c", "d") } } @@ -696,7 +693,7 @@ class JsonTests { df.columnsCount() shouldBe 1 df.rowsCount() shouldBe 6 val a = df["a"] as ValueColumn<*> - a.type shouldBe typeOf>() + a.type() shouldBe typeOf>() a[0] shouldBe listOf(1, 2, 3) a[1] shouldBe listOf(null) a[2..5].forEach { @@ -1135,6 +1132,7 @@ class JsonTests { @Test fun `parse invalid literal`() { // https://github.com/Kotlin/kotlinx.serialization/issues/2511 + @Suppress("JsonStandardCompliance") val json = Json.decodeFromString("""[jetbrains, jetbrains-youtrack, youtrack, youtrack-api]""") shouldThrow { readJsonImpl(json, true, emptyList()) diff --git a/core/src/test/resources/imgs/img1.jpg b/dataframe-json/src/test/resources/imgs/img1.jpg similarity index 100% rename from core/src/test/resources/imgs/img1.jpg rename to dataframe-json/src/test/resources/imgs/img1.jpg diff --git a/core/src/test/resources/imgs/img2.jpg b/dataframe-json/src/test/resources/imgs/img2.jpg similarity index 100% rename from core/src/test/resources/imgs/img2.jpg rename to dataframe-json/src/test/resources/imgs/img2.jpg diff --git a/core/src/test/resources/imgs/img3.jpg b/dataframe-json/src/test/resources/imgs/img3.jpg similarity index 100% rename from core/src/test/resources/imgs/img3.jpg rename to dataframe-json/src/test/resources/imgs/img3.jpg diff --git a/dataframe-json/src/test/resources/repositories.json b/dataframe-json/src/test/resources/repositories.json new file mode 100644 index 0000000000..078eb98a38 --- /dev/null +++ 
b/dataframe-json/src/test/resources/repositories.json @@ -0,0 +1 @@ +[{"contributors":[{"login":"satamas","id":5521317,"node_id":"MDQ6VXNlcjU1MjEzMTc=","gravatar_id":"","url":"https://api.github.com/users/satamas","type":"User","site_admin":false,"contributions":998},{"login":"NataliaUkhorskaya","id":968879,"node_id":"MDQ6VXNlcjk2ODg3OQ==","gravatar_id":"","url":"https://api.github.com/users/NataliaUkhorskaya","type":"User","site_admin":false,"contributions":371},{"login":"AlexanderPrendota","id":10503748,"node_id":"MDQ6VXNlcjEwNTAzNzQ4","gravatar_id":"","url":"https://api.github.com/users/AlexanderPrendota","type":"User","site_admin":false,"contributions":190},{"login":"svtk","id":1447386,"node_id":"MDQ6VXNlcjE0NDczODY=","gravatar_id":"","url":"https://api.github.com/users/svtk","type":"User","site_admin":false,"contributions":53},{"login":"zarechenskiy","id":3757088,"node_id":"MDQ6VXNlcjM3NTcwODg=","gravatar_id":"","url":"https://api.github.com/users/zarechenskiy","type":"User","site_admin":false,"contributions":18},{"login":"abreslav","id":888318,"node_id":"MDQ6VXNlcjg4ODMxOA==","gravatar_id":"","url":"https://api.github.com/users/abreslav","type":"User","site_admin":false,"contributions":13},{"login":"yole","id":46553,"node_id":"MDQ6VXNlcjQ2NTUz","gravatar_id":"","url":"https://api.github.com/users/yole","type":"User","site_admin":false,"contributions":11},{"login":"zoobestik","id":242514,"node_id":"MDQ6VXNlcjI0MjUxNA==","gravatar_id":"","url":"https://api.github.com/users/zoobestik","type":"User","site_admin":false,"contributions":5},{"login":"ilya-g","id":4257577,"node_id":"MDQ6VXNlcjQyNTc1Nzc=","gravatar_id":"","url":"https://api.github.com/users/ilya-g","type":"User","site_admin":false,"contributions":5},{"login":"pTalanov","id":442640,"node_id":"MDQ6VXNlcjQ0MjY0MA==","gravatar_id":"","url":"https://api.github.com/users/pTalanov","type":"User","site_admin":false,"contributions":4},{"login":"bashor","id":485321,"node_id":"MDQ6VXNlcjQ4NTMyMQ==","gravatar_id":"
","url":"https://api.github.com/users/bashor","type":"User","site_admin":false,"contributions":3},{"login":"nikpachoo","id":3338311,"node_id":"MDQ6VXNlcjMzMzgzMTE=","gravatar_id":"","url":"https://api.github.com/users/nikpachoo","type":"User","site_admin":false,"contributions":3},{"login":"udalov","id":292714,"node_id":"MDQ6VXNlcjI5MjcxNA==","gravatar_id":"","url":"https://api.github.com/users/udalov","type":"User","site_admin":false,"contributions":2},{"login":"anton-bannykh","id":1115872,"node_id":"MDQ6VXNlcjExMTU4NzI=","gravatar_id":"","url":"https://api.github.com/users/anton-bannykh","type":"User","site_admin":false,"contributions":2},{"login":"rayshade","id":5259872,"node_id":"MDQ6VXNlcjUyNTk4NzI=","gravatar_id":"","url":"https://api.github.com/users/rayshade","type":"User","site_admin":false,"contributions":2},{"login":"yu-ishicawa","id":843678,"node_id":"MDQ6VXNlcjg0MzY3OA==","gravatar_id":"","url":"https://api.github.com/users/yu-ishicawa","type":"User","site_admin":false,"contributions":2},{"login":"gildor","id":186017,"node_id":"MDQ6VXNlcjE4NjAxNw==","gravatar_id":"","url":"https://api.github.com/users/gildor","type":"User","site_admin":false,"contributions":1},{"login":"AndreOnCrypto","id":3066457,"node_id":"MDQ6VXNlcjMwNjY0NTc=","gravatar_id":"","url":"https://api.github.com/users/AndreOnCrypto","type":"User","site_admin":false,"contributions":1},{"login":"DipanshKhandelwal","id":24923974,"node_id":"MDQ6VXNlcjI0OTIzOTc0","gravatar_id":"","url":"https://api.github.com/users/DipanshKhandelwal","type":"User","site_admin":false,"contributions":1},{"login":"dsavvinov","id":6999635,"node_id":"MDQ6VXNlcjY5OTk2MzU=","gravatar_id":"","url":"https://api.github.com/users/dsavvinov","type":"User","site_admin":false,"contributions":1},{"login":"Noia","id":397736,"node_id":"MDQ6VXNlcjM5NzczNg==","gravatar_id":"","url":"https://api.github.com/users/Noia","type":"User","site_admin":false,"contributions":1},{"login":"gzoritchak","id":1110254,"node_id":"MDQ6VXNlcjExMTAyN
TQ=","gravatar_id":"","url":"https://api.github.com/users/gzoritchak","type":"User","site_admin":false,"contributions":1},{"login":"Harmitage","id":44910736,"node_id":"MDQ6VXNlcjQ0OTEwNzM2","gravatar_id":"","url":"https://api.github.com/users/Harmitage","type":"User","site_admin":false,"contributions":1},{"login":"JLLeitschuh","id":1323708,"node_id":"MDQ6VXNlcjEzMjM3MDg=","gravatar_id":"","url":"https://api.github.com/users/JLLeitschuh","type":"User","site_admin":false,"contributions":1},{"login":"dalinaum","id":145585,"node_id":"MDQ6VXNlcjE0NTU4NQ==","gravatar_id":"","url":"https://api.github.com/users/dalinaum","type":"User","site_admin":false,"contributions":1},{"login":"robstoll","id":5557885,"node_id":"MDQ6VXNlcjU1NTc4ODU=","gravatar_id":"","url":"https://api.github.com/users/robstoll","type":"User","site_admin":false,"contributions":1},{"login":"tginsberg","id":432945,"node_id":"MDQ6VXNlcjQzMjk0NQ==","gravatar_id":"","url":"https://api.github.com/users/tginsberg","type":"User","site_admin":false,"contributions":1},{"login":"joeldudleyr3","id":24230167,"node_id":"MDQ6VXNlcjI0MjMwMTY3","gravatar_id":"","url":"https://api.github.com/users/joeldudleyr3","type":"User","site_admin":false,"contributions":1},{"login":"ligi","id":111600,"node_id":"MDQ6VXNlcjExMTYwMA==","gravatar_id":"","url":"https://api.github.com/users/ligi","type":"User","site_admin":false,"contributions":1}]}] From 85642f5196bfbc2ee0d13bb4293031f9d17c6414 Mon Sep 17 00:00:00 2001 From: Ilya Muradyan Date: Sat, 26 Apr 2025 18:36:21 +0200 Subject: [PATCH 08/15] Fix build: avoid same-named classes in different modules --- .../org/jetbrains/kotlinx/dataframe/io/{json.kt => jsonTests.kt} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename dataframe-json/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/{json.kt => jsonTests.kt} (100%) diff --git a/dataframe-json/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt 
b/dataframe-json/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/jsonTests.kt similarity index 100% rename from dataframe-json/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt rename to dataframe-json/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/jsonTests.kt From 6ab959dae71185ad8598ca3a0c436ab2028ef467 Mon Sep 17 00:00:00 2001 From: Ilya Muradyan Date: Sat, 26 Apr 2025 19:04:56 +0200 Subject: [PATCH 09/15] Fix build: parametrized tests for JUnit5 --- dataframe-json/build.gradle.kts | 6 ++++-- .../dataframe/io/ImageSerializationTests.kt | 16 +++++++--------- gradle/libs.versions.toml | 1 + 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/dataframe-json/build.gradle.kts b/dataframe-json/build.gradle.kts index be083b175c..efb141d6f3 100644 --- a/dataframe-json/build.gradle.kts +++ b/dataframe-json/build.gradle.kts @@ -26,8 +26,10 @@ dependencies { implementation(libs.serialization.json) implementation(libs.sl4j) - testImplementation(kotlin("test")) - testImplementation(libs.junit) + testImplementation(libs.kotlin.test) + testImplementation(libs.junit.jupiter) + testImplementation(libs.junit.jupiter.engine) + testImplementation(libs.junit.jupiter.params) testImplementation(libs.kotestAssertions) { exclude("org.jetbrains.kotlin", "kotlin-stdlib-jdk8") } diff --git a/dataframe-json/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ImageSerializationTests.kt b/dataframe-json/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ImageSerializationTests.kt index 9657dd0c44..7b9cc5ce42 100644 --- a/dataframe-json/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ImageSerializationTests.kt +++ b/dataframe-json/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ImageSerializationTests.kt @@ -13,8 +13,8 @@ import org.jetbrains.kotlinx.dataframe.impl.io.resizeKeepingAspectRatio import org.jetbrains.kotlinx.dataframe.io.Base64ImageEncodingOptions.Companion.ALL_OFF import 
org.jetbrains.kotlinx.dataframe.io.Base64ImageEncodingOptions.Companion.GZIP_ON import org.jetbrains.kotlinx.dataframe.io.Base64ImageEncodingOptions.Companion.LIMIT_SIZE_ON -import org.junit.runner.RunWith -import org.junit.runners.Parameterized +import org.junit.jupiter.params.ParameterizedTest +import org.junit.jupiter.params.provider.MethodSource import java.awt.image.BufferedImage import java.io.ByteArrayInputStream import java.io.ByteArrayOutputStream @@ -23,12 +23,11 @@ import java.util.Base64 import java.util.zip.GZIPInputStream import javax.imageio.ImageIO import kotlin.math.abs -import kotlin.test.Test -@RunWith(Parameterized::class) -class ImageSerializationTests(private val encodingOptions: Base64ImageEncodingOptions?) { - @Test - fun `serialize images as base64`() { +class ImageSerializationTests { + @ParameterizedTest + @MethodSource("imageEncodingOptionsToTest") + fun `serialize images as base64`(encodingOptions: Base64ImageEncodingOptions?) { val images = readImagesFromResources() val json = encodeImagesAsJson(images, encodingOptions) @@ -166,8 +165,7 @@ class ImageSerializationTests(private val encodingOptions: Base64ImageEncodingOp private val DISABLED = null @JvmStatic - @Parameterized.Parameters - fun imageEncodingOptionsToTest(): Collection = + fun imageEncodingOptionsToTest(): List = listOf( DEFAULT, GZIP_ON_RESIZE_OFF, diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 7b439ff968..734e835640 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -95,6 +95,7 @@ junit = { group = "junit", name = "junit", version.ref = "junit" } junit-bom = { group = "org.junit", name = "junit-bom", version.ref = "junit-jupiter" } junit-jupiter = { group = "org.junit.jupiter", name = "junit-jupiter", version.ref = "junit-jupiter" } junit-jupiter-engine = { group = "org.junit.jupiter", name = "junit-jupiter-engine", version.ref = "junit-jupiter" } +junit-jupiter-params = { group = "org.junit.jupiter", name = 
"junit-jupiter-params", version.ref = "junit-jupiter" } junit-platform-commons = { group = "org.junit.platform", name = "junit-platform-commons", version.ref = "junit-platform" } junit-platform-launcher = { group = "org.junit.platform", name = "junit-platform-launcher", version.ref = "junit-platform" } junit-platform-runner = { group = "org.junit.platform", name = "junit-platform-runner", version.ref = "junit-platform" } From 42be12faf32601c75215cb2855be83f1e20f540b Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Sat, 26 Apr 2025 19:18:15 +0200 Subject: [PATCH 10/15] renamed JsonTests --- .../jetbrains/kotlinx/dataframe/io/{jsonTests.kt => JsonTests.kt} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename dataframe-json/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/{jsonTests.kt => JsonTests.kt} (100%) diff --git a/dataframe-json/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/jsonTests.kt b/dataframe-json/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/JsonTests.kt similarity index 100% rename from dataframe-json/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/jsonTests.kt rename to dataframe-json/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/JsonTests.kt From cd61dd7c5e2cf0c42ee5152eb898f434399352af Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Sat, 26 Apr 2025 19:20:31 +0200 Subject: [PATCH 11/15] added apidump --- dataframe-json/api/dataframe-json.api | 82 +++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 dataframe-json/api/dataframe-json.api diff --git a/dataframe-json/api/dataframe-json.api b/dataframe-json/api/dataframe-json.api new file mode 100644 index 0000000000..644237dff6 --- /dev/null +++ b/dataframe-json/api/dataframe-json.api @@ -0,0 +1,82 @@ +public final class org/jetbrains/kotlinx/dataframe/io/Base64ImageEncodingOptions { + public static final field ALL_OFF I + public static final field Companion Lorg/jetbrains/kotlinx/dataframe/io/Base64ImageEncodingOptions$Companion; + public static 
final field GZIP_ON I + public static final field LIMIT_SIZE_ON I + public fun <init> ()V + public fun <init> (II)V + public synthetic fun <init> (IIILkotlin/jvm/internal/DefaultConstructorMarker;)V + public final fun getImageSizeLimit ()I + public final fun isGzipOn ()Z + public final fun isLimitSizeOn ()Z +} + +public final class org/jetbrains/kotlinx/dataframe/io/Base64ImageEncodingOptions$Companion { +} + +public abstract interface class org/jetbrains/kotlinx/dataframe/io/CustomEncoder { + public abstract fun canEncode (Ljava/lang/Object;)Z + public abstract fun encode (Ljava/lang/Object;)Lkotlinx/serialization/json/JsonElement; +} + +public final class org/jetbrains/kotlinx/dataframe/io/JSON : org/jetbrains/kotlinx/dataframe/io/SupportedDataFrameFormat { + public fun <init> ()V + public fun <init> (Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Ljava/util/List;Z)V + public synthetic fun <init> (Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Ljava/util/List;ZILkotlin/jvm/internal/DefaultConstructorMarker;)V + public fun acceptsExtension (Ljava/lang/String;)Z + public fun acceptsSample (Lorg/jetbrains/kotlinx/dataframe/io/SupportedFormatSample;)Z + public fun createDefaultReadMethod (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/codeGen/DefaultReadDfMethod; + public fun getTestOrder ()I + public fun readDataFrame (Ljava/io/File;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public fun readDataFrame (Ljava/io/InputStream;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; +} + +public final class org/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic : java/lang/Enum { + public static final field ANY_COLUMNS Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic; + public static final field ARRAY_AND_VALUE_COLUMNS Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic; + public static fun getEntries ()Lkotlin/enums/EnumEntries; + public static fun valueOf (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic; + public static
fun values ()[Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic; +} + +public final class org/jetbrains/kotlinx/dataframe/io/JsonKt { + public static final fun readJson (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/File;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static final fun readJson (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/InputStream;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static final fun readJson (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static final fun readJson (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/net/URL;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static final fun readJson (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/io/File;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Z)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static final fun readJson (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/io/InputStream;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Z)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static final fun readJson (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/lang/String;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Z)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static final fun readJson 
(Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/net/URL;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Z)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static synthetic fun readJson$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/File;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static synthetic fun readJson$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/InputStream;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static synthetic fun readJson$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static synthetic fun readJson$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/net/URL;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static synthetic fun readJson$default (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/io/File;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static synthetic fun readJson$default (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/io/InputStream;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static synthetic fun readJson$default 
(Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/lang/String;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static synthetic fun readJson$default (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/net/URL;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static final fun readJsonStr (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static final fun readJsonStr (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/lang/String;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Z)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static synthetic fun readJsonStr$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static synthetic fun readJsonStr$default (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/lang/String;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow; + public static final fun toJson (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Z)Ljava/lang/String; + public static final fun toJson (Lorg/jetbrains/kotlinx/dataframe/DataRow;Z)Ljava/lang/String; + public static synthetic fun toJson$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;ZILjava/lang/Object;)Ljava/lang/String; + public static synthetic fun toJson$default (Lorg/jetbrains/kotlinx/dataframe/DataRow;ZILjava/lang/Object;)Ljava/lang/String; + public static final fun 
toJsonWithMetadata (Lorg/jetbrains/kotlinx/dataframe/DataFrame;ILjava/lang/Integer;ZLjava/util/List;)Ljava/lang/String; + public static synthetic fun toJsonWithMetadata$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;ILjava/lang/Integer;ZLjava/util/List;ILjava/lang/Object;)Ljava/lang/String; + public static final fun writeJson (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/io/File;Z)V + public static final fun writeJson (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/lang/Appendable;Z)V + public static final fun writeJson (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/lang/String;Z)V + public static final fun writeJson (Lorg/jetbrains/kotlinx/dataframe/DataRow;Ljava/io/File;Z)V + public static final fun writeJson (Lorg/jetbrains/kotlinx/dataframe/DataRow;Ljava/lang/Appendable;Z)V + public static final fun writeJson (Lorg/jetbrains/kotlinx/dataframe/DataRow;Ljava/lang/String;Z)V + public static synthetic fun writeJson$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/io/File;ZILjava/lang/Object;)V + public static synthetic fun writeJson$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/lang/Appendable;ZILjava/lang/Object;)V + public static synthetic fun writeJson$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/lang/String;ZILjava/lang/Object;)V + public static synthetic fun writeJson$default (Lorg/jetbrains/kotlinx/dataframe/DataRow;Ljava/io/File;ZILjava/lang/Object;)V + public static synthetic fun writeJson$default (Lorg/jetbrains/kotlinx/dataframe/DataRow;Ljava/lang/Appendable;ZILjava/lang/Object;)V + public static synthetic fun writeJson$default (Lorg/jetbrains/kotlinx/dataframe/DataRow;Ljava/lang/String;ZILjava/lang/Object;)V +} + From 4a251ac1fffcb7b055b54eac87a543956aa047dd Mon Sep 17 00:00:00 2001 From: Ilya Muradyan Date: Sat, 26 Apr 2025 20:36:25 +0200 Subject: [PATCH 12/15] Fix build: jupyter dependencies declarations --- dataframe-jupyter/build.gradle.kts | 6 ++---- 
.../org/jetbrains/kotlinx/dataframe/jupyter/Integration.kt | 2 -- dataframe-openapi-generator/build.gradle.kts | 1 - 3 files changed, 2 insertions(+), 7 deletions(-) diff --git a/dataframe-jupyter/build.gradle.kts b/dataframe-jupyter/build.gradle.kts index 8170f5c84f..6cae686190 100644 --- a/dataframe-jupyter/build.gradle.kts +++ b/dataframe-jupyter/build.gradle.kts @@ -18,17 +18,15 @@ repositories { } dependencies { - compileOnly(projects.core) - compileOnly(projects.dataframeJson) + api(projects.core) + api(projects.dataframeJson) testImplementation(libs.junit) testImplementation(libs.serialization.json) - testImplementation(projects.core) testImplementation(projects.dataframeArrow) testImplementation(projects.dataframeCsv) testImplementation(projects.dataframeExcel) testImplementation(projects.dataframeJdbc) - testImplementation(projects.dataframeJson) // experimental testImplementation(projects.dataframeOpenapiGenerator) testImplementation(projects.dataframeOpenapi) diff --git a/dataframe-jupyter/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/Integration.kt b/dataframe-jupyter/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/Integration.kt index 48b92166e1..47d11cfe87 100644 --- a/dataframe-jupyter/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/Integration.kt +++ b/dataframe-jupyter/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/Integration.kt @@ -171,8 +171,6 @@ internal class Integration(private val notebook: Notebook, private val options: ) } dependencies( - "org.jetbrains.kotlinx:dataframe-core:$version", - "org.jetbrains.kotlinx:dataframe-json:$version", "org.jetbrains.kotlinx:dataframe-csv:$version", "org.jetbrains.kotlinx:dataframe-excel:$version", "org.jetbrains.kotlinx:dataframe-jdbc:$version", diff --git a/dataframe-openapi-generator/build.gradle.kts b/dataframe-openapi-generator/build.gradle.kts index 346205882f..37aa3b2a58 100644 --- a/dataframe-openapi-generator/build.gradle.kts +++ 
b/dataframe-openapi-generator/build.gradle.kts @@ -34,7 +34,6 @@ dependencies { exclude("jakarta.validation") } - testApi(projects.core) testApi(projects.dataframeJupyter) testImplementation(libs.junit) testImplementation(libs.kotestAssertions) { From c5821dd6763a22babea3588d85a21d34713bfd6e Mon Sep 17 00:00:00 2001 From: Ilya Muradyan Date: Sat, 26 Apr 2025 21:15:49 +0200 Subject: [PATCH 13/15] Fix build: integration tests --- dataframe-json/build.gradle.kts | 11 +++++++++++ plugins/dataframe-gradle-plugin/build.gradle.kts | 3 ++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/dataframe-json/build.gradle.kts b/dataframe-json/build.gradle.kts index efb141d6f3..31d333f3a4 100644 --- a/dataframe-json/build.gradle.kts +++ b/dataframe-json/build.gradle.kts @@ -48,6 +48,17 @@ tasks.test { useJUnitPlatform() } +val instrumentedJars: Configuration by configurations.creating { + isCanBeConsumed = true + isCanBeResolved = false +} + +artifacts { + add("instrumentedJars", tasks.jar.get().archiveFile) { + builtBy(tasks.jar) + } +} + kotlinPublications { publication { publicationName = "dataframeJson" diff --git a/plugins/dataframe-gradle-plugin/build.gradle.kts b/plugins/dataframe-gradle-plugin/build.gradle.kts index d87e3ffae9..c8974b3c15 100644 --- a/plugins/dataframe-gradle-plugin/build.gradle.kts +++ b/plugins/dataframe-gradle-plugin/build.gradle.kts @@ -54,7 +54,7 @@ tasks.withType { filter { it.replace( "%DATAFRAME_JAR%", - listOf(":core", ":dataframe-csv").joinToString("\", \"") { + listOf(":core", ":dataframe-csv", ":dataframe-json").joinToString("\", \"") { project(it).configurations .getByName("instrumentedJars") .artifacts.single() @@ -111,6 +111,7 @@ val integrationTestTask = task("integrationTest") { dependsOn(":dataframe-excel:publishToMavenLocal") dependsOn(":dataframe-csv:publishToMavenLocal") dependsOn(":dataframe-jdbc:publishToMavenLocal") + dependsOn(":dataframe-json:publishToMavenLocal") 
dependsOn(":dataframe-openapi-generator:publishToMavenLocal") dependsOn(":dataframe-openapi:publishToMavenLocal") dependsOn(":publishApiPublicationToMavenLocal") From c54fcd52bd93e5e9723810bfe8f4ddfea4db54f6 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Sun, 27 Apr 2025 12:12:11 +0200 Subject: [PATCH 14/15] added README.md for json module --- dataframe-json/README.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 dataframe-json/README.md diff --git a/dataframe-json/README.md b/dataframe-json/README.md new file mode 100644 index 0000000000..43796698a3 --- /dev/null +++ b/dataframe-json/README.md @@ -0,0 +1,14 @@ +## :dataframe-json + +This module, published as `dataframe-json`, contains all logic and tests for DataFrame to be able to work with +JSON data sources; [reading](https://kotlin.github.io/dataframe/read.html#read-from-json) +and [writing](https://kotlin.github.io/dataframe/write.html#writing-to-json). +It's based on [Kotlinx Serialization](https://github.com/Kotlin/kotlinx.serialization). + +It also contains some logic specific to encoding dataframes as JSON objects with metadata for +the [custom table component in Kotlin Notebook](https://kotlin.github.io/dataframe/usage-with-kotlin-notebook-plugin.html). +See [serialization_format](../docs/serialization_format.md) for more information about the format. + +This module is optional but is included by default by the `dataframe` module, `dataframe-jupyter`, +`dataframe-csv`, and `dataframe-excel`. +If you want to use DataFrame without JSON support, you can exclude this module from the dependency. 
From 780b61fa310769b52bd428dad268403adbfbfa12 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Tue, 29 Apr 2025 12:00:31 +0200 Subject: [PATCH 15/15] tiny error rewrite --- .../org/jetbrains/kotlinx/dataframe/impl/io/writeDelim.kt | 4 ++-- .../main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeDelim.kt b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeDelim.kt index 008fb5a19d..ca943d9ade 100644 --- a/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeDelim.kt +++ b/dataframe-csv/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/writeDelim.kt @@ -62,7 +62,7 @@ internal fun writeDelimImpl( it.toJson() } catch (_: NoClassDefFoundError) { error( - "Encountered a DataFrame when writing to csv/tsv/delim. This needs to be converted to JSON, so the dataframe-json dependency is required.", + "Encountered a DataRow value when writing to csv/tsv/delim. It must be serialized to JSON, requiring the 'dataframe-json' dependency.", ) } @@ -70,7 +70,7 @@ internal fun writeDelimImpl( it.toJson() } catch (_: NoClassDefFoundError) { error( - "Encountered a DataRow when writing to csv/tsv/delim. This needs to be converted to JSON, so the dataframe-json dependency is required.", + "Encountered a DataFrame value when writing to csv/tsv/delim. 
It must be serialized to JSON, requiring the 'dataframe-json' dependency.", ) } diff --git a/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt b/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt index 4cc061cdce..3f39c57837 100644 --- a/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt +++ b/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt @@ -673,7 +673,7 @@ private fun Cell.setCellValueByGuessedType(any: Any) = any.toJson() } catch (_: NoClassDefFoundError) { error( - "Encountered a DataRow when writing to an Excel cell. This needs to be converted to JSON, so the dataframe-json dependency is required.", + "Encountered a DataRow value when writing to an Excel cell. It must be serialized to JSON, requiring the 'dataframe-json' dependency.", ) }, ) @@ -683,7 +683,7 @@ private fun Cell.setCellValueByGuessedType(any: Any) = any.toJson() } catch (_: NoClassDefFoundError) { error( - "Encountered a DataFrame when writing to an Excel cell. This needs to be converted to JSON, so the dataframe-json dependency is required.", + "Encountered a DataFrame value when writing to an Excel cell. It must be serialized to JSON, requiring the 'dataframe-json' dependency.", ) }, )