Skip to content

Json module extraction #1147

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Apr 29, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ dependencies {
api(projects.dataframeExcel)
api(projects.dataframeJdbc)
api(projects.dataframeCsv)
api(projects.dataframeJson)

// experimental, so not included by default:
// api(projects.dataframeOpenapi)
Expand All @@ -64,6 +65,7 @@ dependencies {
kover(projects.dataframeOpenapi)
kover(projects.dataframeJdbc)
kover(projects.dataframeCsv)
kover(projects.dataframeJson)
kover(projects.plugins.kotlinDataframe)
kover(projects.dataframeJupyter)
}
Expand Down
120 changes: 19 additions & 101 deletions core/api/core.api

Large diffs are not rendered by default.

5 changes: 3 additions & 2 deletions core/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,6 @@ dependencies {

api(libs.commonsCsv)
implementation(libs.commonsIo)
implementation(libs.serialization.core)
implementation(libs.serialization.json)
implementation(libs.fastDoubleParser)

api(libs.kotlin.datetimeJvm)
Expand All @@ -81,6 +79,9 @@ dependencies {
testImplementation(libs.kotlin.scriptingJvm)
testImplementation(libs.jsoup)
testImplementation(libs.sl4jsimple)
testImplementation(projects.dataframeJson)
testImplementation(libs.serialization.core)
testImplementation(libs.serialization.json)

// for samples.api
testImplementation(projects.dataframeCsv)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ import org.jetbrains.kotlinx.dataframe.api.KeyValueProperty
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
import org.jetbrains.kotlinx.dataframe.documentation.UnifyingNumbers
import org.jetbrains.kotlinx.dataframe.io.JSON

/**
* Annotation preprocessing will generate a DataSchema interface from the data at `path`.
Expand Down Expand Up @@ -73,8 +72,11 @@ public annotation class JdbcOptions(
)

public annotation class JsonOptions(
/** Allows the choice of how to handle type clashes when reading a JSON file. */
public val typeClashTactic: JSON.TypeClashTactic = JSON.TypeClashTactic.ARRAY_AND_VALUE_COLUMNS,
/**
* Allows the choice of how to handle type clashes when reading a JSON file.
* Must be either [JsonOptions.TypeClashTactics.ARRAY_AND_VALUE_COLUMNS] or [JsonOptions.TypeClashTactics.ANY_COLUMNS].
*/
public val typeClashTactic: String = TypeClashTactics.ARRAY_AND_VALUE_COLUMNS,
/**
* List of [JsonPath]s where instead of a [ColumnGroup], a [FrameColumn]<[KeyValueProperty]>
* will be created.
Expand All @@ -85,4 +87,9 @@ public annotation class JsonOptions(
public val keyValuePaths: Array<String> = [],
/** Whether to [unify the numbers that are read][UnifyingNumbers]. `true` by default. */
public val unifyNumbers: Boolean = true,
)
) {
/**
 * String constants accepted by [JsonOptions.typeClashTactic].
 *
 * The annotation parameter is a plain [String] rather than the `JSON.TypeClashTactic` enum,
 * so this file no longer needs to reference the JSON reader's types.
 * NOTE(review): the values presumably have to match the enum constant names of
 * `JSON.TypeClashTactic` in the dataframe-json module — confirm against the consumer.
 */
public object TypeClashTactics {
/** Value selecting the "array and value columns" tactic; the default of [JsonOptions.typeClashTactic]. */
public const val ARRAY_AND_VALUE_COLUMNS: String = "ARRAY_AND_VALUE_COLUMNS"
/** Value selecting the "any columns" tactic. */
public const val ANY_COLUMNS: String = "ANY_COLUMNS"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,14 @@ import org.jetbrains.kotlinx.dataframe.type
* Creates a [FrameColumn] from [this] by splitting the dataframe into
* smaller ones, with their number of rows at most [size].
*/
public fun <T> DataFrame<T>.chunked(size: Int, name: String = "groups"): FrameColumn<T> {
val startIndices = (0 until nrow step size)
return this.chunkedImpl(startIndices, name)
}
/**
 * Splits [this] into consecutive chunks starting at rows `0, size, 2 * size, ...`
 * by delegating to the `startIndices` overload of `chunked`.
 *
 * @param size the distance between two chunk starts, used as the `step` of the index progression.
 * @param name forwarded unchanged to the `startIndices` overload
 *   (presumably the name of the resulting [FrameColumn] — TODO confirm in `chunkedImpl`).
 */
public fun <T> DataFrame<T>.chunked(size: Int, name: String = "groups"): FrameColumn<T> =
chunked(
startIndices = 0 until nrow step size,
name = name,
)

/**
 * Creates a [FrameColumn] from [this] by splitting the dataframe at the given [startIndices].
 *
 * This is a thin public wrapper that forwards both arguments to `chunkedImpl`.
 *
 * @param startIndices row indices at which the chunks begin
 *   (NOTE(review): presumably expected in ascending order and within `0 until nrow` — confirm in `chunkedImpl`).
 * @param name forwarded to `chunkedImpl`; `"groups"` by default.
 */
public fun <T> DataFrame<T>.chunked(startIndices: Iterable<Int>, name: String = "groups"): FrameColumn<T> =
chunkedImpl(startIndices, name)

public fun <T> DataColumn<T>.chunked(size: Int): ValueColumn<List<T>> {
val values = toList().chunked(size)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ private const val CAST = "cast"
private const val VERIFY = "verify" // cast(true) is obscure, i think it's better to use named argument here
private const val READ_CSV = "readCSV"
private const val READ_TSV = "readTSV"
private const val READ_JSON = "readJson"
private const val READ_JDBC = "readJdbc"

public abstract class AbstractDefaultReadMethod(
Expand Down Expand Up @@ -82,13 +81,6 @@ public abstract class AbstractDefaultReadMethod(
override val additionalImports: List<String> = listOf("import org.jetbrains.kotlinx.dataframe.io.$methodName")
}

internal class DefaultReadJsonMethod(path: String?, arguments: MethodArguments) :
AbstractDefaultReadMethod(
path = path,
arguments = arguments,
methodName = READ_JSON,
)

/**
 * Default read method for CSV sources: an [AbstractDefaultReadMethod] bound to the
 * [READ_CSV] method name.
 */
internal class DefaultReadCsvMethod(path: String?, arguments: MethodArguments) :
    AbstractDefaultReadMethod(
        path = path,
        arguments = arguments,
        methodName = READ_CSV,
    )

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,28 +42,4 @@ import org.jetbrains.kotlinx.dataframe.impl.UnifiedNumberTypeOptions
*
* At the bottom of the graph is [Nothing]. This can be interpreted as `null`.
*/
internal interface UnifyingNumbers {

/**
* ```
* (BigDecimal)
* / \
* (BigInteger) \
* / \ \
* <~ ULong Long ~> Double ..
* .. | / | / | \..
* \ | / | / |
* UInt Int Float
* .. | / | / \..
* \ | / | /
* UShort Short
* | / |
* | / |
* UByte Byte
* \ /
* \ /
* Nothing?
* ```
*/
interface Graph
}
public interface UnifyingNumbers
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import kotlin.reflect.full.isSubclassOf
import kotlin.reflect.full.withNullability
import kotlin.reflect.jvm.jvmErasure

internal interface DataCollector<T> {
public interface DataCollector<T> {

public val data: List<T?>
public val hasNulls: Boolean
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,10 @@ internal fun <T> catchSilent(body: () -> T): T? =
internal fun Iterable<KClass<*>>.commonType(nullable: Boolean, upperBound: KType? = null) =
commonParents(this).createType(nullable, upperBound)

// Helper overload for friend modules: exposes the `Iterable<KType?>.commonType(useStar)`
// extension as a regular function taking the types as its first parameter.
// The explicit @JvmName gives this wrapper its own JVM method name
// (presumably to avoid a platform-signature clash with the extension it wraps — TODO confirm).
@JvmName("commonTypeOverload")
internal fun commonType(types: Iterable<KType?>, useStar: Boolean = true) = types.commonType(useStar)

/**
* Returns the common supertype of the given types.
*
Expand Down Expand Up @@ -276,6 +280,10 @@ internal fun <T> DataFrame<T>.splitByIndices(startIndices: Sequence<Int>): Seque
}
}

// Helper overload for friend modules: exposes the `List<T>.splitByIndices(startIndices)`
// extension as a regular function taking the list as its first parameter.
// The extension pairs each start index with the next one (appending the list size as the
// final end) and yields the corresponding sub-lists.
// The explicit @JvmName gives this wrapper its own JVM method name
// (presumably to avoid a platform-signature clash with the extension it wraps — TODO confirm).
@JvmName("splitByIndicesOverload")
internal fun <T> splitByIndices(list: List<T>, startIndices: Sequence<Int>) = list.splitByIndices(startIndices)

internal fun <T> List<T>.splitByIndices(startIndices: Sequence<Int>): Sequence<List<T>> =
(startIndices + size).zipWithNext { start, endExclusive ->
subList(start, endExclusive)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.jetbrains.kotlinx.dataframe.impl.api

import io.github.oshai.kotlinlogging.KotlinLogging
import kotlinx.datetime.Instant
import kotlinx.datetime.LocalDate
import kotlinx.datetime.LocalDateTime
Expand All @@ -25,19 +26,18 @@ import org.jetbrains.kotlinx.dataframe.api.isColumnGroup
import org.jetbrains.kotlinx.dataframe.api.isFrameColumn
import org.jetbrains.kotlinx.dataframe.api.isSubtypeOf
import org.jetbrains.kotlinx.dataframe.api.map
import org.jetbrains.kotlinx.dataframe.api.parser
import org.jetbrains.kotlinx.dataframe.api.to
import org.jetbrains.kotlinx.dataframe.columns.TypeSuggestion
import org.jetbrains.kotlinx.dataframe.columns.size
import org.jetbrains.kotlinx.dataframe.exceptions.TypeConversionException
import org.jetbrains.kotlinx.dataframe.hasNulls
import org.jetbrains.kotlinx.dataframe.impl.api.Parsers.resetToDefault
import org.jetbrains.kotlinx.dataframe.impl.canParse
import org.jetbrains.kotlinx.dataframe.impl.catchSilent
import org.jetbrains.kotlinx.dataframe.impl.createStarProjectedType
import org.jetbrains.kotlinx.dataframe.impl.io.FastDoubleParser
import org.jetbrains.kotlinx.dataframe.impl.javaDurationCanParse
import org.jetbrains.kotlinx.dataframe.io.isUrl
import org.jetbrains.kotlinx.dataframe.io.readJsonStr
import org.jetbrains.kotlinx.dataframe.values
import java.math.BigDecimal
import java.math.BigInteger
Expand All @@ -61,6 +61,8 @@ import java.time.LocalDate as JavaLocalDate
import java.time.LocalDateTime as JavaLocalDateTime
import java.time.LocalTime as JavaLocalTime

private val logger = KotlinLogging.logger { }

internal interface StringParser<T> {
fun toConverter(options: ParserOptions?): TypeConverter

Expand Down Expand Up @@ -335,6 +337,94 @@ internal object Parsers : GlobalParserOptions {
parser
}

/**
 * Looks up `readJsonStr` of the optional dataframe-json module through reflection and binds it
 * to [receiver] (`DataFrame.Companion` or `DataRow.Companion`).
 *
 * The returned function calls `readJsonStr` with an empty header, no key/value paths,
 * the `ARRAY_AND_VALUE_COLUMNS` type clash tactic and number unification enabled.
 *
 * The tactic is selected by name instead of by ordinal, so reordering the enum in
 * dataframe-json cannot silently change parsing behavior. The first constant is only used as a
 * fallback when no constant with that name exists.
 *
 * Every [ReflectiveOperationException] during lookup yields `null`, i.e. both a missing class
 * and a missing/changed `readJsonStr` signature. Letting the exception escape a `lazy`
 * initializer would make the lookup run again on every access.
 *
 * @param receiver the companion object `readJsonStr` is an extension of.
 * @return a function reading a JSON string, or `null` when dataframe-json is not usable.
 */
private fun reflectReadJsonStr(receiver: Any): ((text: String) -> Any?)? =
    try {
        val jsonKt = Class.forName("org.jetbrains.kotlinx.dataframe.io.JsonKt")
        val typeClashTacticClass = Class.forName("org.jetbrains.kotlinx.dataframe.io.JSON\$TypeClashTactic")
        val readJsonStr = jsonKt.getMethod(
            "readJsonStr",
            // this =
            receiver.javaClass,
            // text =
            String::class.java,
            // header =
            List::class.java,
            // keyValuePaths =
            List::class.java,
            // typeClashTactic =
            typeClashTacticClass,
            // unifyNumbers =
            Boolean::class.java,
        )

        val tactics = typeClashTacticClass.enumConstants
        val defaultTactic = tactics.firstOrNull { (it as? Enum<*>)?.name == "ARRAY_AND_VALUE_COLUMNS" }
            ?: tactics[0]

        val read: (String) -> Any? = { text ->
            readJsonStr.invoke(
                null,
                // this =
                receiver,
                // text =
                text,
                // header =
                emptyList<Any>(),
                // keyValuePaths =
                emptyList<Any>(),
                // typeClashTactic =
                defaultTactic,
                // unifyNumbers =
                true,
            )
        }
        read
    } catch (_: ReflectiveOperationException) {
        null
    }

// TODO rewrite using parser service later https://github.com/Kotlin/dataframe/issues/962
// null when dataframe-json is not present
private val readJsonStrAnyFrame: ((text: String) -> AnyFrame)? by lazy {
    reflectReadJsonStr(receiver = DataFrame.Companion)?.let { read ->
        val typed: (String) -> AnyFrame = { text -> read(text) as AnyFrame }
        typed
    }
}

// TODO rewrite using parser service later https://github.com/Kotlin/dataframe/issues/962
// null when dataframe-json is not present
private val readJsonStrAnyRow: ((text: String) -> AnyRow)? by lazy {
    reflectReadJsonStr(receiver = DataRow.Companion)?.let { read ->
        val typed: (String) -> AnyRow = { text -> read(text) as AnyRow }
        typed
    }
}

internal val parsersOrder = listOf(
// Int
stringParser<Int> { it.toIntOrNull() },
Expand Down Expand Up @@ -408,7 +498,14 @@ internal object Parsers : GlobalParserOptions {
stringParser<AnyFrame>(catch = true) {
val trimmed = it.trim()
if (trimmed.startsWith("[") && trimmed.endsWith("]")) {
DataFrame.readJsonStr(it)
if (readJsonStrAnyFrame == null) {
logger.warn {
"parse() encountered a string that looks like a JSON array, but the dataframe-json dependency was not detected. Skipping for now."
}
null
} else {
readJsonStrAnyFrame!!(trimmed)
}
} else {
null
}
Expand All @@ -417,7 +514,14 @@ internal object Parsers : GlobalParserOptions {
stringParser<AnyRow>(catch = true) {
val trimmed = it.trim()
if (trimmed.startsWith("{") && trimmed.endsWith("}")) {
DataRow.readJsonStr(it)
if (readJsonStrAnyRow == null) {
logger.warn {
"parse() encountered a string that looks like a JSON object, but the dataframe-json dependency was not detected. Skipping for now."
}
null
} else {
readJsonStrAnyRow!!(trimmed)
}
} else {
null
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,26 @@ internal fun BufferedImage.toByteArray(format: String = DEFAULT_IMG_FORMAT): Byt
ImageIO.write(this, format, bos)
bos.toByteArray()
}

// Helper overload for friend modules: exposes the `BufferedImage.resizeKeepingAspectRatio(...)`
// extension as a regular function taking the image as its first parameter.
// All arguments are forwarded by name, unchanged. Defaults used here: an ARGB result image,
// nearest-neighbor interpolation, quality rendering, antialiasing on, and no image observer.
// NOTE(review): `maxSize` is presumably the upper bound for the larger image side — confirm
// in the wrapped extension.
// The explicit @JvmName gives this wrapper its own JVM method name
// (presumably to avoid a platform-signature clash with the extension it wraps — TODO confirm).
@JvmName("resizeKeepingAspectRatioOverload")
internal fun resizeKeepingAspectRatio(
image: BufferedImage,
maxSize: Int,
resultImageType: Int = BufferedImage.TYPE_INT_ARGB,
interpolation: Any = RenderingHints.VALUE_INTERPOLATION_NEAREST_NEIGHBOR,
renderingQuality: Any = RenderingHints.VALUE_RENDER_QUALITY,
antialiasing: Any = RenderingHints.VALUE_ANTIALIAS_ON,
observer: ImageObserver? = null,
) = image.resizeKeepingAspectRatio(
maxSize = maxSize,
resultImageType = resultImageType,
interpolation = interpolation,
renderingQuality = renderingQuality,
antialiasing = antialiasing,
observer = observer,
)

// Helper overload for friend modules: exposes the `BufferedImage.toByteArray(format)`
// extension as a regular function taking the image as its first parameter.
// The extension writes the image with `ImageIO.write` in the given format and returns the bytes.
// The explicit @JvmName gives this wrapper its own JVM method name
// (presumably to avoid a platform-signature clash with the extension it wraps — TODO confirm).
@JvmName("toByteArrayOverload")
internal fun toByteArray(image: BufferedImage, format: String = DEFAULT_IMG_FORMAT) = image.toByteArray(format)
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ import kotlin.reflect.typeOf
internal fun AnyFrame.extractSchema(): DataFrameSchema =
DataFrameSchemaImpl(columns().filter { it.name().isNotEmpty() }.associate { it.name() to it.extractSchema() })

/**
 * Helper overload for friend modules: exposes the `Iterable<DataFrameSchema>.intersectSchemas()`
 * extension as a regular function.
 *
 * @param schemas the schemas to intersect.
 * @return whatever the wrapped extension returns for [schemas].
 */
@JvmName("intersectSchemasOverload")
internal fun intersectSchemas(schemas: Iterable<DataFrameSchema>): DataFrameSchema {
    return schemas.intersectSchemas()
}

internal fun Iterable<DataFrameSchema>.intersectSchemas(): DataFrameSchema {
val collectedTypes = mutableMapOf<String, MutableSet<ColumnSchema>>()
var first = true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ import java.net.URL
* Opens a stream to [url] to create a [DataFrame] from it.
* If the URL is a file URL, the file is read directly.
* If the URL is an HTTP URL, it's also read directly, but if the server returns an error code,
* the error response is read as JSON and parsed as [DataFrame] too.
* the error response is read and parsed as [DataFrame] too.
*
* Public so it may be used in other modules.
*/
Expand All @@ -32,8 +32,8 @@ public fun catchHttpResponse(url: URL, body: (InputStream) -> AnyFrame): AnyFram
if (code != 200) {
val response = connection.responseMessage
try {
// attempt to read error response as JSON
return DataFrame.readJson(connection.errorStream)
// attempt to read error response as dataframe
return DataFrame.read(connection.errorStream).df
} catch (_: Exception) {
throw RuntimeException("Server returned HTTP response code: $code. Response: $response")
}
Expand Down
Loading