Skip to content

Commit

Permalink
Rewrite and restructure JSON parser
Browse files Browse the repository at this point in the history
    * Get rid of spontaneous lookahead
    * Spill fewer variables into object state
    * Optimize token and whitespace reading
    * Compact bytecode where applicable
    * Separate fast and slow paths
    * Add optimistic key consumption optimization to leverage indexOf intrinsic
    * Improve exception messages in a few places

All tests except lenient boolean should pass

Benchmark difference for throughput (Intel(R) Core(TM) i7-6700 CPU @ 3.40GHz, libraries-linux-perf-unit-877):

CitmBenchmark.decodeCitm                            diff   +8%
CoerceInputValuesBenchmark.testNonNullableCoercing  diff   +11%
CoerceInputValuesBenchmark.testNonNullableRegular   diff   +5%
CoerceInputValuesBenchmark.testNullableCoercing     diff   +7%
CoerceInputValuesBenchmark.testNullableRegular      diff   +13%
JacksonComparisonBenchmark.kotlinFromString         diff   +16% (noisy, JIT-dependent)
TwitterBenchmark.parseTwitter                       diff   +26%
TwitterFeedBenchmark.parseTwitter                   diff   +30%
  • Loading branch information
qwwdfsad committed Feb 17, 2021
1 parent f28e005 commit 18de55e
Show file tree
Hide file tree
Showing 17 changed files with 485 additions and 307 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@ import java.util.concurrent.*
@Fork(2)
open class CoerceInputValuesBenchmark {

// Specific benchmark to isolate effect on #1156. Remove after release of 1.0.1

@Serializable
class Holder(
val i1: Int,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,3 +60,10 @@ open class PrimitiveValuesBenchmark {
fun encodeLong(): LongHolder = Json.decodeFromString(LongHolder.serializer(), longValue)
}


private val booleanHolder = PrimitiveValuesBenchmark.BooleanHolder(true, false, true, false, true, true, false, false)
private val booleanValue = Json.encodeToString(booleanHolder)

/** Manual smoke check: decodes the pre-encoded boolean holder and prints the result. */
fun main() {
val serializer = PrimitiveValuesBenchmark.BooleanHolder.serializer()
val decoded = Json.decodeFromString(serializer, booleanValue)
println(decoded)
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,5 @@ open class TwitterBenchmark {

// Order of magnitude: 4-7 op/ms
@Benchmark
fun parseTwitter() = Json.decodeFromString(MacroTwitterFeed.serializer(), input)
fun parseTwitter() = Json.decodeFromString(Twitter.serializer(), input)
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@ data class TwitterStatus(
val source: String,
val truncated: Boolean,
val in_reply_to_status_id: Long?,
val in_reply_to_status_id_str: Long?,
val in_reply_to_status_id_str: String?,
val in_reply_to_user_id: Long?,
val in_reply_to_user_id_str: Long?,
val in_reply_to_user_id_str: String?,
val in_reply_to_screen_name: String?,
val user: TwitterUser,
val geo: String?,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import kotlinx.serialization.*
import kotlinx.serialization.json.*

fun main() {
val s = MacroTwitterFeed::class.java.getResource("/twitter.json").readBytes().decodeToString()
val s = MacroTwitterFeed::class.java.getResource("/twitter_macro.json").readBytes().decodeToString()
println(Json.decodeFromString<MacroTwitterFeed>(s))
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,9 @@ internal open class PluginGeneratedSerialDescriptor(
private val elementsOptionality = BooleanArray(elementsCount)
public override val serialNames: Set<String> get() = indices.keys

// don't change lazy mode: KT-32871, KT-32872
private val indices: Map<String, Int> by lazy { buildIndices() }
private var indices: Map<String, Int> = emptyMap()
// Cache child serializers, they are not cached by the implementation for nullable types
private val childSerializers by lazy { generatedSerializer?.childSerializers() ?: emptyArray() }
private val childSerializers: Array<KSerializer<*>> by lazy { generatedSerializer?.childSerializers() ?: emptyArray() }

// Lazy because of JS specific initialization order (#789)
internal val typeParameterDescriptors: Array<SerialDescriptor> by lazy {
Expand All @@ -48,6 +47,9 @@ internal open class PluginGeneratedSerialDescriptor(
names[++added] = name
elementsOptionality[added] = isOptional
propertiesAnnotations[added] = null
if (added == elementsCount - 1) {
indices = buildIndices()
}
}

public fun pushAnnotation(annotation: Annotation) {
Expand Down
2 changes: 1 addition & 1 deletion docs/basic-serialization.md
Original file line number Diff line number Diff line change
Expand Up @@ -411,7 +411,7 @@ Attempts to explicitly specify its value in the serial format, even if the speci
value is equal to the default one, produces the following exception.

```text
Exception in thread "main" kotlinx.serialization.json.internal.JsonDecodingException: Unexpected JSON token at offset 60: Encountered an unknown key 'language'.
Exception in thread "main" kotlinx.serialization.json.internal.JsonDecodingException: Unexpected JSON token at offset 42: Encountered an unknown key 'language'.
Use 'ignoreUnknownKeys = true' in 'Json {}' builder to ignore unknown keys.
```

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,9 @@ internal object JsonNullSerializer : KSerializer<JsonNull> {

/**
 * Deserializes a JSON `null` into the [JsonNull] singleton.
 *
 * Calls `verify(decoder)` first, then `decoder.decodeNull()`, and always returns [JsonNull].
 */
override fun deserialize(decoder: Decoder): JsonNull {
verify(decoder)
// NOTE(review): this branch is empty, so the result of decodeNotNullMark() is ignored
// and a non-null mark is not rejected here. It looks like an unfinished check —
// presumably it should fail when the value is not null; confirm the intended behavior.
if (decoder.decodeNotNullMark()) {

}
decoder.decodeNull()
return JsonNull
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,70 +13,64 @@ internal class JsonParser(
private val isLenient = configuration.isLenient

private fun readObject(): JsonElement {
reader.requireTokenClass(TC_BEGIN_OBJ) { "Expected start of the object" }
reader.nextToken()
// Prohibit leading comma
reader.require(reader.tokenClass != TC_COMMA, reader.currentPosition) { "Unexpected leading comma" }
var lastToken = reader.consumeNextToken(TC_BEGIN_OBJ)
if (reader.peekNextToken() == TC_COMMA) reader.fail("Unexpected leading comma")
val result = linkedMapOf<String, JsonElement>()
var valueExpected = false
while (reader.canBeginValue) {
valueExpected = false
val key = if (isLenient) reader.takeString() else reader.takeStringQuoted()
reader.requireTokenClass(TC_COLON) { "Expected ':'" }
reader.nextToken()
while (reader.canConsumeValue()) {
// Read key and value
val key = if (isLenient) reader.consumeStringLenient() else reader.consumeString()
reader.consumeNextToken(TC_COLON)
val element = read()
result[key] = element
if (reader.tokenClass != TC_COMMA) {
// Prohibit whitespaces instead of commas {a:b c:d}
reader.requireTokenClass(TC_END_OBJ) { "Expected end of the object or comma" }
} else {
valueExpected = true
reader.nextToken()
// Verify the next token
lastToken = reader.consumeNextToken()
if (lastToken != TC_COMMA && lastToken != TC_END_OBJ) {
reader.fail("Expected end of the object or comma")
}
}
reader.require(!valueExpected && reader.tokenClass == TC_END_OBJ, reader.currentPosition) { "Expected end of the object" }
reader.nextToken()
// Check for the correct ending
if (lastToken == TC_BEGIN_OBJ) { // Case of empty object
reader.consumeNextToken(TC_END_OBJ)
} else if (lastToken == TC_COMMA) { // Trailing comma
reader.fail("Unexpected trailing comma")
}
return JsonObject(result)
}

private fun readArray(): JsonElement {
reader.requireTokenClass(TC_BEGIN_LIST) { "Expected start of the array" }
reader.nextToken()
var lastToken = reader.consumeNextToken(TC_BEGIN_LIST)
// Prohibit leading comma
reader.require(reader.tokenClass != TC_COMMA, reader.currentPosition) { "Unexpected leading comma" }
if (reader.peekNextToken() == TC_COMMA) reader.fail("Unexpected leading comma")
val result = arrayListOf<JsonElement>()
var valueExpected = false
while (reader.canBeginValue) {
valueExpected = false
while (reader.canConsumeValue()) {
val element = read()
result.add(element)
if (reader.tokenClass != TC_COMMA) {
// Prohibit whitespaces instead of commas [a b]
reader.requireTokenClass(TC_END_LIST) { "Expected end of the array or comma" }
} else {
valueExpected = true
reader.nextToken()
lastToken = reader.consumeNextToken()
if (lastToken != TC_COMMA) {
reader.require(lastToken == TC_END_LIST) { "Expected end of the array or comma" }
}
}
// Prohibit trailing commas
reader.require(!valueExpected, reader.currentPosition) { "Unexpected trailing comma" }
reader.nextToken()
// Check for the correct ending
if (lastToken == TC_BEGIN_LIST) { // Case of empty object
reader.consumeNextToken(TC_END_LIST)
} else if (lastToken == TC_COMMA) { // Trailing comma
reader.fail("Unexpected trailing comma")
}
return JsonArray(result)
}

private fun readValue(isString: Boolean): JsonElement {
val str = if (isLenient) {
reader.takeString()
private fun readValue(isString: Boolean): JsonPrimitive {
val string = if (isLenient || !isString) {
reader.consumeStringLenient()
} else {
if (isString) reader.takeStringQuoted() else reader.takeString()
reader.consumeString()
}
return JsonLiteral(str, isString)
if (string == NULL) return JsonNull
return JsonLiteral(string, isString)
}

fun read(): JsonElement {
if (!reader.canBeginValue) reader.fail("Can't begin reading value from here")
return when (reader.tokenClass) {
TC_NULL -> JsonNull.also { reader.nextToken() }
return when (reader.peekNextToken()) {
TC_STRING -> readValue(isString = true)
TC_OTHER -> readValue(isString = false)
TC_BEGIN_OBJ -> readObject()
Expand Down
Loading

0 comments on commit 18de55e

Please sign in to comment.