From 9a0277a315e204583c145ecd8c43225c999bf4a6 Mon Sep 17 00:00:00 2001 From: Vsevolod Tolstopyatov Date: Wed, 17 Feb 2021 16:10:26 +0300 Subject: [PATCH 01/11] Rewrite and restructure JSON parser * Get rid of spontaneous lookahead * Spill fewer variables into object state * Optimize token and whitespace reading * Separate fast and slow paths * Add optimistic key consumption optimization to leverage indexOf intrinsic * Improve exception messages in a few places All tests except lenient boolean should pass Benchmark difference for throughput (Intel(R) Core(TM) i7-6700 CPU @ 3.40GHz, libraries-linux-perf-unit-877): CitmBenchmark.decodeCitm diff +8% CoerceInputValuesBenchmark.testNonNullableCoercing diff +11% CoerceInputValuesBenchmark.testNonNullableRegular diff +5% CoerceInputValuesBenchmark.testNullableCoercing diff +7% CoerceInputValuesBenchmark.testNullableRegular diff +13% JacksonComparisonBenchmark.kotlinFromString diff +16% (noisy, JIT-dependent) TwitterBenchmark.parseTwitter diff +26% TwitterFeedBenchmark.parseTwitter diff +30% --- .../json/CoerceInputValuesBenchmark.kt | 2 - .../json/PrimitiveValuesBenchmark.kt | 7 + .../benchmarks/json/TwitterBenchmark.kt | 2 +- .../kotlinx/benchmarks/model/MacroTwitter.kt | 4 +- .../kotlinx/benchmarks/model/Twitter.kt | 2 +- .../PluginGeneratedSerialDescriptor.kt | 8 +- docs/basic-serialization.md | 2 +- .../json/JsonElementSerializers.kt | 3 + .../serialization/json/internal/JsonParser.kt | 76 ++-- .../serialization/json/internal/JsonReader.kt | 429 ++++++++++++------ .../json/internal/StreamingJsonDecoder.kt | 177 ++++---- .../json/internal/TreeJsonDecoder.kt | 3 - .../serialization/json/JsonParserTest.kt | 3 +- .../JsonUseDefaultOnNullAndUnknownTest.kt | 1 - .../json/GsonCompatibilityTest.kt | 69 +-- gradle.properties | 2 +- guide/test/BasicSerializationTest.kt | 2 +- 17 files changed, 485 insertions(+), 307 deletions(-) diff --git a/benchmark/src/jmh/kotlin/kotlinx/benchmarks/json/CoerceInputValuesBenchmark.kt 
b/benchmark/src/jmh/kotlin/kotlinx/benchmarks/json/CoerceInputValuesBenchmark.kt index 8e37bd9c2c..d81509d4f9 100644 --- a/benchmark/src/jmh/kotlin/kotlinx/benchmarks/json/CoerceInputValuesBenchmark.kt +++ b/benchmark/src/jmh/kotlin/kotlinx/benchmarks/json/CoerceInputValuesBenchmark.kt @@ -16,8 +16,6 @@ import java.util.concurrent.* @Fork(2) open class CoerceInputValuesBenchmark { - // Specific benchmark to isolate effect on #1156. Remove after release of 1.0.1 - @Serializable class Holder( val i1: Int, diff --git a/benchmark/src/jmh/kotlin/kotlinx/benchmarks/json/PrimitiveValuesBenchmark.kt b/benchmark/src/jmh/kotlin/kotlinx/benchmarks/json/PrimitiveValuesBenchmark.kt index 07c7f449fd..149b38edbe 100644 --- a/benchmark/src/jmh/kotlin/kotlinx/benchmarks/json/PrimitiveValuesBenchmark.kt +++ b/benchmark/src/jmh/kotlin/kotlinx/benchmarks/json/PrimitiveValuesBenchmark.kt @@ -60,3 +60,10 @@ open class PrimitiveValuesBenchmark { fun encodeLong(): LongHolder = Json.decodeFromString(LongHolder.serializer(), longValue) } + +private val booleanHolder = PrimitiveValuesBenchmark.BooleanHolder(true, false, true, false, true, true, false, false) +private val booleanValue = Json.encodeToString(booleanHolder) + +fun main() { + println(Json.decodeFromString(PrimitiveValuesBenchmark.BooleanHolder.serializer(), booleanValue)) +} diff --git a/benchmark/src/jmh/kotlin/kotlinx/benchmarks/json/TwitterBenchmark.kt b/benchmark/src/jmh/kotlin/kotlinx/benchmarks/json/TwitterBenchmark.kt index 56bcb68209..4505c28bd5 100644 --- a/benchmark/src/jmh/kotlin/kotlinx/benchmarks/json/TwitterBenchmark.kt +++ b/benchmark/src/jmh/kotlin/kotlinx/benchmarks/json/TwitterBenchmark.kt @@ -23,5 +23,5 @@ open class TwitterBenchmark { // Order of magnitude: 4-7 op/ms @Benchmark - fun parseTwitter() = Json.decodeFromString(MacroTwitterFeed.serializer(), input) + fun parseTwitter() = Json.decodeFromString(Twitter.serializer(), input) } diff --git 
a/benchmark/src/jmh/kotlin/kotlinx/benchmarks/model/MacroTwitter.kt b/benchmark/src/jmh/kotlin/kotlinx/benchmarks/model/MacroTwitter.kt index 2230e3e9e1..8bbb933ac1 100644 --- a/benchmark/src/jmh/kotlin/kotlinx/benchmarks/model/MacroTwitter.kt +++ b/benchmark/src/jmh/kotlin/kotlinx/benchmarks/model/MacroTwitter.kt @@ -19,9 +19,9 @@ data class TwitterStatus( val source: String, val truncated: Boolean, val in_reply_to_status_id: Long?, - val in_reply_to_status_id_str: Long?, + val in_reply_to_status_id_str: String?, val in_reply_to_user_id: Long?, - val in_reply_to_user_id_str: Long?, + val in_reply_to_user_id_str: String?, val in_reply_to_screen_name: String?, val user: TwitterUser, val geo: String?, diff --git a/benchmark/src/jmh/kotlin/kotlinx/benchmarks/model/Twitter.kt b/benchmark/src/jmh/kotlin/kotlinx/benchmarks/model/Twitter.kt index 6cecd510db..385afe31e1 100644 --- a/benchmark/src/jmh/kotlin/kotlinx/benchmarks/model/Twitter.kt +++ b/benchmark/src/jmh/kotlin/kotlinx/benchmarks/model/Twitter.kt @@ -4,7 +4,7 @@ import kotlinx.serialization.* import kotlinx.serialization.json.* fun main() { - val s = MacroTwitterFeed::class.java.getResource("/twitter.json").readBytes().decodeToString() + val s = MacroTwitterFeed::class.java.getResource("/twitter_macro.json").readBytes().decodeToString() println(Json.decodeFromString(s)) } diff --git a/core/commonMain/src/kotlinx/serialization/internal/PluginGeneratedSerialDescriptor.kt b/core/commonMain/src/kotlinx/serialization/internal/PluginGeneratedSerialDescriptor.kt index c57370f0c6..71017d3407 100644 --- a/core/commonMain/src/kotlinx/serialization/internal/PluginGeneratedSerialDescriptor.kt +++ b/core/commonMain/src/kotlinx/serialization/internal/PluginGeneratedSerialDescriptor.kt @@ -31,10 +31,9 @@ internal open class PluginGeneratedSerialDescriptor( private val elementsOptionality = BooleanArray(elementsCount) public override val serialNames: Set get() = indices.keys - // don't change lazy mode: KT-32871, KT-32872 - 
private val indices: Map by lazy { buildIndices() } + private var indices: Map = emptyMap() // Cache child serializers, they are not cached by the implementation for nullable types - private val childSerializers by lazy { generatedSerializer?.childSerializers() ?: emptyArray() } + private val childSerializers: Array> by lazy { generatedSerializer?.childSerializers() ?: emptyArray() } // Lazy because of JS specific initialization order (#789) internal val typeParameterDescriptors: Array by lazy { @@ -48,6 +47,9 @@ internal open class PluginGeneratedSerialDescriptor( names[++added] = name elementsOptionality[added] = isOptional propertiesAnnotations[added] = null + if (added == elementsCount - 1) { + indices = buildIndices() + } } public fun pushAnnotation(annotation: Annotation) { diff --git a/docs/basic-serialization.md b/docs/basic-serialization.md index 92c954db6a..737e1dedc9 100644 --- a/docs/basic-serialization.md +++ b/docs/basic-serialization.md @@ -411,7 +411,7 @@ Attempts to explicitly specify its value in the serial format, even if the speci value is equal to the default one, produces the following exception. ```text -Exception in thread "main" kotlinx.serialization.json.internal.JsonDecodingException: Unexpected JSON token at offset 60: Encountered an unknown key 'language'. +Exception in thread "main" kotlinx.serialization.json.internal.JsonDecodingException: Unexpected JSON token at offset 42: Encountered an unknown key 'language'. Use 'ignoreUnknownKeys = true' in 'Json {}' builder to ignore unknown keys. 
``` diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/JsonElementSerializers.kt b/formats/json/commonMain/src/kotlinx/serialization/json/JsonElementSerializers.kt index 294eed14b7..dee6e0139e 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/JsonElementSerializers.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/JsonElementSerializers.kt @@ -96,6 +96,9 @@ internal object JsonNullSerializer : KSerializer { override fun deserialize(decoder: Decoder): JsonNull { verify(decoder) + if (decoder.decodeNotNullMark()) { + + } decoder.decodeNull() return JsonNull } diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonParser.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonParser.kt index aa83a4b411..50d35c5613 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonParser.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonParser.kt @@ -13,70 +13,64 @@ internal class JsonParser( private val isLenient = configuration.isLenient private fun readObject(): JsonElement { - reader.requireTokenClass(TC_BEGIN_OBJ) { "Expected start of the object" } - reader.nextToken() - // Prohibit leading comma - reader.require(reader.tokenClass != TC_COMMA, reader.currentPosition) { "Unexpected leading comma" } + var lastToken = reader.consumeNextToken(TC_BEGIN_OBJ) + if (reader.peekNextToken() == TC_COMMA) reader.fail("Unexpected leading comma") val result = linkedMapOf() - var valueExpected = false - while (reader.canBeginValue) { - valueExpected = false - val key = if (isLenient) reader.takeString() else reader.takeStringQuoted() - reader.requireTokenClass(TC_COLON) { "Expected ':'" } - reader.nextToken() + while (reader.canConsumeValue()) { + // Read key and value + val key = if (isLenient) reader.consumeStringLenient() else reader.consumeString() + reader.consumeNextToken(TC_COLON) val element = read() result[key] = element - if 
(reader.tokenClass != TC_COMMA) { - // Prohibit whitespaces instead of commas {a:b c:d} - reader.requireTokenClass(TC_END_OBJ) { "Expected end of the object or comma" } - } else { - valueExpected = true - reader.nextToken() + // Verify the next token + lastToken = reader.consumeNextToken() + if (lastToken != TC_COMMA && lastToken != TC_END_OBJ) { + reader.fail("Expected end of the object or comma") } } - reader.require(!valueExpected && reader.tokenClass == TC_END_OBJ, reader.currentPosition) { "Expected end of the object" } - reader.nextToken() + // Check for the correct ending + if (lastToken == TC_BEGIN_OBJ) { // Case of empty object + reader.consumeNextToken(TC_END_OBJ) + } else if (lastToken == TC_COMMA) { // Trailing comma + reader.fail("Unexpected trailing comma") + } return JsonObject(result) } private fun readArray(): JsonElement { - reader.requireTokenClass(TC_BEGIN_LIST) { "Expected start of the array" } - reader.nextToken() + var lastToken = reader.consumeNextToken(TC_BEGIN_LIST) // Prohibit leading comma - reader.require(reader.tokenClass != TC_COMMA, reader.currentPosition) { "Unexpected leading comma" } + if (reader.peekNextToken() == TC_COMMA) reader.fail("Unexpected leading comma") val result = arrayListOf() - var valueExpected = false - while (reader.canBeginValue) { - valueExpected = false + while (reader.canConsumeValue()) { val element = read() result.add(element) - if (reader.tokenClass != TC_COMMA) { - // Prohibit whitespaces instead of commas [a b] - reader.requireTokenClass(TC_END_LIST) { "Expected end of the array or comma" } - } else { - valueExpected = true - reader.nextToken() + lastToken = reader.consumeNextToken() + if (lastToken != TC_COMMA) { + reader.require(lastToken == TC_END_LIST) { "Expected end of the array or comma" } } } - // Prohibit trailing commas - reader.require(!valueExpected, reader.currentPosition) { "Unexpected trailing comma" } - reader.nextToken() + // Check for the correct ending + if (lastToken == TC_BEGIN_LIST) 
{ // Case of empty object + reader.consumeNextToken(TC_END_LIST) + } else if (lastToken == TC_COMMA) { // Trailing comma + reader.fail("Unexpected trailing comma") + } return JsonArray(result) } - private fun readValue(isString: Boolean): JsonElement { - val str = if (isLenient) { - reader.takeString() + private fun readValue(isString: Boolean): JsonPrimitive { + val string = if (isLenient || !isString) { + reader.consumeStringLenient() } else { - if (isString) reader.takeStringQuoted() else reader.takeString() + reader.consumeString() } - return JsonLiteral(str, isString) + if (string == NULL) return JsonNull + return JsonLiteral(string, isString) } fun read(): JsonElement { - if (!reader.canBeginValue) reader.fail("Can't begin reading value from here") - return when (reader.tokenClass) { - TC_NULL -> JsonNull.also { reader.nextToken() } + return when (reader.peekNextToken()) { TC_STRING -> readValue(isString = true) TC_OTHER -> readValue(isString = false) TC_BEGIN_OBJ -> readObject() diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonReader.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonReader.kt index b1f82080ac..11e60d9905 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonReader.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonReader.kt @@ -4,15 +4,17 @@ package kotlinx.serialization.json.internal -import kotlinx.serialization.json.internal.CharMappings.C2TC +import kotlinx.serialization.json.internal.CharMappings.CHAR_TO_TOKEN import kotlinx.serialization.json.internal.CharMappings.ESCAPE_2_CHAR import kotlin.jvm.* internal const val lenientHint = "Use 'isLenient = true' in 'Json {}` builder to accept non-compliant JSON." internal const val coerceInputValuesHint = "Use 'coerceInputValues = true' in 'Json {}` builder to coerce nulls to default values." 
-internal const val specialFlowingValuesHint = "It is possible to deserialize them using 'JsonBuilder.allowSpecialFloatingPointValues = true'" +internal const val specialFlowingValuesHint = + "It is possible to deserialize them using 'JsonBuilder.allowSpecialFloatingPointValues = true'" internal const val ignoreUnknownKeysHint = "Use 'ignoreUnknownKeys = true' in 'Json {}' builder to ignore unknown keys." -internal const val allowStructuredMapKeysHint = "Use 'allowStructuredMapKeys = true' in 'Json {}' builder to convert such maps to [key1, value1, key2, value2,...] arrays." +internal const val allowStructuredMapKeysHint = + "Use 'allowStructuredMapKeys = true' in 'Json {}' builder to convert such maps to [key1, value1, key2, value2,...] arrays." // special strings internal const val NULL = "null" @@ -34,16 +36,15 @@ internal const val UNICODE_ESC = 'u' internal const val TC_OTHER: Byte = 0 internal const val TC_STRING: Byte = 1 internal const val TC_STRING_ESC: Byte = 2 -internal const val TC_WS: Byte = 3 +internal const val TC_WHITESPACE: Byte = 3 internal const val TC_COMMA: Byte = 4 internal const val TC_COLON: Byte = 5 internal const val TC_BEGIN_OBJ: Byte = 6 internal const val TC_END_OBJ: Byte = 7 internal const val TC_BEGIN_LIST: Byte = 8 internal const val TC_END_LIST: Byte = 9 -internal const val TC_NULL: Byte = 10 -internal const val TC_INVALID: Byte = 11 -internal const val TC_EOF: Byte = 12 +internal const val TC_EOF: Byte = 10 +internal const val TC_INVALID: Byte = Byte.MAX_VALUE // mapping from chars to token classes private const val CTC_MAX = 0x7e @@ -51,13 +52,19 @@ private const val CTC_MAX = 0x7e // mapping from escape chars real chars private const val ESC2C_MAX = 0x75 +/* + * In ASCII representation, upper and lower case letters are different + * in 6-th bit and we leverage this fact + */ +private const val asciiCaseMask = 1 shl 5 + // object instead of @SharedImmutable because there is mutual initialization in [initC2ESC] and [initC2TC] 
internal object CharMappings { @JvmField val ESCAPE_2_CHAR = CharArray(ESC2C_MAX) @JvmField - val C2TC = ByteArray(CTC_MAX) + val CHAR_TO_TOKEN = ByteArray(CTC_MAX) init { initEscape() @@ -84,10 +91,10 @@ internal object CharMappings { initC2TC(i, TC_INVALID) } - initC2TC(0x09, TC_WS) - initC2TC(0x0a, TC_WS) - initC2TC(0x0d, TC_WS) - initC2TC(0x20, TC_WS) + initC2TC(0x09, TC_WHITESPACE) + initC2TC(0x0a, TC_WHITESPACE) + initC2TC(0x0d, TC_WHITESPACE) + initC2TC(0x20, TC_WHITESPACE) initC2TC(COMMA, TC_COMMA) initC2TC(COLON, TC_COLON) initC2TC(BEGIN_OBJ, TC_BEGIN_OBJ) @@ -105,13 +112,13 @@ internal object CharMappings { private fun initC2ESC(c: Char, esc: Char) = initC2ESC(c.toInt(), esc) private fun initC2TC(c: Int, cl: Byte) { - C2TC[c] = cl + CHAR_TO_TOKEN[c] = cl } private fun initC2TC(c: Char, cl: Byte) = initC2TC(c.toInt(), cl) } -internal fun charToTokenClass(c: Char) = if (c.toInt() < CTC_MAX) C2TC[c.toInt()] else TC_OTHER +internal fun charToTokenClass(c: Char) = if (c.toInt() < CTC_MAX) CHAR_TO_TOKEN[c.toInt()] else TC_OTHER internal fun escapeToChar(c: Int): Char = if (c < ESC2C_MAX) ESCAPE_2_CHAR[c] else INVALID @@ -121,139 +128,169 @@ internal class JsonReader(private val source: String) { @JvmField var currentPosition: Int = 0 // position in source - @JvmField - var tokenClass: Byte = TC_EOF - - public val isDone: Boolean get() = tokenClass == TC_EOF + // TODO this one should be built-in assert + public val isDone: Boolean get() = consumeNextToken() == TC_EOF - public val canBeginValue: Boolean - get() = when (tokenClass) { - TC_BEGIN_LIST, TC_BEGIN_OBJ, TC_OTHER, TC_STRING, TC_NULL -> true - else -> false + fun tryConsumeComma(): Boolean { + val current = skipWhitespaces() + if (current == source.length) return false + if (source[current] == ',') { + ++currentPosition + return true } + return false + } - // updated by nextToken - private var tokenPosition: Int = 0 + fun canConsumeValue(): Boolean { + var current = currentPosition + while (current < 
source.length) { + val c = source[current] + if (c == ' ' || c == '\n' || c == '\r' || c == '\t') { + ++current + continue + } + val tc = charToTokenClass(c) + currentPosition = current + return tc == TC_STRING || tc == TC_OTHER || tc == TC_BEGIN_LIST || tc == TC_BEGIN_OBJ + } + currentPosition = current + return false + } - // update by nextString/nextLiteral - private var offset = -1 // when offset >= 0 string is in source, otherwise in buf + /* + * Peeked string for coerced enums. + * If the value was picked, 'consumeString' will take it without scanning the source. + */ + private var peekedString: String? = null private var length = 0 // length of string private var buf = CharArray(16) // only used for strings with escapes - init { - nextToken() - } - - internal inline fun requireTokenClass(expected: Byte, errorMessage: (Char) -> String) { - if (tokenClass != expected) fail(errorMessage(tokenClass.toChar()), tokenPosition) + fun consumeNextToken(expected: Byte): Byte { + val token = consumeNextToken() + if (token != expected) { + fail(expected) + } + return token } - fun takeString(): String { - if (tokenClass != TC_OTHER && tokenClass != TC_STRING) fail( - "Expected string or non-null literal", tokenPosition - ) - return takeStringInternal() - } + private fun fail(expectedToken: Byte) { + // We know that the token was consumed prior to this call + // Slow path, never called in normal code, can avoid optimizing it + val expected = when (expectedToken) { + TC_STRING -> "quotation mark '\"'" + TC_COMMA -> "comma ','" + TC_COLON -> "semicolon ':'" + TC_BEGIN_OBJ -> "start of the object '{'" + TC_END_OBJ -> "end of the object '}'" + TC_BEGIN_LIST -> "start of the array '['" + TC_END_LIST -> "end of the array ']'" + else -> "valid token" // should never happen + } - fun peekString(isLenient: Boolean): String? 
{ - return if (tokenClass != TC_STRING && (!isLenient || tokenClass != TC_OTHER)) null - else takeStringInternal(advance = false) + fail("Expected $expected, but had '${source[currentPosition - 1]}' instead", currentPosition) } - fun takeStringQuoted(): String { - when (tokenClass) { - TC_STRING -> {} // ok - TC_NULL -> fail( - "Expected string literal but 'null' literal was found.\n$coerceInputValuesHint", - tokenPosition - ) - else -> fail( - "Expected string literal with quotes.\n$lenientHint", - tokenPosition - ) + fun peekNextToken(): Byte { + val source = source + while (currentPosition < source.length) { + val ch = source[currentPosition] + return when (val tc = charToTokenClass(ch)) { + TC_WHITESPACE -> { + ++currentPosition + continue + } + else -> tc + } } - return takeStringInternal() + return TC_EOF } - fun takeBooleanStringUnquoted(): String { - if (tokenClass != TC_OTHER) fail("Expected start of the unquoted boolean literal.\n$lenientHint", tokenPosition) - return takeStringInternal() + fun consumeNextToken(): Byte { + val source = source + while (currentPosition < source.length) { + val ch = source[currentPosition++] + return when (val tc = charToTokenClass(ch)) { + TC_WHITESPACE -> continue + else -> tc + } + } + return TC_EOF } - private fun takeStringInternal(advance: Boolean = true): String { - val prevStr = if (offset < 0) - buf.concatToString(0, 0 + length) else - source.substring(offset, offset + length) - if (advance) nextToken() - return prevStr + /** + * Tries to consume `null` token from input. + * Returns `true` if the next 4 chars in input are not `null`, + * `false` otherwise and consumes it. 
+ */ + fun tryConsumeNotNull(): Boolean { + val current = skipWhitespaces() + // Cannot consume null due to EOF, maybe something else + if (source.length - current < 4) return true + for (i in 0..3) { + if (NULL[i] != source[current + i]) return true + } + currentPosition = current + 4 + return false } - private fun append(ch: Char) { - if (length >= buf.size) buf = buf.copyOf(2 * buf.size) - buf[length++] = ch + private fun skipWhitespaces(): Int { + var current = currentPosition + // Skip whitespaces + while (current < source.length) { + val c = source[current] + // Faster than char2TokenClass actually + if (c == ' ' || c == '\n' || c == '\r' || c == '\t') { + ++current + } else { + break + } + } + currentPosition = current + return current } - // initializes buf usage upon the first encountered escaped char - private fun appendRange(source: String, fromIndex: Int, toIndex: Int) { - val addLen = toIndex - fromIndex - val oldLen = length - val newLen = oldLen + addLen - if (newLen > buf.size) buf = buf.copyOf(newLen.coerceAtLeast(2 * buf.size)) - for (i in 0 until addLen) buf[oldLen + i] = source[fromIndex + i] - length += addLen + fun peekString(isLenient: Boolean): String? 
{ + val token = peekNextToken() + val string = if (isLenient) { + if (token != TC_STRING && token != TC_OTHER) return null + consumeStringLenient() + } else { + if (token != TC_STRING) return null + consumeString() + } + peekedString = string + return string } - fun nextToken() { - val source = source - var currentPosition = currentPosition - while (currentPosition < source.length) { - val ch = source[currentPosition] - when (val tc = charToTokenClass(ch)) { - TC_WS -> currentPosition++ // skip whitespace - TC_OTHER -> { - nextLiteral(source, currentPosition) - return - } - TC_STRING -> { - nextString(source, currentPosition) - return - } - else -> { - this.tokenPosition = currentPosition - this.tokenClass = tc - this.currentPosition = currentPosition + 1 - return - } - } + private fun failBeginningOfTheString() { + // Try to guess if it's null for better error message + --currentPosition + if (!tryConsumeNotNull()) { + fail("Expected string literal but 'null' literal was found.\n$coerceInputValuesHint", currentPosition - 4) + } else { + val symbol = if (++currentPosition == source.length) "EOF" else source[currentPosition] + fail("Expected string literal but had $symbol instead") } - - tokenPosition = currentPosition - tokenClass = TC_EOF } - private fun nextLiteral(source: String, startPos: Int) { - tokenPosition = startPos - offset = startPos - var currentPosition = startPos - while (currentPosition < source.length && charToTokenClass(source[currentPosition]) == TC_OTHER) { - currentPosition++ + fun consumeString(): String { + if (peekedString != null) { + return takePeeked() } - this.currentPosition = currentPosition - length = currentPosition - offset - tokenClass = if (rangeEquals(source, offset, length, NULL)) TC_NULL else TC_OTHER - } - private fun nextString(source: String, startPosition: Int) { - tokenPosition = startPosition - length = 0 // in buffer - var currentPosition = startPosition + 1 // skip starting " - // except if the input ends + if 
(consumeNextToken() != TC_STRING) { + failBeginningOfTheString() + } + var currentPosition = currentPosition if (currentPosition >= source.length) { fail("EOF", currentPosition) } + val startPosition = currentPosition - 1 var lastPosition = currentPosition - while (source[currentPosition] != STRING) { - if (source[currentPosition] == STRING_ESC) { + length = 0 + var char = source[currentPosition] // Avoid two double checks visible in the profiler + while (char != STRING) { + if (char == STRING_ESC) { appendRange(source, lastPosition, currentPosition) val newPosition = appendEsc(source, currentPosition + 1) currentPosition = newPosition @@ -261,18 +298,82 @@ internal class JsonReader(private val source: String) { } else if (++currentPosition >= source.length) { fail("EOF", currentPosition) } + char = source[currentPosition] } - if (lastPosition == startPosition + 1) { + + val string = if (lastPosition == startPosition + 1) { // there was no escaped chars - offset = lastPosition - this.length = currentPosition - lastPosition + source.substring(lastPosition, currentPosition) } else { // some escaped chars were there appendRange(source, lastPosition, currentPosition) - this.offset = -1 + buf.concatToString(0, length) } this.currentPosition = currentPosition + 1 - tokenClass = TC_STRING + return string + } + + private fun takePeeked(): String { + return peekedString!!.also { peekedString = null } + } + + /* + * This method is a copy of consumeString, but used for key of json objects. + * For them we know that escape symbols are _very_ unlikely and can optimistically do + * quotation lookup via `indexOf` (which is a vectorized intrinsic), then substring and + * `indexOf` for escape symbol. It works almost 20% faster for both large and small JSON strings. + */ + fun consumeKeyString(): String { + consumeNextToken(TC_STRING) + val current = currentPosition + val closingQuote = source.indexOf('"', current) + if (closingQuote == -1) fail(TC_STRING) // Better error message? 
+ // TODO explain + for (i in current until closingQuote) { + // Encountered escape sequence, should fallback to "slow" path + if (source[i] == '\\') TODO() + } + this.currentPosition = closingQuote + 1 + return source.substring(current, closingQuote) + } + + // Allows to consume unquoted string + fun consumeStringLenient(): String { + if (peekedString != null) { + return takePeeked() + } + var current = skipWhitespaces() + // Skip leading quotation mark + val token = charToTokenClass(source[current]) + if (token == TC_STRING) { + return consumeString() + } + + if (token != TC_OTHER) { + fail("Expected beginning of the string, but got ${source[current]}") + } + while (current < source.length && charToTokenClass(source[current]) == TC_OTHER) { + ++current + } + val result = source.substring(currentPosition, current) + // Skip trailing quotation + currentPosition = current + return result + } + + private fun append(ch: Char) { + if (length >= buf.size) buf = buf.copyOf(2 * buf.size) + buf[length++] = ch + } + + // initializes buf usage upon the first encountered escaped char + private fun appendRange(source: String, fromIndex: Int, toIndex: Int) { + val addLength = toIndex - fromIndex + val oldLength = length + val newLength = oldLength + addLength + if (newLength > buf.size) buf = buf.copyOf(newLength.coerceAtLeast(2 * buf.size)) + for (i in 0 until addLength) buf[oldLength + i] = source[fromIndex + i] + length += addLength } private fun appendEsc(source: String, startPosition: Int): Int { @@ -301,14 +402,18 @@ internal class JsonReader(private val source: String) { } fun skipElement() { - if (tokenClass != TC_BEGIN_OBJ && tokenClass != TC_BEGIN_LIST) { - nextToken() + val tokenStack = mutableListOf() + var lastToken = peekNextToken() + if (lastToken != TC_BEGIN_LIST && lastToken != TC_BEGIN_OBJ) { + consumeStringLenient() return } - val tokenStack = mutableListOf() - do { - when (tokenClass) { - TC_BEGIN_LIST, TC_BEGIN_OBJ -> tokenStack.add(tokenClass) + while 
(true) { + lastToken = consumeNextToken() + when (lastToken) { + TC_BEGIN_LIST, TC_BEGIN_OBJ -> { + tokenStack.add(lastToken) + } TC_END_LIST -> { if (tokenStack.last() != TC_BEGIN_LIST) throw JsonDecodingException( currentPosition, @@ -316,6 +421,7 @@ internal class JsonReader(private val source: String) { source ) tokenStack.removeAt(tokenStack.size - 1) + if (tokenStack.size == 0) return } TC_END_OBJ -> { if (tokenStack.last() != TC_BEGIN_OBJ) throw JsonDecodingException( @@ -324,17 +430,25 @@ internal class JsonReader(private val source: String) { source ) tokenStack.removeAt(tokenStack.size - 1) + if (tokenStack.size == 0) return } } - nextToken() - } while (tokenStack.isNotEmpty()) + } } override fun toString(): String { - return "JsonReader(source='$source', currentPosition=$currentPosition, tokenClass=$tokenClass, tokenPosition=$tokenPosition, offset=$offset)" + return "JsonReader(source='$source', currentPosition=$currentPosition)" + } + + fun failOnUnknownKey(key: String) { + // At this moment we already have both key and semicolon (and whitespaces! 
consumed), + // but still would like an error to point to the beginning of the key, so we are backtracking it + val processed = source.substring(0, currentPosition) + val lastIndexOf = processed.lastIndexOf(key) + fail("Encountered an unknown key '$key'.\n$ignoreUnknownKeysHint", lastIndexOf) } - public fun fail(message: String, position: Int = currentPosition): Nothing { + fun fail(message: String, position: Int = currentPosition): Nothing { throw JsonDecodingException(position, message, source) } @@ -351,11 +465,54 @@ internal class JsonReader(private val source: String) { else -> fail("Invalid toHexChar char '$curChar' in unicode escape") } } -} -private fun rangeEquals(source: String, start: Int, length: Int, str: String): Boolean { - val n = str.length - if (length != n) return false - for (i in 0 until n) if (source[start + i] != str[i]) return false - return true + // fun consumeBoolean(allowQuotation: Boolean): Boolean { +// skipWhitespaces() +// var current = currentPosition +//// var hasQuote = false +//// if (allowQuotation && source[current] == STRING) { +//// hasQuote = true +//// ++current +//// } +// +// // TODO handle EOF +// val result = when (source[current++].toInt() or asciiCaseMask) { +// 't'.toInt() -> { +// if (source.length - current < 3) fail("") +// val r = source[current + 0].toInt() or asciiCaseMask == 'r'.toInt() +// val u = source[current + 1].toInt() or asciiCaseMask == 'u'.toInt() +// val e = source[current + 2].toInt() or asciiCaseMask == 'e'.toInt() +// if (!(r and u and e)) fail("") +// +//// for ((i, c) in "rue".withIndex()) { +//// if (c.toInt() != source[current + i].toInt() or asciiCaseMask) { +//// fail("") +//// } +//// } +// currentPosition += 4 +// true +// } +// 'f'.toInt() -> { +// if (source.length - current < 4) fail("") +// val a = source[current + 0].toInt() or asciiCaseMask == 'a'.toInt() +// val l = source[current + 1].toInt() or asciiCaseMask == 'l'.toInt() +// val s = source[current + 2].toInt() or asciiCaseMask 
== 's'.toInt() +// val e = source[current + 3].toInt() or asciiCaseMask == 'e'.toInt() +// if (!(a and l and s and e)) fail("") +//// for ((i, c) in "alse".withIndex()) { +//// if (c.toInt() != source[current + i].toInt() or asciiCaseMask) { +//// fail("") +//// } +//// } +// currentPosition += 5 +// false +// } +// else -> TODO() +// } +// +//// if (hasQuote) { +//// +//// } +// return result +// } } diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonDecoder.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonDecoder.kt index cfc2726c3d..fd095ce54d 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonDecoder.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonDecoder.kt @@ -16,17 +16,17 @@ import kotlin.jvm.* * [JsonDecoder] which reads given JSON from [JsonReader] field by field. */ @OptIn(ExperimentalSerializationApi::class, ExperimentalUnsignedTypes::class) -internal open class StreamingJsonDecoder internal constructor( - public override val json: Json, +internal open class StreamingJsonDecoder( + final override val json: Json, private val mode: WriteMode, @JvmField internal val reader: JsonReader ) : JsonDecoder, AbstractDecoder() { - public override val serializersModule: SerializersModule = json.serializersModule + override val serializersModule: SerializersModule = json.serializersModule private var currentIndex = -1 private val configuration = json.configuration - public override fun decodeJsonElement(): JsonElement = JsonParser(json.configuration, reader).read() + override fun decodeJsonElement(): JsonElement = JsonParser(json.configuration, reader).read() override fun decodeSerializableValue(deserializer: DeserializationStrategy): T { return decodeSerializableValuePolymorphic(deserializer) @@ -34,73 +34,70 @@ internal open class StreamingJsonDecoder internal constructor( override fun beginStructure(descriptor: 
SerialDescriptor): CompositeDecoder { val newMode = json.switchMode(descriptor) - if (newMode.begin != INVALID) { - reader.requireTokenClass(newMode.beginTc) { "Expected '${newMode.begin}, kind: ${descriptor.kind}'" } - reader.nextToken() - } + reader.consumeNextToken(newMode.beginTc) + checkLeadingComma() return when (newMode) { + // In fact resets current index that these modes rely on WriteMode.LIST, WriteMode.MAP, WriteMode.POLY_OBJ -> StreamingJsonDecoder( json, newMode, reader - ) // need fresh cur index - else -> if (mode == newMode) this else - StreamingJsonDecoder(json, newMode, reader) // todo: reuse instance per mode + ) + else -> if (mode == newMode) { + this + } else { + StreamingJsonDecoder(json, newMode, reader) + } } } override fun endStructure(descriptor: SerialDescriptor) { - if (mode.end != INVALID) { - reader.requireTokenClass(mode.endTc) { "Expected '${mode.end}'" } - reader.nextToken() - } + reader.consumeNextToken(mode.endTc) } override fun decodeNotNullMark(): Boolean { - return reader.tokenClass != TC_NULL + return reader.tryConsumeNotNull() } override fun decodeNull(): Nothing? 
{ - reader.requireTokenClass(TC_NULL) { "Expected 'null' literal" } - reader.nextToken() + // Do nothing, null was consumed return null } - override fun decodeElementIndex(descriptor: SerialDescriptor): Int { - val tokenClass = reader.tokenClass - if (tokenClass == TC_COMMA) { - reader.require(currentIndex != -1, reader.currentPosition) { "Unexpected leading comma" } - reader.nextToken() + private fun checkLeadingComma() { + if (reader.peekNextToken() == TC_COMMA) { + reader.fail("Unexpected leading comma") } + } + + override fun decodeElementIndex(descriptor: SerialDescriptor): Int { return when (mode) { - WriteMode.LIST -> decodeListIndex(tokenClass) - WriteMode.MAP -> decodeMapIndex(tokenClass) - WriteMode.POLY_OBJ -> { - when (++currentIndex) { - 0 -> 0 - 1 -> 1 - else -> { - CompositeDecoder.DECODE_DONE - } - } - } - else -> decodeObjectIndex(tokenClass, descriptor) + WriteMode.OBJ -> decodeObjectIndex(descriptor) + WriteMode.MAP -> decodeMapIndex() + else -> decodeListIndex() // Both for LIST and default polymorphic } } - private fun decodeMapIndex(tokenClass: Byte): Int { - if (tokenClass != TC_COMMA && currentIndex % 2 == 1) { - reader.requireTokenClass(TC_END_OBJ) { "Expected end of the object or comma" } - } - if (currentIndex % 2 == 0) { - reader.requireTokenClass(TC_COLON) { "Expected ':' after the key" } - reader.nextToken() - } - return if (!reader.canBeginValue) { - reader.require(tokenClass != TC_COMMA) { "Unexpected trailing comma" } - CompositeDecoder.DECODE_DONE + private fun decodeMapIndex(): Int { + var hasComma = false + val decodingKey = currentIndex % 2 != 0 + if (decodingKey) { + if (currentIndex != -1) { + hasComma = reader.tryConsumeComma() + } } else { + reader.consumeNextToken(TC_COLON) + } + + return if (reader.canConsumeValue()) { + if (decodingKey) { + if (currentIndex == -1) reader.require(!hasComma) { "Unexpected trailing comma" } + else reader.require(hasComma) { "Expected comma after the key-value pair" } + } ++currentIndex + } 
else { + if (hasComma) reader.fail("Expected '}', but had ',' instead") + CompositeDecoder.DECODE_DONE } } @@ -109,73 +106,79 @@ internal open class StreamingJsonDecoder internal constructor( */ private fun coerceInputValue(descriptor: SerialDescriptor, index: Int): Boolean { val elementDescriptor = descriptor.getElementDescriptor(index) - if (reader.tokenClass == TC_NULL && !elementDescriptor.isNullable) return true // null for non-nullable + if (!elementDescriptor.isNullable && !reader.tryConsumeNotNull()) return true if (elementDescriptor.kind == SerialKind.ENUM) { val enumValue = reader.peekString(configuration.isLenient) - ?: return false // if value is not a string, decodeEnum() will throw correct exception + ?: return false // if value is not a string, decodeEnum() will throw correct exception val enumIndex = elementDescriptor.getElementIndex(enumValue) - if (enumIndex == UNKNOWN_NAME) return true + if (enumIndex == UNKNOWN_NAME) { + // Encountered unknown enum value, have to skip it + reader.consumeString() + return true + } } return false } - private fun decodeObjectIndex(tokenClass: Byte, descriptor: SerialDescriptor): Int { - if (tokenClass == TC_COMMA && !reader.canBeginValue) { - reader.fail("Unexpected trailing comma") - } - - while (reader.canBeginValue) { - ++currentIndex - val key = decodeString() - reader.requireTokenClass(TC_COLON) { "Expected ':'" } - reader.nextToken() + private fun decodeObjectIndex(descriptor: SerialDescriptor): Int { + // hasComma checks are required to properly react on trailing commas + var hasComma = reader.tryConsumeComma() + while (reader.canConsumeValue()) { // TODO: consider merging comma consumption and this check + hasComma = false + val key = decodeStringKey() + reader.consumeNextToken(TC_COLON) val index = descriptor.getElementIndex(key) val isUnknown = if (index != UNKNOWN_NAME) { if (configuration.coerceInputValues && coerceInputValue(descriptor, index)) { - false // skip known element + hasComma = 
reader.tryConsumeComma() + false // Known element, but coerced } else { - return index // read known element + return index // Known element without coercing, return it } } else { true // unknown element } - if (isUnknown && !configuration.ignoreUnknownKeys) { - reader.fail("Encountered an unknown key '$key'.\n$ignoreUnknownKeysHint") - } else { - reader.skipElement() - } - - if (reader.tokenClass == TC_COMMA) { - reader.nextToken() - reader.require(reader.canBeginValue, reader.currentPosition) { "Unexpected trailing comma" } + if (isUnknown) { // slow-path for unknown keys handling + hasComma = handleUnknown(key) } } + if (hasComma) reader.fail("Unexpected trailing comma") return CompositeDecoder.DECODE_DONE } - private fun decodeListIndex(tokenClass: Byte): Int { - // Prohibit leading comma - if (tokenClass != TC_COMMA && currentIndex != -1) { - reader.requireTokenClass(TC_END_LIST) { "Expected end of the array or comma" } - } - return if (!reader.canBeginValue) { - reader.require(tokenClass != TC_COMMA) { "Unexpected trailing comma" } - CompositeDecoder.DECODE_DONE + private fun handleUnknown(key: String): Boolean { + if (configuration.ignoreUnknownKeys) { + reader.skipElement() } else { + reader.failOnUnknownKey(key) + } + return reader.tryConsumeComma() + } + + private fun decodeListIndex(): Int { + // Prohibit leading comma + val hasComma = reader.tryConsumeComma() + return if (reader.canConsumeValue()) { + if (currentIndex != -1 && !hasComma) reader.fail("Expected end of the array or comma") ++currentIndex + } else { + if (hasComma) reader.fail("Unexpected trailing comma") + CompositeDecoder.DECODE_DONE } } + override fun decodeBoolean(): Boolean { /* * We prohibit non true/false boolean literals at all as it is considered way too error-prone, * but allow quoted literal in relaxed mode for booleans. 
*/ val string = if (configuration.isLenient) { - reader.takeString() + reader.consumeStringLenient() } else { - reader.takeBooleanStringUnquoted() + // TODO _SHOULD_ be ONLY unquoted + reader.consumeStringLenient() } string.toBooleanStrictOrNull()?.let { return it } reader.fail("Failed to parse type 'boolean' for input '$string'") @@ -206,11 +209,19 @@ internal open class StreamingJsonDecoder internal constructor( override fun decodeChar(): Char = reader.parseString("char") { single() } + private fun decodeStringKey(): String { + return if (configuration.isLenient) { + reader.consumeStringLenient() + } else { + reader.consumeKeyString() + } + } + override fun decodeString(): String { return if (configuration.isLenient) { - reader.takeString() + reader.consumeStringLenient() } else { - reader.takeStringQuoted() + reader.consumeString() } } @@ -239,7 +250,7 @@ internal class JsonDecoderForUnsignedTypes( } private inline fun JsonReader.parseString(expectedType: String, block: String.() -> T): T { - val input = takeString() + val input = consumeStringLenient() try { return input.block() } catch (e: IllegalArgumentException) { diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/TreeJsonDecoder.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/TreeJsonDecoder.kt index 4f357756c4..993b9676ac 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/TreeJsonDecoder.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/TreeJsonDecoder.kt @@ -279,9 +279,6 @@ private class JsonTreeListDecoder(json: Json, override val value: JsonArray) : A } } -internal const val updateModeDeprecated = "Update mode in Decoder is deprecated for removal. " + - "Update behaviour is now considered an implementation detail of the format that should not concern serializer." 
- /** * Same as [SerialDescriptor.getElementIndex], but throws [SerializationException] if * given [name] is not associated with any element in the descriptor. diff --git a/formats/json/commonTest/src/kotlinx/serialization/json/JsonParserTest.kt b/formats/json/commonTest/src/kotlinx/serialization/json/JsonParserTest.kt index fc8d3383a8..9b23b4ce4c 100644 --- a/formats/json/commonTest/src/kotlinx/serialization/json/JsonParserTest.kt +++ b/formats/json/commonTest/src/kotlinx/serialization/json/JsonParserTest.kt @@ -70,11 +70,10 @@ class JsonParserTest : JsonTestBase() { testTrailingComma("{\"id\":0 , ,}") } - private fun testTrailingComma(content: String) { val e = assertFailsWith { Json.parseToJsonElement(content) } val msg = e.message!! - assertTrue(msg.contains("Expected end of the object")) + assertTrue(msg.contains("Unexpected trailing")) } @Test diff --git a/formats/json/commonTest/src/kotlinx/serialization/json/JsonUseDefaultOnNullAndUnknownTest.kt b/formats/json/commonTest/src/kotlinx/serialization/json/JsonUseDefaultOnNullAndUnknownTest.kt index d38d1d49d8..402479786e 100644 --- a/formats/json/commonTest/src/kotlinx/serialization/json/JsonUseDefaultOnNullAndUnknownTest.kt +++ b/formats/json/commonTest/src/kotlinx/serialization/json/JsonUseDefaultOnNullAndUnknownTest.kt @@ -98,5 +98,4 @@ class JsonUseDefaultOnNullAndUnknownTest : JsonTestBase() { assertEquals(expected, json.decodeFromString(MultipleValues.serializer(), input), "Failed on input: $input") } } - } diff --git a/formats/json/jvmTest/src/kotlinx/serialization/json/GsonCompatibilityTest.kt b/formats/json/jvmTest/src/kotlinx/serialization/json/GsonCompatibilityTest.kt index dcf06f12ae..99bc23f935 100644 --- a/formats/json/jvmTest/src/kotlinx/serialization/json/GsonCompatibilityTest.kt +++ b/formats/json/jvmTest/src/kotlinx/serialization/json/GsonCompatibilityTest.kt @@ -1,45 +1,56 @@ package kotlinx.serialization.json +import com.google.gson.* import kotlinx.serialization.* -import 
kotlinx.serialization.descriptors.* -import kotlinx.serialization.encoding.* -import org.junit.* +import org.junit.Test +import kotlin.test.* class GsonCompatibilityTest { - @Serializable(with = ValueSerializer::class) - data class Value(val isSet: Boolean, val value: T?) + @Serializable + data class Box(val d: Double, val f: Float) -class ValueSerializer(private val dataSerializer: KSerializer) : KSerializer> { - override val descriptor: SerialDescriptor = PrimitiveSerialDescriptor("Value", PrimitiveKind.STRING).nullable + @Test + fun testNaN() { + checkCompatibility(Box(Double.NaN, 1.0f)) + checkCompatibility(Box(1.0, Float.NaN)) + checkCompatibility(Box(Double.NaN, Float.NaN)) + } - override fun serialize(encoder: Encoder, value: Value) { - encoder.encodeNullableSerializableValue(dataSerializer, value.value) + @Test + fun testInfinity() { + checkCompatibility(Box(Double.POSITIVE_INFINITY, 1.0f)) + checkCompatibility(Box(1.0, Float.POSITIVE_INFINITY)) + checkCompatibility(Box(Double.NEGATIVE_INFINITY, Float.NEGATIVE_INFINITY)) } - override fun deserialize(decoder: Decoder) = TODO("Not implemented!") -} + @Test + fun testNumber() { + checkCompatibility(Box(23.9, 23.9f)) + } - class ValueClassSerializer(private val dataSerializer: KSerializer) : - JsonTransformingSerializer(dataSerializer) { - override fun transformSerialize(element: JsonElement): JsonElement = - element.jsonObject + private fun checkCompatibility(box: Box) { + checkCompatibility(box, Gson(), Json) + checkCompatibility(box, GsonBuilder().serializeSpecialFloatingPointValues().create(), Json { allowSpecialFloatingPointValues = true }) } - @Serializable - data class TestObject( - val test1: Value = Value(true, "Hello World"), - val test2: Value = Value(false, null), - val test3: Value = Value(true, null), - ) + private fun checkCompatibility(box: Box, gson: Gson, json: Json) { + val jsonResult = resultOrNull { json.encodeToString(box) } + val gsonResult = resultOrNull { gson.toJson(box) } + 
assertEquals(gsonResult, jsonResult) - @Test - fun f() { - println( - Json { encodeDefaults = true }.encodeToString( - ValueClassSerializer(TestObject.serializer()), - TestObject() - ) - ) + if (jsonResult != null && gsonResult != null) { + val jsonDeserialized: Box = json.decodeFromString(jsonResult) + val gsonDeserialized: Box = gson.fromJson(gsonResult, Box::class.java) + assertEquals(gsonDeserialized, jsonDeserialized) + } + } + + private fun resultOrNull(function: () -> String): String? { + return try { + function() + } catch (t: Throwable) { + null + } } } diff --git a/gradle.properties b/gradle.properties index 2f2b1f6d51..dd487a3f49 100644 --- a/gradle.properties +++ b/gradle.properties @@ -5,7 +5,7 @@ group=org.jetbrains.kotlinx version=1.1.0-RC -kotlin.version=1.4.30-270 +kotlin.version=1.4.30 # This version take precedence if 'bootstrap' property passed to project kotlin.version.snapshot=1.4.255-SNAPSHOT diff --git a/guide/test/BasicSerializationTest.kt b/guide/test/BasicSerializationTest.kt index 62b90f4075..4f8871b947 100644 --- a/guide/test/BasicSerializationTest.kt +++ b/guide/test/BasicSerializationTest.kt @@ -79,7 +79,7 @@ class BasicSerializationTest { @Test fun testExampleClasses08() { captureOutput("ExampleClasses08") { example.exampleClasses08.main() }.verifyOutputLinesStart( - "Exception in thread \"main\" kotlinx.serialization.json.internal.JsonDecodingException: Unexpected JSON token at offset 60: Encountered an unknown key 'language'.", + "Exception in thread \"main\" kotlinx.serialization.json.internal.JsonDecodingException: Unexpected JSON token at offset 42: Encountered an unknown key 'language'.", "Use 'ignoreUnknownKeys = true' in 'Json {}' builder to ignore unknown keys." 
) } From 36d96524fd9720ea3c442c5b46694f4ea295a018 Mon Sep 17 00:00:00 2001 From: Vsevolod Tolstopyatov Date: Wed, 17 Feb 2021 17:04:08 +0300 Subject: [PATCH 02/11] Optimize boolean consumption (+40% on boolean stress benchmark) --- .../serialization/json/internal/JsonReader.kt | 115 ++++++++++-------- .../json/internal/StreamingJsonDecoder.kt | 9 +- 2 files changed, 65 insertions(+), 59 deletions(-) diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonReader.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonReader.kt index 11e60d9905..399532198a 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonReader.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonReader.kt @@ -52,10 +52,6 @@ private const val CTC_MAX = 0x7e // mapping from escape chars real chars private const val ESC2C_MAX = 0x75 -/* - * In ASCII representation, upper and lower case letters are different - * in 6-th bit and we leverage this fact - */ private const val asciiCaseMask = 1 shl 5 // object instead of @SharedImmutable because there is mutual initialization in [initC2ESC] and [initC2TC] @@ -466,53 +462,66 @@ internal class JsonReader(private val source: String) { } } - // fun consumeBoolean(allowQuotation: Boolean): Boolean { -// skipWhitespaces() -// var current = currentPosition -//// var hasQuote = false -//// if (allowQuotation && source[current] == STRING) { -//// hasQuote = true -//// ++current -//// } -// -// // TODO handle EOF -// val result = when (source[current++].toInt() or asciiCaseMask) { -// 't'.toInt() -> { -// if (source.length - current < 3) fail("") -// val r = source[current + 0].toInt() or asciiCaseMask == 'r'.toInt() -// val u = source[current + 1].toInt() or asciiCaseMask == 'u'.toInt() -// val e = source[current + 2].toInt() or asciiCaseMask == 'e'.toInt() -// if (!(r and u and e)) fail("") -// -//// for ((i, c) in "rue".withIndex()) { -//// if (c.toInt() != 
source[current + i].toInt() or asciiCaseMask) { -//// fail("") -//// } -//// } -// currentPosition += 4 -// true -// } -// 'f'.toInt() -> { -// if (source.length - current < 4) fail("") -// val a = source[current + 0].toInt() or asciiCaseMask == 'a'.toInt() -// val l = source[current + 1].toInt() or asciiCaseMask == 'l'.toInt() -// val s = source[current + 2].toInt() or asciiCaseMask == 's'.toInt() -// val e = source[current + 3].toInt() or asciiCaseMask == 'e'.toInt() -// if (!(a and l and s and e)) fail("") -//// for ((i, c) in "alse".withIndex()) { -//// if (c.toInt() != source[current + i].toInt() or asciiCaseMask) { -//// fail("") -//// } -//// } -// currentPosition += 5 -// false -// } -// else -> TODO() -// } -// -//// if (hasQuote) { -//// -//// } -// return result -// } + fun consumeBoolean(): Boolean { + return consumeBoolean(skipWhitespaces()) + } + + fun consumeBooleanLenient(): Boolean { + var current = skipWhitespaces() + if (current == source.length) fail("EOF") + val hasQuotation = if (source[current] == STRING) { + ++current + true + } else { + false + } + val result = consumeBoolean(current) + if (hasQuotation) { + if (currentPosition == source.length) fail("EOF") + if (source[currentPosition] != STRING) + fail("Expected closing quotation mark") + ++currentPosition + } + return result + } + + private fun consumeBoolean(start: Int): Boolean { + /* + * In ASCII representation, upper and lower case letters are different + * in 6-th bit and we leverage this fact, our implementation consumes boolean literals + * in a case-insensitive manner. 
+ */ + var current = start + if (current == source.length) fail("EOF") + return when (source[current++].toInt() or asciiCaseMask) { + 't'.toInt() -> { + consumeBooleanLiteral("rue", current) + true + } + 'f'.toInt() -> { + consumeBooleanLiteral("alse", current) + false + } + else -> { + fail("Expected valid boolean literal prefix, but had ${source[current - 1]}") + } + } + } + + + private fun consumeBooleanLiteral(literalSuffix: String, current: Int) { + if (source.length - current < literalSuffix.length) { + fail("Unexpected end of boolean literal") + } + + for (i in literalSuffix.indices) { + val expected = literalSuffix[i] + val actual = source[current + i] + if (expected.toInt() != actual.toInt() or asciiCaseMask) { + fail("Expected valid boolean literal prefix, but had ${source.substring(current - 1, current - 1 + i)}") + } + } + + currentPosition = current + literalSuffix.length + } } diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonDecoder.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonDecoder.kt index fd095ce54d..ca0f24be24 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonDecoder.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonDecoder.kt @@ -174,14 +174,11 @@ internal open class StreamingJsonDecoder( * We prohibit non true/false boolean literals at all as it is considered way too error-prone, * but allow quoted literal in relaxed mode for booleans. 
*/ - val string = if (configuration.isLenient) { - reader.consumeStringLenient() + return if (configuration.isLenient) { + reader.consumeBooleanLenient() } else { - // TODO _SHOULD_ be ONLY unquoted - reader.consumeStringLenient() + return reader.consumeBoolean() } - string.toBooleanStrictOrNull()?.let { return it } - reader.fail("Failed to parse type 'boolean' for input '$string'") } /* From e9d4d3efbae795ffe959a4da120da277a15fdf85 Mon Sep 17 00:00:00 2001 From: Vsevolod Tolstopyatov Date: Wed, 17 Feb 2021 18:42:55 +0300 Subject: [PATCH 03/11] Parse numeric literals in an allocation-free manner by manually iterating over the source --- .../serialization/json/internal/JsonReader.kt | 58 +++++++++++++++++-- .../json/internal/StreamingJsonDecoder.kt | 36 ++++++++++-- .../json/JsonParserFailureModesTest.kt | 2 + .../SerializerForNullableJavaTypeTest.kt | 2 +- 4 files changed, 86 insertions(+), 12 deletions(-) diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonReader.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonReader.kt index 399532198a..5ffc382321 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonReader.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonReader.kt @@ -124,7 +124,6 @@ internal class JsonReader(private val source: String) { @JvmField var currentPosition: Int = 0 // position in source - // TODO this one should be built-in assert public val isDone: Boolean get() = consumeNextToken() == TC_EOF fun tryConsumeComma(): Boolean { @@ -145,14 +144,20 @@ internal class JsonReader(private val source: String) { ++current continue } - val tc = charToTokenClass(c) currentPosition = current - return tc == TC_STRING || tc == TC_OTHER || tc == TC_BEGIN_LIST || tc == TC_BEGIN_OBJ + return isValidValueStart(c) } currentPosition = current return false } + private fun isValidValueStart(c: Char): Boolean { + return when (c) { + '}', ']', ':', ',' -> false + else 
-> true + } + } + /* * Peeked string for coerced enums. * If the value was picked, 'consumeString' will take it without scanning the source. @@ -182,7 +187,7 @@ internal class JsonReader(private val source: String) { TC_END_LIST -> "end of the array ']'" else -> "valid token" // should never happen } - + // TODO off-by-one? fail("Expected $expected, but had '${source[currentPosition - 1]}' instead", currentPosition) } @@ -462,6 +467,50 @@ internal class JsonReader(private val source: String) { } } + fun consumeNumericLiteral(): Long { + var current = skipWhitespaces() + val hasQuotation = if (source[current] == STRING) { + ++current + true + } else { + false + } + if (current == source.length) fail("EOF") + var accumulator = 0L + var isNegative = false + val start = current + var hasChars = true + while (hasChars) { + val ch: Char = source[current] + if (ch == '-') { + if (current != start) fail("Unexpected symbol '-' in numeric literal") + isNegative = true + ++current + continue + } + val token = charToTokenClass(ch) + if (token != TC_OTHER) break + ++current + hasChars = current != source.length + val digit = ch - '0' + if (digit !in 0..9) + fail("Unexpected symbol '$ch' in numeric literal") + accumulator = accumulator * 10 - digit + if (accumulator > 0) fail("Numeric value overflow") + } + if (start == current) { + fail("Expected numeric literal") + } + if (hasQuotation) { + if (!hasChars) fail("EOF") + if (source[current] != STRING) fail("Expected closing quotation mark") + ++current + } + currentPosition = current + return if (isNegative) accumulator else -accumulator + } + + fun consumeBoolean(): Boolean { return consumeBoolean(skipWhitespaces()) } @@ -508,7 +557,6 @@ internal class JsonReader(private val source: String) { } } - private fun consumeBooleanLiteral(literalSuffix: String, current: Int) { if (source.length - current < literalSuffix.length) { fail("Unexpected end of boolean literal") diff --git 
a/formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonDecoder.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonDecoder.kt index ca0f24be24..7d468f6d13 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonDecoder.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonDecoder.kt @@ -182,13 +182,33 @@ internal open class StreamingJsonDecoder( } /* - * The rest of the primitives are allowed to be quoted and unqouted + * The rest of the primitives are allowed to be quoted and unquoted * to simplify integrations with third-party API. */ - override fun decodeByte(): Byte = reader.parseString("byte") { toByte() } - override fun decodeShort(): Short = reader.parseString("short") { toShort() } - override fun decodeInt(): Int = reader.parseString("int") { toInt() } - override fun decodeLong(): Long = reader.parseString("long") { toLong() } + override fun decodeByte(): Byte { + val value = reader.consumeNumericLiteral() + // Check for overflow + if (value != value.toByte().toLong()) reader.fail("Failed to parse byte for input '$value'") + return value.toByte() + } + + override fun decodeShort(): Short { + val value = reader.consumeNumericLiteral() + // Check for overflow + if (value != value.toShort().toLong()) reader.fail("Failed to parse byte for input '$value'") + return value.toShort() + } + + override fun decodeInt(): Int { + val value = reader.consumeNumericLiteral() + // Check for overflow + if (value != value.toInt().toLong()) reader.fail("Failed to parse byte for input '$value'") + return value.toInt() + } + + override fun decodeLong(): Long { + return reader.consumeNumericLiteral() + } override fun decodeFloat(): Float { val result = reader.parseString("float") { toFloat() } @@ -204,7 +224,11 @@ internal open class StreamingJsonDecoder( reader.throwInvalidFloatingPointDecoded(result) } - override fun decodeChar(): Char = 
reader.parseString("char") { single() } + override fun decodeChar(): Char { + val string= reader.consumeStringLenient() + if (string.length != 1) reader.fail("Expected single char, but got '$string'") + return string[0] + } private fun decodeStringKey(): String { return if (configuration.isLenient) { diff --git a/formats/json/commonTest/src/kotlinx/serialization/json/JsonParserFailureModesTest.kt b/formats/json/commonTest/src/kotlinx/serialization/json/JsonParserFailureModesTest.kt index ce547ba3c0..f58281e6da 100644 --- a/formats/json/commonTest/src/kotlinx/serialization/json/JsonParserFailureModesTest.kt +++ b/formats/json/commonTest/src/kotlinx/serialization/json/JsonParserFailureModesTest.kt @@ -122,5 +122,7 @@ class JsonParserFailureModesTest : JsonTestBase() { default.decodeFromString("""{"s": ${Short.MIN_VALUE}}""", it) default.decodeFromString("""{"i": ${Int.MAX_VALUE}}""", it) default.decodeFromString("""{"i": ${Int.MIN_VALUE}}""", it) + default.decodeFromString("""{"id": ${Long.MIN_VALUE}}""", it) + default.decodeFromString("""{"id": ${Long.MAX_VALUE}}""", it) } } diff --git a/formats/json/jvmTest/src/kotlinx/serialization/SerializerForNullableJavaTypeTest.kt b/formats/json/jvmTest/src/kotlinx/serialization/SerializerForNullableJavaTypeTest.kt index dc4117ea4d..ebed6f370b 100644 --- a/formats/json/jvmTest/src/kotlinx/serialization/SerializerForNullableJavaTypeTest.kt +++ b/formats/json/jvmTest/src/kotlinx/serialization/SerializerForNullableJavaTypeTest.kt @@ -20,7 +20,7 @@ class SerializerForNullableJavaTypeTest { override fun deserialize(decoder: Decoder): Date? = when (val seconds = decoder.decodeLong()) { -1L -> null - else -> Date(seconds.toLong()) + else -> Date(seconds) } override fun serialize(encoder: Encoder, value: Date?) 
{ From 9fd24121cee7d98418b3eb63f556b1fa13e1e247 Mon Sep 17 00:00:00 2001 From: Vsevolod Tolstopyatov Date: Thu, 18 Feb 2021 16:45:35 +0300 Subject: [PATCH 04/11] Optimize JsonReader further -- don't make conditional array lookups where they are not really necessary It gives solid 5-10% of throughput --- .../serialization/json/internal/JsonParser.kt | 2 +- .../serialization/json/internal/JsonReader.kt | 28 +++++++++++++++---- .../json/internal/StreamingJsonDecoder.kt | 8 +++--- 3 files changed, 27 insertions(+), 11 deletions(-) diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonParser.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonParser.kt index 50d35c5613..bd44e6dd90 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonParser.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonParser.kt @@ -38,7 +38,7 @@ internal class JsonParser( } private fun readArray(): JsonElement { - var lastToken = reader.consumeNextToken(TC_BEGIN_LIST) + var lastToken = reader.consumeNextToken() // Prohibit leading comma if (reader.peekNextToken() == TC_COMMA) reader.fail("Unexpected leading comma") val result = arrayListOf() diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonReader.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonReader.kt index 5ffc382321..45c2863514 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonReader.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonReader.kt @@ -166,6 +166,7 @@ internal class JsonReader(private val source: String) { private var length = 0 // length of string private var buf = CharArray(16) // only used for strings with escapes + // TODO consider replacing usages of this method in JsonParser with char overload fun consumeNextToken(expected: Byte): Byte { val token = consumeNextToken() if (token != expected) { @@ -174,6 +175,17 @@
internal class JsonReader(private val source: String) { return token } + fun consumeNextToken(expected: Char) { + val source = source + while (currentPosition < source.length) { + val c = source[currentPosition++] + if (c == ' ' || c == '\n' || c == '\r' || c == '\t') continue + if (c == expected) return + fail(charToTokenClass(expected)) + } + fail(charToTokenClass(expected)) // EOF + } + private fun fail(expectedToken: Byte) { // We know that the token was consumed prior to this call // Slow path, never called in normal code, can avoid optimizing it @@ -288,7 +300,6 @@ internal class JsonReader(private val source: String) { } val startPosition = currentPosition - 1 var lastPosition = currentPosition - length = 0 var char = source[currentPosition] // Avoid two double checks visible in the profiler while (char != STRING) { if (char == STRING_ESC) { @@ -308,7 +319,10 @@ internal class JsonReader(private val source: String) { } else { // some escaped chars were there appendRange(source, lastPosition, currentPosition) - buf.concatToString(0, length) + val l = length + length = 0 + buf.concatToString(0, l) + } this.currentPosition = currentPosition + 1 return string @@ -325,14 +339,17 @@ internal class JsonReader(private val source: String) { * `indexOf` for escape symbol. It works almost 20% faster for both large and small JSON strings. */ fun consumeKeyString(): String { - consumeNextToken(TC_STRING) + consumeNextToken(STRING) val current = currentPosition val closingQuote = source.indexOf('"', current) if (closingQuote == -1) fail(TC_STRING) // Better error message? 
// TODO explain for (i in current until closingQuote) { // Encountered escape sequence, should fallback to "slow" path - if (source[i] == '\\') TODO() + if (source[i] == '\\') { + TODO() + break + } } this.currentPosition = closingQuote + 1 return source.substring(current, closingQuote) @@ -493,8 +510,7 @@ internal class JsonReader(private val source: String) { ++current hasChars = current != source.length val digit = ch - '0' - if (digit !in 0..9) - fail("Unexpected symbol '$ch' in numeric literal") + if (digit !in 0..9) fail("Unexpected symbol '$ch' in numeric literal") accumulator = accumulator * 10 - digit if (accumulator > 0) fail("Numeric value overflow") } diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonDecoder.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonDecoder.kt index 7d468f6d13..71a9c8d5f4 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonDecoder.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonDecoder.kt @@ -34,7 +34,7 @@ internal open class StreamingJsonDecoder( override fun beginStructure(descriptor: SerialDescriptor): CompositeDecoder { val newMode = json.switchMode(descriptor) - reader.consumeNextToken(newMode.beginTc) + reader.consumeNextToken(newMode.begin) checkLeadingComma() return when (newMode) { // In fact resets current index that these modes rely on @@ -52,7 +52,7 @@ internal open class StreamingJsonDecoder( } override fun endStructure(descriptor: SerialDescriptor) { - reader.consumeNextToken(mode.endTc) + reader.consumeNextToken(mode.end) } override fun decodeNotNullMark(): Boolean { @@ -86,7 +86,7 @@ internal open class StreamingJsonDecoder( hasComma = reader.tryConsumeComma() } } else { - reader.consumeNextToken(TC_COLON) + reader.consumeNextToken(COLON) } return if (reader.canConsumeValue()) { @@ -126,7 +126,7 @@ internal open class StreamingJsonDecoder( while 
(reader.canConsumeValue()) { // TODO: consider merging comma consumption and this check hasComma = false val key = decodeStringKey() - reader.consumeNextToken(TC_COLON) + reader.consumeNextToken(COLON) val index = descriptor.getElementIndex(key) val isUnknown = if (index != UNKNOWN_NAME) { if (configuration.coerceInputValues && coerceInputValue(descriptor, index)) { From e15cd14c91b65519db1b247218812d718d57d061 Mon Sep 17 00:00:00 2001 From: Vsevolod Tolstopyatov Date: Thu, 18 Feb 2021 16:54:22 +0300 Subject: [PATCH 05/11] Properly support escaped symbols in JSON keys --- .../serialization/json/internal/JsonReader.kt | 18 +++++++++------ .../serialization/json/JsonUnicodeTest.kt | 22 +++++++++++++++++++ 2 files changed, 33 insertions(+), 7 deletions(-) create mode 100644 formats/json/commonTest/src/kotlinx/serialization/json/JsonUnicodeTest.kt diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonReader.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonReader.kt index 45c2863514..31be6b64f8 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonReader.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonReader.kt @@ -294,10 +294,15 @@ internal class JsonReader(private val source: String) { if (consumeNextToken() != TC_STRING) { failBeginningOfTheString() } - var currentPosition = currentPosition + val currentPosition = currentPosition if (currentPosition >= source.length) { fail("EOF", currentPosition) } + return consumeString(currentPosition) + } + + private fun consumeString(position: Int): String { + var currentPosition = position val startPosition = currentPosition - 1 var lastPosition = currentPosition var char = source[currentPosition] // Avoid two double checks visible in the profiler @@ -342,13 +347,12 @@ internal class JsonReader(private val source: String) { consumeNextToken(STRING) val current = currentPosition val closingQuote = source.indexOf('"', 
current) - if (closingQuote == -1) fail(TC_STRING) // Better error message? - // TODO explain + if (closingQuote == -1) fail(TC_STRING) for (i in current until closingQuote) { - // Encountered escape sequence, should fallback to "slow" path - if (source[i] == '\\') { - TODO() - break + // Encountered escape sequence, should fallback to "slow" path, + // don't even try to reuse the known part of the string, this situation should almost never happen + if (source[i] == STRING_ESC) { + return consumeString(currentPosition) } } this.currentPosition = closingQuote + 1 diff --git a/formats/json/commonTest/src/kotlinx/serialization/json/JsonUnicodeTest.kt b/formats/json/commonTest/src/kotlinx/serialization/json/JsonUnicodeTest.kt new file mode 100644 index 0000000000..db325c232c --- /dev/null +++ b/formats/json/commonTest/src/kotlinx/serialization/json/JsonUnicodeTest.kt @@ -0,0 +1,22 @@ +package kotlinx.serialization.json + +import kotlinx.serialization.* +import kotlin.test.* + +class JsonUnicodeTest : JsonTestBase() { + + @Serializable + data class UnicodeKeys( + @SerialName("\uD83E\uDD14") val thinking: String, + @SerialName("🤔?") val thinking2: String, + @SerialName("\uD83E\uDD15") val bandage: String, + @SerialName("\"") val escaped: String + ) + + @Test + fun testUnicodeKeys() { + val data = UnicodeKeys("1", "2", "3", "4") + val s = """{"\uD83E\uDD14":"1","\uD83E\uDD14?":"2","\uD83E\uDD15":"3","\"":"4"}""" + assertEquals(data, Json.decodeFromString(s)) + } +} From 804c3b65bd4c5bf0a998b907fad83a393138fa34 Mon Sep 17 00:00:00 2001 From: Vsevolod Tolstopyatov Date: Thu, 18 Feb 2021 18:39:24 +0300 Subject: [PATCH 06/11] Make consumeString inliner-friendly --- .../serialization/json/internal/JsonReader.kt | 30 ++++++++++++------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonReader.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonReader.kt index 
31be6b64f8..e60b6143bf 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonReader.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonReader.kt @@ -305,13 +305,12 @@ internal class JsonReader(private val source: String) { var currentPosition = position val startPosition = currentPosition - 1 var lastPosition = currentPosition + val source = source var char = source[currentPosition] // Avoid two double checks visible in the profiler while (char != STRING) { if (char == STRING_ESC) { - appendRange(source, lastPosition, currentPosition) - val newPosition = appendEsc(source, currentPosition + 1) - currentPosition = newPosition - lastPosition = newPosition + currentPosition = appendEscape(lastPosition, currentPosition) + lastPosition = currentPosition } else if (++currentPosition >= source.length) { fail("EOF", currentPosition) } @@ -323,16 +322,27 @@ internal class JsonReader(private val source: String) { source.substring(lastPosition, currentPosition) } else { // some escaped chars were there - appendRange(source, lastPosition, currentPosition) - val l = length - length = 0 - buf.concatToString(0, l) + decodedString(lastPosition, currentPosition) } this.currentPosition = currentPosition + 1 return string } + private fun appendEscape(lastPosition: Int, current: Int): Int { + var currentPosition1 = current + appendRange(lastPosition, currentPosition1) + currentPosition1 = appendEsc(currentPosition1 + 1) + return currentPosition1 + } + + private fun decodedString(lastPosition: Int, currentPosition: Int): String { + appendRange(lastPosition, currentPosition) + val l = length + length = 0 + return buf.concatToString(0, l) + } + private fun takePeeked(): String { return peekedString!!.also { peekedString = null } } @@ -389,7 +399,7 @@ internal class JsonReader(private val source: String) { } // initializes buf usage upon the first encountered escaped char - private fun appendRange(source: String, fromIndex: Int, toIndex: 
Int) { + private fun appendRange(fromIndex: Int, toIndex: Int) { val addLength = toIndex - fromIndex val oldLength = length val newLength = oldLength + addLength @@ -398,7 +408,7 @@ internal class JsonReader(private val source: String) { length += addLength } - private fun appendEsc(source: String, startPosition: Int): Int { + private fun appendEsc(startPosition: Int): Int { var currentPosition = startPosition require(currentPosition < source.length, currentPosition) { "Unexpected EOF after escape character" } val currentChar = source[currentPosition++] From 9b9799658fe9650308f301d7ce6c0639c2db05ce Mon Sep 17 00:00:00 2001 From: Vsevolod Tolstopyatov Date: Thu, 18 Feb 2021 20:40:03 +0300 Subject: [PATCH 07/11] Optimize strings handling * Replace handrolled char-array with StringBuilder, tweak it here and there * Always use optimistic path for strings * Properly handle slow-path from the middle of a string --- docs/basic-serialization.md | 3 +- .../serialization/json/internal/JsonReader.kt | 145 +++++++----------- .../serialization/json/JsonUnicodeTest.kt | 7 + guide/test/BasicSerializationTest.kt | 3 +- 4 files changed, 65 insertions(+), 93 deletions(-) diff --git a/docs/basic-serialization.md b/docs/basic-serialization.md index 737e1dedc9..1f1a93ea18 100644 --- a/docs/basic-serialization.md +++ b/docs/basic-serialization.md @@ -493,8 +493,7 @@ Even though the `language` property has a default value, it is still an error to the `null` value to it. ```text -Exception in thread "main" kotlinx.serialization.json.internal.JsonDecodingException: Unexpected JSON token at offset 52: Expected string literal but 'null' literal was found. -Use 'coerceInputValues = true' in 'Json {}` builder to coerce nulls to default values. 
+Exception in thread "main" kotlinx.serialization.json.internal.JsonDecodingException: Unexpected JSON token at offset 52: Expected quotation mark '"', but had 'n' instead ``` diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonReader.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonReader.kt index e60b6143bf..87c371959f 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonReader.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonReader.kt @@ -123,8 +123,7 @@ internal class JsonReader(private val source: String) { @JvmField var currentPosition: Int = 0 // position in source - - public val isDone: Boolean get() = consumeNextToken() == TC_EOF + val isDone: Boolean get() = consumeNextToken() == TC_EOF fun tryConsumeComma(): Boolean { val current = skipWhitespaces() @@ -163,8 +162,7 @@ internal class JsonReader(private val source: String) { * If the value was picked, 'consumeString' will take it without scanning the source. */ private var peekedString: String? = null - private var length = 0 // length of string - private var buf = CharArray(16) // only used for strings with escapes + private var escapedString = StringBuilder() // TODO consider replacing usages of this method in JsonParser with char overload fun consumeNextToken(expected: Byte): Byte { @@ -199,8 +197,8 @@ internal class JsonReader(private val source: String) { TC_END_LIST -> "end of the array ']'" else -> "valid token" // should never happen } - // TODO off-by-one? 
- fail("Expected $expected, but had '${source[currentPosition - 1]}' instead", currentPosition) + val s = if (currentPosition == source.length) "EOF" else source[currentPosition - 1].toString() + fail("Expected $expected, but had '$s' instead", currentPosition - 1) } fun peekNextToken(): Byte { @@ -275,15 +273,29 @@ internal class JsonReader(private val source: String) { return string } - private fun failBeginningOfTheString() { - // Try to guess if it's null for better error message - --currentPosition - if (!tryConsumeNotNull()) { - fail("Expected string literal but 'null' literal was found.\n$coerceInputValuesHint", currentPosition - 4) - } else { - val symbol = if (++currentPosition == source.length) "EOF" else source[currentPosition] - fail("Expected string literal but had $symbol instead") + /* + * This method is a copy of consumeString, but used for key of json objects, so there + * is no need to lookup peeked string. + */ + fun consumeKeyString(): String { + /* + * For strings we assume that escaped symbols are rather an exception, so firstly + * we optimistically scan for closing quote via intrinsified and blazing-fast 'indexOf', + * than do our pessimistic check for backslash and fallback to slow-path if necessary. 
+ */ + consumeNextToken(STRING) + val current = currentPosition + val closingQuote = source.indexOf('"', current) + if (closingQuote == -1) fail(TC_STRING) + // Now we _optimistically_ know where the string ends (it might have been an escaped quote) + for (i in current until closingQuote) { + // Encountered escape sequence, should fallback to "slow" path and symbolic scanning + if (source[i] == STRING_ESC) { + return consumeString(currentPosition, i) + } } + this.currentPosition = closingQuote + 1 + return source.substring(current, closingQuote) } fun consumeString(): String { @@ -291,22 +303,14 @@ internal class JsonReader(private val source: String) { return takePeeked() } - if (consumeNextToken() != TC_STRING) { - failBeginningOfTheString() - } - val currentPosition = currentPosition - if (currentPosition >= source.length) { - fail("EOF", currentPosition) - } - return consumeString(currentPosition) + return consumeKeyString() } - private fun consumeString(position: Int): String { - var currentPosition = position - val startPosition = currentPosition - 1 - var lastPosition = currentPosition + private fun consumeString(startPosition: Int, current: Int): String { + var currentPosition = current + var lastPosition = startPosition val source = source - var char = source[currentPosition] // Avoid two double checks visible in the profiler + var char = source[currentPosition] // Avoid two range checks visible in the profiler while (char != STRING) { if (char == STRING_ESC) { currentPosition = appendEscape(lastPosition, currentPosition) @@ -317,58 +321,33 @@ internal class JsonReader(private val source: String) { char = source[currentPosition] } - val string = if (lastPosition == startPosition + 1) { + val string = if (lastPosition == startPosition) { // there was no escaped chars source.substring(lastPosition, currentPosition) } else { // some escaped chars were there decodedString(lastPosition, currentPosition) - } this.currentPosition = currentPosition + 1 return
string } private fun appendEscape(lastPosition: Int, current: Int): Int { - var currentPosition1 = current - appendRange(lastPosition, currentPosition1) - currentPosition1 = appendEsc(currentPosition1 + 1) - return currentPosition1 + escapedString.append(source, lastPosition, current) + return appendEsc(current + 1) } private fun decodedString(lastPosition: Int, currentPosition: Int): String { appendRange(lastPosition, currentPosition) - val l = length - length = 0 - return buf.concatToString(0, l) + val result = escapedString.toString() + escapedString.setLength(0) + return result } private fun takePeeked(): String { return peekedString!!.also { peekedString = null } } - /* - * This method is a copy of consumeString, but used for key of json objects. - * For them we know that escape symbols are _very_ unlikely and can optimistically do - * quotation lookup via `indexOf` (which is a vectorized intrinsic), then substring and - * `indexOf` for escape symbol. It works almost 20% faster for both large and small JSON strings. 
- */ - fun consumeKeyString(): String { - consumeNextToken(STRING) - val current = currentPosition - val closingQuote = source.indexOf('"', current) - if (closingQuote == -1) fail(TC_STRING) - for (i in current until closingQuote) { - // Encountered escape sequence, should fallback to "slow" path, - // don't even try to reuse the known part of the string, this situation should almost never happen - if (source[i] == STRING_ESC) { - return consumeString(currentPosition) - } - } - this.currentPosition = closingQuote + 1 - return source.substring(current, closingQuote) - } - // Allows to consume unquoted string fun consumeStringLenient(): String { if (peekedString != null) { @@ -393,44 +372,42 @@ internal class JsonReader(private val source: String) { return result } - private fun append(ch: Char) { - if (length >= buf.size) buf = buf.copyOf(2 * buf.size) - buf[length++] = ch - } - // initializes buf usage upon the first encountered escaped char private fun appendRange(fromIndex: Int, toIndex: Int) { - val addLength = toIndex - fromIndex - val oldLength = length - val newLength = oldLength + addLength - if (newLength > buf.size) buf = buf.copyOf(newLength.coerceAtLeast(2 * buf.size)) - for (i in 0 until addLength) buf[oldLength + i] = source[fromIndex + i] - length += addLength + escapedString.append(source, fromIndex, toIndex) } private fun appendEsc(startPosition: Int): Int { var currentPosition = startPosition - require(currentPosition < source.length, currentPosition) { "Unexpected EOF after escape character" } val currentChar = source[currentPosition++] if (currentChar == UNICODE_ESC) { return appendHex(source, currentPosition) } val c = escapeToChar(currentChar.toInt()) - require(c != INVALID, currentPosition) { "Invalid escaped char '$currentChar'" } - append(c) + if (c == INVALID) fail("Invalid escaped char '$currentChar'") + escapedString.append(c) return currentPosition } private fun appendHex(source: String, startPos: Int): Int { - var curPos = startPos - 
append( - ((fromHexChar(source, curPos++) shl 12) + - (fromHexChar(source, curPos++) shl 8) + - (fromHexChar(source, curPos++) shl 4) + - fromHexChar(source, curPos++)).toChar() + if (startPos + 4 >= source.length) fail("Unexpected EOF during unicode escape") + escapedString.append( + ((fromHexChar(source, startPos) shl 12) + + (fromHexChar(source, startPos + 1) shl 8) + + (fromHexChar(source, startPos + 2) shl 4) + + fromHexChar(source, startPos + 3)).toChar() ) - return curPos + return startPos + 4 + } + + private fun fromHexChar(source: String, currentPosition: Int): Int { + return when (val character = source[currentPosition]) { + in '0'..'9' -> character.toInt() - '0'.toInt() + in 'a'..'f' -> character.toInt() - 'a'.toInt() + 10 + in 'A'..'F' -> character.toInt() - 'A'.toInt() + 10 + else -> fail("Invalid toHexChar char '$character' in unicode escape") + } } fun skipElement() { @@ -488,16 +465,6 @@ internal class JsonReader(private val source: String) { if (!condition) fail(message(), position) } - private fun fromHexChar(source: String, currentPosition: Int): Int { - require(currentPosition < source.length, currentPosition) { "Unexpected EOF during unicode escape" } - return when (val curChar = source[currentPosition]) { - in '0'..'9' -> curChar.toInt() - '0'.toInt() - in 'a'..'f' -> curChar.toInt() - 'a'.toInt() + 10 - in 'A'..'F' -> curChar.toInt() - 'A'.toInt() + 10 - else -> fail("Invalid toHexChar char '$curChar' in unicode escape") - } - } - fun consumeNumericLiteral(): Long { var current = skipWhitespaces() val hasQuotation = if (source[current] == STRING) { diff --git a/formats/json/commonTest/src/kotlinx/serialization/json/JsonUnicodeTest.kt b/formats/json/commonTest/src/kotlinx/serialization/json/JsonUnicodeTest.kt index db325c232c..5a7ea1571b 100644 --- a/formats/json/commonTest/src/kotlinx/serialization/json/JsonUnicodeTest.kt +++ b/formats/json/commonTest/src/kotlinx/serialization/json/JsonUnicodeTest.kt @@ -19,4 +19,11 @@ class JsonUnicodeTest : 
JsonTestBase() { val s = """{"\uD83E\uDD14":"1","\uD83E\uDD14?":"2","\uD83E\uDD15":"3","\"":"4"}""" assertEquals(data, Json.decodeFromString(s)) } + + @Test + fun testUnicodeValues() { + val data = UnicodeKeys("\uD83E\uDD14", "\" \uD83E\uDD14", "\uD83E\uDD14", + "slow-path-in-\"-the-middle\"") + assertEquals(data, Json.decodeFromString(Json.encodeToString(data))) + } } diff --git a/guide/test/BasicSerializationTest.kt b/guide/test/BasicSerializationTest.kt index 4f8871b947..bfd02207e0 100644 --- a/guide/test/BasicSerializationTest.kt +++ b/guide/test/BasicSerializationTest.kt @@ -101,8 +101,7 @@ class BasicSerializationTest { @Test fun testExampleClasses11() { captureOutput("ExampleClasses11") { example.exampleClasses11.main() }.verifyOutputLinesStart( - "Exception in thread \"main\" kotlinx.serialization.json.internal.JsonDecodingException: Unexpected JSON token at offset 52: Expected string literal but 'null' literal was found.", - "Use 'coerceInputValues = true' in 'Json {}` builder to coerce nulls to default values." 
+ "Exception in thread \"main\" kotlinx.serialization.json.internal.JsonDecodingException: Unexpected JSON token at offset 52: Expected quotation mark '\"', but had 'n' instead" ) } From 041052adcb01c89a7275d90277bdc99ceabbacc2 Mon Sep 17 00:00:00 2001 From: Vsevolod Tolstopyatov Date: Fri, 19 Feb 2021 17:22:31 +0300 Subject: [PATCH 08/11] Remove no longer used WriteMode fields --- .../src/kotlinx/serialization/json/internal/WriteMode.kt | 5 ----- 1 file changed, 5 deletions(-) diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/WriteMode.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/WriteMode.kt index 994addefb1..5554516fed 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/WriteMode.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/WriteMode.kt @@ -15,11 +15,6 @@ internal enum class WriteMode(@JvmField val begin: Char, @JvmField val end: Char LIST(BEGIN_LIST, END_LIST), MAP(BEGIN_OBJ, END_OBJ), POLY_OBJ(BEGIN_LIST, END_LIST); - - @JvmField - val beginTc: Byte = charToTokenClass(begin) - @JvmField - val endTc: Byte = charToTokenClass(end) } @OptIn(ExperimentalSerializationApi::class) From 446570e334cee28137299e835a8a268f0845aac2 Mon Sep 17 00:00:00 2001 From: Vsevolod Tolstopyatov Date: Fri, 26 Feb 2021 19:57:02 +0300 Subject: [PATCH 09/11] ~review fixes * Better exception messages and hints * Minor improvements * More comments --- .../json/PrimitiveValuesBenchmark.kt | 8 ---- docs/basic-serialization.md | 3 +- .../src/kotlinx/serialization/json/Json.kt | 2 +- .../json/JsonElementSerializers.kt | 2 +- .../serialization/json/internal/JsonReader.kt | 47 +++++++++++++------ .../json/internal/StreamingJsonDecoder.kt | 4 +- .../json/JsonParserFailureModesTest.kt | 12 +++++ .../serialization/json/JsonTestBase.kt | 6 +-- .../serializers/JsonNullSerializerTest.kt | 7 +++ .../serializers/JsonObjectSerializerTest.kt | 2 +- guide/test/BasicSerializationTest.kt | 3 +- 11 files 
changed, 63 insertions(+), 33 deletions(-) diff --git a/benchmark/src/jmh/kotlin/kotlinx/benchmarks/json/PrimitiveValuesBenchmark.kt b/benchmark/src/jmh/kotlin/kotlinx/benchmarks/json/PrimitiveValuesBenchmark.kt index 149b38edbe..9125a84f8b 100644 --- a/benchmark/src/jmh/kotlin/kotlinx/benchmarks/json/PrimitiveValuesBenchmark.kt +++ b/benchmark/src/jmh/kotlin/kotlinx/benchmarks/json/PrimitiveValuesBenchmark.kt @@ -59,11 +59,3 @@ open class PrimitiveValuesBenchmark { @Benchmark fun encodeLong(): LongHolder = Json.decodeFromString(LongHolder.serializer(), longValue) } - - -private val booleanHolder = PrimitiveValuesBenchmark.BooleanHolder(true, false, true, false, true, true, false, false) -private val booleanValue = Json.encodeToString(booleanHolder) - -fun main() { - println(Json.decodeFromString(PrimitiveValuesBenchmark.BooleanHolder.serializer(), booleanValue)) -} diff --git a/docs/basic-serialization.md b/docs/basic-serialization.md index 1f1a93ea18..737e1dedc9 100644 --- a/docs/basic-serialization.md +++ b/docs/basic-serialization.md @@ -493,7 +493,8 @@ Even though the `language` property has a default value, it is still an error to the `null` value to it. ```text -Exception in thread "main" kotlinx.serialization.json.internal.JsonDecodingException: Unexpected JSON token at offset 52: Expected quotation mark '"', but had 'n' instead +Exception in thread "main" kotlinx.serialization.json.internal.JsonDecodingException: Unexpected JSON token at offset 52: Expected string literal but 'null' literal was found. +Use 'coerceInputValues = true' in 'Json {}` builder to coerce nulls to default values. 
``` diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/Json.kt b/formats/json/commonMain/src/kotlinx/serialization/json/Json.kt index 465859527e..d62a03c962 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/Json.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/Json.kt @@ -83,7 +83,7 @@ public sealed class Json(internal val configuration: JsonConf) : StringFormat { val reader = JsonReader(string) val input = StreamingJsonDecoder(this, WriteMode.OBJ, reader) val result = input.decodeSerializableValue(deserializer) - if (!reader.isDone) { error("Reader has not consumed the whole input: $reader") } + reader.expectEof() return result } /** diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/JsonElementSerializers.kt b/formats/json/commonMain/src/kotlinx/serialization/json/JsonElementSerializers.kt index dee6e0139e..23fb53af8d 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/JsonElementSerializers.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/JsonElementSerializers.kt @@ -97,7 +97,7 @@ internal object JsonNullSerializer : KSerializer { override fun deserialize(decoder: Decoder): JsonNull { verify(decoder) if (decoder.decodeNotNullMark()) { - + throw JsonDecodingException("Expected 'null' literal") } decoder.decodeNull() return JsonNull diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonReader.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonReader.kt index 87c371959f..8a893014ed 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonReader.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonReader.kt @@ -123,7 +123,12 @@ internal class JsonReader(private val source: String) { @JvmField var currentPosition: Int = 0 // position in source - val isDone: Boolean get() = consumeNextToken() == TC_EOF + + fun expectEof() { + val nextToken = consumeNextToken() + if (nextToken 
!= TC_EOF) + fail("Expected EOF, but had ${source[currentPosition - 1]} instead") + } fun tryConsumeComma(): Boolean { val current = skipWhitespaces() @@ -139,6 +144,7 @@ internal class JsonReader(private val source: String) { var current = currentPosition while (current < source.length) { val c = source[current] + // Inlined skipWhitespaces without field spill and nested loop. Also faster then char2TokenClass if (c == ' ' || c == '\n' || c == '\r' || c == '\t') { ++current continue @@ -179,9 +185,17 @@ internal class JsonReader(private val source: String) { val c = source[currentPosition++] if (c == ' ' || c == '\n' || c == '\r' || c == '\t') continue if (c == expected) return - fail(charToTokenClass(expected)) + unexpectedToken(expected) } - fail(charToTokenClass(expected)) // EOF + unexpectedToken(expected) // EOF + } + + private fun unexpectedToken(expected: Char) { + --currentPosition // To properly handle null + if (expected == STRING && consumeStringLenient() == NULL) { + fail("Expected string literal but 'null' literal was found.\n$coerceInputValuesHint", currentPosition - 4) + } + fail(charToTokenClass(expected)) } private fun fail(expectedToken: Byte) { @@ -197,7 +211,7 @@ internal class JsonReader(private val source: String) { TC_END_LIST -> "end of the array ']'" else -> "valid token" // should never happen } - val s = if (currentPosition == source.length) "EOF" else source[currentPosition - 1].toString() + val s = if (currentPosition == source.length || currentPosition == 0) "EOF" else source[currentPosition - 1].toString() fail("Expected $expected, but had '$s' instead", currentPosition - 1) } @@ -205,13 +219,11 @@ internal class JsonReader(private val source: String) { val source = source while (currentPosition < source.length) { val ch = source[currentPosition] - return when (val tc = charToTokenClass(ch)) { - TC_WHITESPACE -> { - ++currentPosition - continue - } - else -> tc + if (ch == ' ' || ch == '\n' || ch == '\r' || ch == '\t') { + 
++currentPosition + continue } + return charToTokenClass(ch) } return TC_EOF } @@ -466,14 +478,19 @@ internal class JsonReader(private val source: String) { } fun consumeNumericLiteral(): Long { + /* + * This is an optimized (~40% for numbers) version of consumeString().toLong() + * that doesn't allocate and also doesn't support any radix but 10 + */ var current = skipWhitespaces() + if (current == source.length) fail("EOF") val hasQuotation = if (source[current] == STRING) { - ++current + // Check it again + if (++current == source.length) fail("EOF") true } else { false } - if (current == source.length) fail("EOF") var accumulator = 0L var isNegative = false val start = current @@ -495,7 +512,7 @@ internal class JsonReader(private val source: String) { accumulator = accumulator * 10 - digit if (accumulator > 0) fail("Numeric value overflow") } - if (start == current) { + if (start == current || (isNegative && start == current - 1)) { fail("Expected numeric literal") } if (hasQuotation) { @@ -549,7 +566,7 @@ internal class JsonReader(private val source: String) { false } else -> { - fail("Expected valid boolean literal prefix, but had ${source[current - 1]}") + fail("Expected valid boolean literal prefix, but had '${consumeStringLenient()}'") } } } @@ -563,7 +580,7 @@ internal class JsonReader(private val source: String) { val expected = literalSuffix[i] val actual = source[current + i] if (expected.toInt() != actual.toInt() or asciiCaseMask) { - fail("Expected valid boolean literal prefix, but had ${source.substring(current - 1, current - 1 + i)}") + fail("Expected valid boolean literal prefix, but had '${consumeStringLenient()}'") } } diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonDecoder.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonDecoder.kt index 71a9c8d5f4..a22f0fb0e4 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonDecoder.kt +++ 
b/formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonDecoder.kt @@ -177,7 +177,7 @@ internal open class StreamingJsonDecoder( return if (configuration.isLenient) { reader.consumeBooleanLenient() } else { - return reader.consumeBoolean() + reader.consumeBoolean() } } @@ -225,7 +225,7 @@ internal open class StreamingJsonDecoder( } override fun decodeChar(): Char { - val string= reader.consumeStringLenient() + val string = reader.consumeStringLenient() if (string.length != 1) reader.fail("Expected single char, but got '$string'") return string[0] } diff --git a/formats/json/commonTest/src/kotlinx/serialization/json/JsonParserFailureModesTest.kt b/formats/json/commonTest/src/kotlinx/serialization/json/JsonParserFailureModesTest.kt index f58281e6da..06e78b8884 100644 --- a/formats/json/commonTest/src/kotlinx/serialization/json/JsonParserFailureModesTest.kt +++ b/formats/json/commonTest/src/kotlinx/serialization/json/JsonParserFailureModesTest.kt @@ -125,4 +125,16 @@ class JsonParserFailureModesTest : JsonTestBase() { default.decodeFromString("""{"id": ${Long.MIN_VALUE}}""", it) default.decodeFromString("""{"id": ${Long.MAX_VALUE}}""", it) } + + @Test + fun testInvalidNumber() = parametrizedTest { + assertFailsWith { default.decodeFromString("""{"id":-}""", it) } + assertFailsWith { default.decodeFromString("""{"id":+}""", it) } + assertFailsWith { default.decodeFromString("""{"id":--}""", it) } + assertFailsWith { default.decodeFromString("""{"id":1-1}""", it) } + assertFailsWith { default.decodeFromString("""{"id":0-1}""", it) } + assertFailsWith { default.decodeFromString("""{"id":0-}""", it) } + assertFailsWith { default.decodeFromString("""{"id":a}""", it) } + assertFailsWith { default.decodeFromString("""{"id":-a}""", it) } + } } diff --git a/formats/json/commonTest/src/kotlinx/serialization/json/JsonTestBase.kt b/formats/json/commonTest/src/kotlinx/serialization/json/JsonTestBase.kt index 64fa9d3cb9..0d6418b3be 100644 --- 
a/formats/json/commonTest/src/kotlinx/serialization/json/JsonTestBase.kt +++ b/formats/json/commonTest/src/kotlinx/serialization/json/JsonTestBase.kt @@ -37,10 +37,10 @@ abstract class JsonTestBase { return if (useStreaming) { decodeFromString(deserializer, source) } else { - val parser = JsonReader(source) - val input = StreamingJsonDecoder(this, WriteMode.OBJ, parser) + val reader = JsonReader(source) + val input = StreamingJsonDecoder(this, WriteMode.OBJ, reader) val tree = input.decodeJsonElement() - if (!input.reader.isDone) { error("Reader has not consumed the whole input: ${input.reader}") } + reader.expectEof() readJson(tree, deserializer) } } diff --git a/formats/json/commonTest/src/kotlinx/serialization/json/serializers/JsonNullSerializerTest.kt b/formats/json/commonTest/src/kotlinx/serialization/json/serializers/JsonNullSerializerTest.kt index 13d6a799cb..56c170b0c1 100644 --- a/formats/json/commonTest/src/kotlinx/serialization/json/serializers/JsonNullSerializerTest.kt +++ b/formats/json/commonTest/src/kotlinx/serialization/json/serializers/JsonNullSerializerTest.kt @@ -5,6 +5,7 @@ package kotlinx.serialization.json.serializers import kotlinx.serialization.json.* +import kotlinx.serialization.json.internal.* import kotlinx.serialization.test.assertStringFormAndRestored import kotlin.test.* @@ -15,6 +16,12 @@ class JsonNullSerializerTest : JsonTestBase() { assertStringFormAndRestored("{\"element\":null}", JsonNullWrapper(JsonNull), JsonNullWrapper.serializer()) } + @Test + fun testJsonNullFailure() = parametrizedTest(default) { + val t = assertFailsWith { default.decodeFromString(JsonNullWrapper.serializer(), "{\"element\":\"foo\"}", true) } + assertTrue { t.message!!.contains("'null' literal") } + } + @Test fun testJsonNullAsElement() = parametrizedTest(default) { assertStringFormAndRestored("{\"element\":null}", JsonElementWrapper(JsonNull), JsonElementWrapper.serializer()) diff --git 
a/formats/json/commonTest/src/kotlinx/serialization/json/serializers/JsonObjectSerializerTest.kt b/formats/json/commonTest/src/kotlinx/serialization/json/serializers/JsonObjectSerializerTest.kt index f71a19b027..21bebfa05e 100644 --- a/formats/json/commonTest/src/kotlinx/serialization/json/serializers/JsonObjectSerializerTest.kt +++ b/formats/json/commonTest/src/kotlinx/serialization/json/serializers/JsonObjectSerializerTest.kt @@ -66,7 +66,7 @@ class JsonObjectSerializerTest : JsonTestBase() { fun testInvalidObject() = parametrizedTest { useStreaming -> assertFailsWith { default.decodeFromString(JsonObjectSerializer, "{\"a\":\"b\"]", false) } assertFailsWith { default.decodeFromString(JsonObjectSerializer, "{", useStreaming) } - assertFailsWith { default.decodeFromString(JsonObjectSerializer, "{}}", useStreaming) } + assertFailsWith { default.decodeFromString(JsonObjectSerializer, "{}}", useStreaming) } assertFailsWith { default.decodeFromString(JsonObjectSerializer, "{]", useStreaming) } } diff --git a/guide/test/BasicSerializationTest.kt b/guide/test/BasicSerializationTest.kt index bfd02207e0..4f8871b947 100644 --- a/guide/test/BasicSerializationTest.kt +++ b/guide/test/BasicSerializationTest.kt @@ -101,7 +101,8 @@ class BasicSerializationTest { @Test fun testExampleClasses11() { captureOutput("ExampleClasses11") { example.exampleClasses11.main() }.verifyOutputLinesStart( - "Exception in thread \"main\" kotlinx.serialization.json.internal.JsonDecodingException: Unexpected JSON token at offset 52: Expected quotation mark '\"', but had 'n' instead" + "Exception in thread \"main\" kotlinx.serialization.json.internal.JsonDecodingException: Unexpected JSON token at offset 52: Expected string literal but 'null' literal was found.", + "Use 'coerceInputValues = true' in 'Json {}` builder to coerce nulls to default values." 
) } From 18fc81caf9cb492c757b63cf1e09a37989acc6dd Mon Sep 17 00:00:00 2001 From: Vsevolod Tolstopyatov Date: Thu, 4 Mar 2021 15:17:59 +0300 Subject: [PATCH 10/11] ~comments and fixes --- core/commonMain/src/kotlinx/serialization/encoding/Decoding.kt | 3 +++ .../src/kotlinx/serialization/json/internal/JsonReader.kt | 2 +- .../serialization/json/internal/StreamingJsonDecoder.kt | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/core/commonMain/src/kotlinx/serialization/encoding/Decoding.kt b/core/commonMain/src/kotlinx/serialization/encoding/Decoding.kt index dad28e350c..7ac3ec8e4e 100644 --- a/core/commonMain/src/kotlinx/serialization/encoding/Decoding.kt +++ b/core/commonMain/src/kotlinx/serialization/encoding/Decoding.kt @@ -137,6 +137,9 @@ public interface Decoder { /** * Decodes the `null` value and returns it. + * + * It is expected that `decodeNotNullMark` was called + * prior to `decodeNull` invocation and the case when it returned `true` was handled. */ @ExperimentalSerializationApi public fun decodeNull(): Nothing? 
diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonReader.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonReader.kt index 8a893014ed..5d3abac53e 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonReader.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonReader.kt @@ -211,7 +211,7 @@ internal class JsonReader(private val source: String) { TC_END_LIST -> "end of the array ']'" else -> "valid token" // should never happen } - val s = if (currentPosition == source.length || currentPosition == 0) "EOF" else source[currentPosition - 1].toString() + val s = if (currentPosition == source.length || currentPosition <= 0) "EOF" else source[currentPosition - 1].toString() fail("Expected $expected, but had '$s' instead", currentPosition - 1) } diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonDecoder.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonDecoder.kt index a22f0fb0e4..d61f1d79bc 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonDecoder.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonDecoder.kt @@ -60,7 +60,7 @@ internal open class StreamingJsonDecoder( } override fun decodeNull(): Nothing? 
{ - // Do nothing, null was consumed + // Do nothing, null was consumed by `decodeNotNullMark` return null } From 1c0c8c3a3d545641796a8be2c50208251a89f35f Mon Sep 17 00:00:00 2001 From: Vsevolod Tolstopyatov Date: Fri, 12 Mar 2021 12:10:34 +0300 Subject: [PATCH 11/11] Rename JsonReader to JsonLexer and JsonParser to JsonTreeReader --- .../src/kotlinx/serialization/json/Json.kt | 7 +- .../json/internal/JsonExceptions.kt | 2 +- .../internal/{JsonReader.kt => JsonLexer.kt} | 2 +- .../{JsonParser.kt => JsonTreeReader.kt} | 44 +++---- .../json/internal/StreamingJsonDecoder.kt | 114 +++++++++--------- .../json/internal/TreeJsonDecoder.kt | 2 +- .../serialization/json/JsonTestBase.kt | 6 +- 7 files changed, 88 insertions(+), 89 deletions(-) rename formats/json/commonMain/src/kotlinx/serialization/json/internal/{JsonReader.kt => JsonLexer.kt} (99%) rename formats/json/commonMain/src/kotlinx/serialization/json/internal/{JsonParser.kt => JsonTreeReader.kt} (58%) diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/Json.kt b/formats/json/commonMain/src/kotlinx/serialization/json/Json.kt index d62a03c962..3943804b7b 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/Json.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/Json.kt @@ -7,7 +7,6 @@ package kotlinx.serialization.json import kotlinx.serialization.* import kotlinx.serialization.json.internal.* import kotlinx.serialization.modules.* -import kotlin.js.* /** * The main entry point to work with JSON serialization. @@ -80,10 +79,10 @@ public sealed class Json(internal val configuration: JsonConf) : StringFormat { * @throws [SerializationException] if the given JSON string cannot be deserialized to the value of type [T]. 
*/ public final override fun decodeFromString(deserializer: DeserializationStrategy, string: String): T { - val reader = JsonReader(string) - val input = StreamingJsonDecoder(this, WriteMode.OBJ, reader) + val lexer = JsonLexer(string) + val input = StreamingJsonDecoder(this, WriteMode.OBJ, lexer) val result = input.decodeSerializableValue(deserializer) - reader.expectEof() + lexer.expectEof() return result } /** diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonExceptions.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonExceptions.kt index 27e30fe3e5..5c89ba2d96 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonExceptions.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonExceptions.kt @@ -45,7 +45,7 @@ internal fun InvalidFloatingPointDecoded(value: Number, key: String, output: Str JsonDecodingException(-1, unexpectedFpErrorMessage(value, key, output)) // Extension on JSON reader and fail immediately -internal fun JsonReader.throwInvalidFloatingPointDecoded(result: Number): Nothing { +internal fun JsonLexer.throwInvalidFloatingPointDecoded(result: Number): Nothing { fail("Unexpected special floating-point value $result. By default, " + "non-finite floating point values are prohibited because they do not conform JSON specification. 
" + specialFlowingValuesHint diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonReader.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonLexer.kt similarity index 99% rename from formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonReader.kt rename to formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonLexer.kt index 5d3abac53e..a1a92e8b0c 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonReader.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonLexer.kt @@ -119,7 +119,7 @@ internal fun charToTokenClass(c: Char) = if (c.toInt() < CTC_MAX) CHAR_TO_TOKEN[ internal fun escapeToChar(c: Int): Char = if (c < ESC2C_MAX) ESCAPE_2_CHAR[c] else INVALID // Streaming JSON reader -internal class JsonReader(private val source: String) { +internal class JsonLexer(private val source: String) { @JvmField var currentPosition: Int = 0 // position in source diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonParser.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonTreeReader.kt similarity index 58% rename from formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonParser.kt rename to formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonTreeReader.kt index bd44e6dd90..ca8ff1ca5a 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonParser.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonTreeReader.kt @@ -6,76 +6,76 @@ package kotlinx.serialization.json.internal import kotlinx.serialization.json.* -internal class JsonParser( +internal class JsonTreeReader( configuration: JsonConf, - private val reader: JsonReader + private val lexer: JsonLexer ) { private val isLenient = configuration.isLenient private fun readObject(): JsonElement { - var lastToken = reader.consumeNextToken(TC_BEGIN_OBJ) - if 
(reader.peekNextToken() == TC_COMMA) reader.fail("Unexpected leading comma") + var lastToken = lexer.consumeNextToken(TC_BEGIN_OBJ) + if (lexer.peekNextToken() == TC_COMMA) lexer.fail("Unexpected leading comma") val result = linkedMapOf() - while (reader.canConsumeValue()) { + while (lexer.canConsumeValue()) { // Read key and value - val key = if (isLenient) reader.consumeStringLenient() else reader.consumeString() - reader.consumeNextToken(TC_COLON) + val key = if (isLenient) lexer.consumeStringLenient() else lexer.consumeString() + lexer.consumeNextToken(TC_COLON) val element = read() result[key] = element // Verify the next token - lastToken = reader.consumeNextToken() + lastToken = lexer.consumeNextToken() if (lastToken != TC_COMMA && lastToken != TC_END_OBJ) { - reader.fail("Expected end of the object or comma") + lexer.fail("Expected end of the object or comma") } } // Check for the correct ending if (lastToken == TC_BEGIN_OBJ) { // Case of empty object - reader.consumeNextToken(TC_END_OBJ) + lexer.consumeNextToken(TC_END_OBJ) } else if (lastToken == TC_COMMA) { // Trailing comma - reader.fail("Unexpected trailing comma") + lexer.fail("Unexpected trailing comma") } return JsonObject(result) } private fun readArray(): JsonElement { - var lastToken = reader.consumeNextToken() + var lastToken = lexer.consumeNextToken() // Prohibit leading comma - if (reader.peekNextToken() == TC_COMMA) reader.fail("Unexpected leading comma") + if (lexer.peekNextToken() == TC_COMMA) lexer.fail("Unexpected leading comma") val result = arrayListOf() - while (reader.canConsumeValue()) { + while (lexer.canConsumeValue()) { val element = read() result.add(element) - lastToken = reader.consumeNextToken() + lastToken = lexer.consumeNextToken() if (lastToken != TC_COMMA) { - reader.require(lastToken == TC_END_LIST) { "Expected end of the array or comma" } + lexer.require(lastToken == TC_END_LIST) { "Expected end of the array or comma" } } } // Check for the correct ending if (lastToken 
== TC_BEGIN_LIST) { // Case of empty object - reader.consumeNextToken(TC_END_LIST) + lexer.consumeNextToken(TC_END_LIST) } else if (lastToken == TC_COMMA) { // Trailing comma - reader.fail("Unexpected trailing comma") + lexer.fail("Unexpected trailing comma") } return JsonArray(result) } private fun readValue(isString: Boolean): JsonPrimitive { val string = if (isLenient || !isString) { - reader.consumeStringLenient() + lexer.consumeStringLenient() } else { - reader.consumeString() + lexer.consumeString() } if (string == NULL) return JsonNull return JsonLiteral(string, isString) } fun read(): JsonElement { - return when (reader.peekNextToken()) { + return when (lexer.peekNextToken()) { TC_STRING -> readValue(isString = true) TC_OTHER -> readValue(isString = false) TC_BEGIN_OBJ -> readObject() TC_BEGIN_LIST -> readArray() - else -> reader.fail("Can't begin reading element, unexpected token") + else -> lexer.fail("Can't begin reading element, unexpected token") } } } diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonDecoder.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonDecoder.kt index d61f1d79bc..8f76f64ca7 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonDecoder.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonDecoder.kt @@ -13,20 +13,20 @@ import kotlinx.serialization.modules.* import kotlin.jvm.* /** - * [JsonDecoder] which reads given JSON from [JsonReader] field by field. + * [JsonDecoder] which reads given JSON from [JsonLexer] field by field. 
*/ @OptIn(ExperimentalSerializationApi::class, ExperimentalUnsignedTypes::class) internal open class StreamingJsonDecoder( final override val json: Json, private val mode: WriteMode, - @JvmField internal val reader: JsonReader + @JvmField internal val lexer: JsonLexer ) : JsonDecoder, AbstractDecoder() { override val serializersModule: SerializersModule = json.serializersModule private var currentIndex = -1 private val configuration = json.configuration - override fun decodeJsonElement(): JsonElement = JsonParser(json.configuration, reader).read() + override fun decodeJsonElement(): JsonElement = JsonTreeReader(json.configuration, lexer).read() override fun decodeSerializableValue(deserializer: DeserializationStrategy): T { return decodeSerializableValuePolymorphic(deserializer) @@ -34,29 +34,29 @@ internal open class StreamingJsonDecoder( override fun beginStructure(descriptor: SerialDescriptor): CompositeDecoder { val newMode = json.switchMode(descriptor) - reader.consumeNextToken(newMode.begin) + lexer.consumeNextToken(newMode.begin) checkLeadingComma() return when (newMode) { // In fact resets current index that these modes rely on WriteMode.LIST, WriteMode.MAP, WriteMode.POLY_OBJ -> StreamingJsonDecoder( json, newMode, - reader + lexer ) else -> if (mode == newMode) { this } else { - StreamingJsonDecoder(json, newMode, reader) + StreamingJsonDecoder(json, newMode, lexer) } } } override fun endStructure(descriptor: SerialDescriptor) { - reader.consumeNextToken(mode.end) + lexer.consumeNextToken(mode.end) } override fun decodeNotNullMark(): Boolean { - return reader.tryConsumeNotNull() + return lexer.tryConsumeNotNull() } override fun decodeNull(): Nothing? 
{ @@ -65,8 +65,8 @@ internal open class StreamingJsonDecoder( } private fun checkLeadingComma() { - if (reader.peekNextToken() == TC_COMMA) { - reader.fail("Unexpected leading comma") + if (lexer.peekNextToken() == TC_COMMA) { + lexer.fail("Unexpected leading comma") } } @@ -83,20 +83,20 @@ internal open class StreamingJsonDecoder( val decodingKey = currentIndex % 2 != 0 if (decodingKey) { if (currentIndex != -1) { - hasComma = reader.tryConsumeComma() + hasComma = lexer.tryConsumeComma() } } else { - reader.consumeNextToken(COLON) + lexer.consumeNextToken(COLON) } - return if (reader.canConsumeValue()) { + return if (lexer.canConsumeValue()) { if (decodingKey) { - if (currentIndex == -1) reader.require(!hasComma) { "Unexpected trailing comma" } - else reader.require(hasComma) { "Expected comma after the key-value pair" } + if (currentIndex == -1) lexer.require(!hasComma) { "Unexpected trailing comma" } + else lexer.require(hasComma) { "Expected comma after the key-value pair" } } ++currentIndex } else { - if (hasComma) reader.fail("Expected '}', but had ',' instead") + if (hasComma) lexer.fail("Expected '}', but had ',' instead") CompositeDecoder.DECODE_DONE } } @@ -106,14 +106,14 @@ internal open class StreamingJsonDecoder( */ private fun coerceInputValue(descriptor: SerialDescriptor, index: Int): Boolean { val elementDescriptor = descriptor.getElementDescriptor(index) - if (!elementDescriptor.isNullable && !reader.tryConsumeNotNull()) return true + if (!elementDescriptor.isNullable && !lexer.tryConsumeNotNull()) return true if (elementDescriptor.kind == SerialKind.ENUM) { - val enumValue = reader.peekString(configuration.isLenient) + val enumValue = lexer.peekString(configuration.isLenient) ?: return false // if value is not a string, decodeEnum() will throw correct exception val enumIndex = elementDescriptor.getElementIndex(enumValue) if (enumIndex == UNKNOWN_NAME) { // Encountered unknown enum value, have to skip it - reader.consumeString() + 
lexer.consumeString() return true } } @@ -122,15 +122,15 @@ internal open class StreamingJsonDecoder( private fun decodeObjectIndex(descriptor: SerialDescriptor): Int { // hasComma checks are required to properly react on trailing commas - var hasComma = reader.tryConsumeComma() - while (reader.canConsumeValue()) { // TODO: consider merging comma consumption and this check + var hasComma = lexer.tryConsumeComma() + while (lexer.canConsumeValue()) { // TODO: consider merging comma consumption and this check hasComma = false val key = decodeStringKey() - reader.consumeNextToken(COLON) + lexer.consumeNextToken(COLON) val index = descriptor.getElementIndex(key) val isUnknown = if (index != UNKNOWN_NAME) { if (configuration.coerceInputValues && coerceInputValue(descriptor, index)) { - hasComma = reader.tryConsumeComma() + hasComma = lexer.tryConsumeComma() false // Known element, but coerced } else { return index // Known element without coercing, return it @@ -143,27 +143,27 @@ internal open class StreamingJsonDecoder( hasComma = handleUnknown(key) } } - if (hasComma) reader.fail("Unexpected trailing comma") + if (hasComma) lexer.fail("Unexpected trailing comma") return CompositeDecoder.DECODE_DONE } private fun handleUnknown(key: String): Boolean { if (configuration.ignoreUnknownKeys) { - reader.skipElement() + lexer.skipElement() } else { - reader.failOnUnknownKey(key) + lexer.failOnUnknownKey(key) } - return reader.tryConsumeComma() + return lexer.tryConsumeComma() } private fun decodeListIndex(): Int { // Prohibit leading comma - val hasComma = reader.tryConsumeComma() - return if (reader.canConsumeValue()) { - if (currentIndex != -1 && !hasComma) reader.fail("Expected end of the array or comma") + val hasComma = lexer.tryConsumeComma() + return if (lexer.canConsumeValue()) { + if (currentIndex != -1 && !hasComma) lexer.fail("Expected end of the array or comma") ++currentIndex } else { - if (hasComma) reader.fail("Unexpected trailing comma") + if (hasComma) 
lexer.fail("Unexpected trailing comma") CompositeDecoder.DECODE_DONE } } @@ -175,9 +175,9 @@ internal open class StreamingJsonDecoder( * but allow quoted literal in relaxed mode for booleans. */ return if (configuration.isLenient) { - reader.consumeBooleanLenient() + lexer.consumeBooleanLenient() } else { - reader.consumeBoolean() + lexer.consumeBoolean() } } @@ -186,68 +186,68 @@ internal open class StreamingJsonDecoder( * to simplify integrations with third-party API. */ override fun decodeByte(): Byte { - val value = reader.consumeNumericLiteral() + val value = lexer.consumeNumericLiteral() // Check for overflow - if (value != value.toByte().toLong()) reader.fail("Failed to parse byte for input '$value'") + if (value != value.toByte().toLong()) lexer.fail("Failed to parse byte for input '$value'") return value.toByte() } override fun decodeShort(): Short { - val value = reader.consumeNumericLiteral() + val value = lexer.consumeNumericLiteral() // Check for overflow - if (value != value.toShort().toLong()) reader.fail("Failed to parse byte for input '$value'") + if (value != value.toShort().toLong()) lexer.fail("Failed to parse short for input '$value'") return value.toShort() } override fun decodeInt(): Int { - val value = reader.consumeNumericLiteral() + val value = lexer.consumeNumericLiteral() // Check for overflow - if (value != value.toInt().toLong()) reader.fail("Failed to parse byte for input '$value'") + if (value != value.toInt().toLong()) lexer.fail("Failed to parse int for input '$value'") return value.toInt() } override fun decodeLong(): Long { - return reader.consumeNumericLiteral() + return lexer.consumeNumericLiteral() } override fun decodeFloat(): Float { - val result = reader.parseString("float") { toFloat() } + val result = lexer.parseString("float") { toFloat() } val specialFp = json.configuration.allowSpecialFloatingPointValues if (specialFp || result.isFinite()) return result - reader.throwInvalidFloatingPointDecoded(result) + 
lexer.throwInvalidFloatingPointDecoded(result) } override fun decodeDouble(): Double { - val result = reader.parseString("double") { toDouble() } + val result = lexer.parseString("double") { toDouble() } val specialFp = json.configuration.allowSpecialFloatingPointValues if (specialFp || result.isFinite()) return result - reader.throwInvalidFloatingPointDecoded(result) + lexer.throwInvalidFloatingPointDecoded(result) } override fun decodeChar(): Char { - val string = reader.consumeStringLenient() - if (string.length != 1) reader.fail("Expected single char, but got '$string'") + val string = lexer.consumeStringLenient() + if (string.length != 1) lexer.fail("Expected single char, but got '$string'") return string[0] } private fun decodeStringKey(): String { return if (configuration.isLenient) { - reader.consumeStringLenient() + lexer.consumeStringLenient() } else { - reader.consumeKeyString() + lexer.consumeKeyString() } } override fun decodeString(): String { return if (configuration.isLenient) { - reader.consumeStringLenient() + lexer.consumeStringLenient() } else { - reader.consumeString() + lexer.consumeString() } } override fun decodeInline(inlineDescriptor: SerialDescriptor): Decoder { - return if (inlineDescriptor.isUnsignedNumber) JsonDecoderForUnsignedTypes(reader, json) else this + return if (inlineDescriptor.isUnsignedNumber) JsonDecoderForUnsignedTypes(lexer, json) else this } override fun decodeEnum(enumDescriptor: SerialDescriptor): Int { @@ -258,19 +258,19 @@ internal open class StreamingJsonDecoder( @OptIn(ExperimentalSerializationApi::class) @ExperimentalUnsignedTypes internal class JsonDecoderForUnsignedTypes( - private val reader: JsonReader, + private val lexer: JsonLexer, json: Json ) : AbstractDecoder() { override val serializersModule: SerializersModule = json.serializersModule override fun decodeElementIndex(descriptor: SerialDescriptor): Int = error("unsupported") - override fun decodeInt(): Int = reader.parseString("UInt") { toUInt().toInt() 
} - override fun decodeLong(): Long = reader.parseString("ULong") { toULong().toLong() } - override fun decodeByte(): Byte = reader.parseString("UByte") { toUByte().toByte() } - override fun decodeShort(): Short = reader.parseString("UShort") { toUShort().toShort() } + override fun decodeInt(): Int = lexer.parseString("UInt") { toUInt().toInt() } + override fun decodeLong(): Long = lexer.parseString("ULong") { toULong().toLong() } + override fun decodeByte(): Byte = lexer.parseString("UByte") { toUByte().toByte() } + override fun decodeShort(): Short = lexer.parseString("UShort") { toUShort().toShort() } } -private inline fun JsonReader.parseString(expectedType: String, block: String.() -> T): T { +private inline fun JsonLexer.parseString(expectedType: String, block: String.() -> T): T { val input = consumeStringLenient() try { return input.block() diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/TreeJsonDecoder.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/TreeJsonDecoder.kt index 993b9676ac..ca659017a2 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/TreeJsonDecoder.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/TreeJsonDecoder.kt @@ -158,7 +158,7 @@ private sealed class AbstractJsonTreeDecoder( @OptIn(ExperimentalUnsignedTypes::class) override fun decodeTaggedInline(tag: String, inlineDescriptor: SerialDescriptor): Decoder { - return JsonDecoderForUnsignedTypes(JsonReader(getValue(tag).content), json) + return JsonDecoderForUnsignedTypes(JsonLexer(getValue(tag).content), json) } } diff --git a/formats/json/commonTest/src/kotlinx/serialization/json/JsonTestBase.kt b/formats/json/commonTest/src/kotlinx/serialization/json/JsonTestBase.kt index 0d6418b3be..ea9a569515 100644 --- a/formats/json/commonTest/src/kotlinx/serialization/json/JsonTestBase.kt +++ b/formats/json/commonTest/src/kotlinx/serialization/json/JsonTestBase.kt @@ -37,10 +37,10 @@ abstract class 
JsonTestBase { return if (useStreaming) { decodeFromString(deserializer, source) } else { - val reader = JsonReader(source) - val input = StreamingJsonDecoder(this, WriteMode.OBJ, reader) + val lexer = JsonLexer(source) + val input = StreamingJsonDecoder(this, WriteMode.OBJ, lexer) val tree = input.decodeJsonElement() - reader.expectEof() + lexer.expectEof() readJson(tree, deserializer) } }