Skip to content

Commit

Permalink
Performance-friendly JsonLexer (#1635)
Browse files Browse the repository at this point in the history
* Performance-friendly JsonLexer
  • Loading branch information
qwwdfsad authored and sandwwraith committed Sep 3, 2021
1 parent 036936d commit bb0dc8f
Show file tree
Hide file tree
Showing 11 changed files with 227 additions and 108 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ public sealed class Json(
* @throws [SerializationException] if the given JSON string cannot be deserialized to the value of type [T].
*/
public final override fun <T> decodeFromString(deserializer: DeserializationStrategy<T>, string: String): T {
val lexer = JsonLexer(string)
val lexer = StringJsonLexer(string)
val input = StreamingJsonDecoder(this, WriteMode.OBJ, lexer, deserializer.descriptor)
val result = input.decodeSerializableValue(deserializer)
lexer.expectEof()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ internal fun InvalidFloatingPointDecoded(value: Number, key: String, output: Str
JsonDecodingException(-1, unexpectedFpErrorMessage(value, key, output))

// Extension on the JSON reader that fails immediately
internal fun JsonLexer.throwInvalidFloatingPointDecoded(result: Number): Nothing {
internal fun AbstractJsonLexer.throwInvalidFloatingPointDecoded(result: Number): Nothing {
fail("Unexpected special floating-point value $result. By default, " +
"non-finite floating point values are prohibited because they do not conform JSON specification. " +
specialFlowingValuesHint
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import kotlinx.serialization.json.*
@OptIn(ExperimentalSerializationApi::class)
internal class JsonTreeReader(
configuration: JsonConfiguration,
private val lexer: JsonLexer
private val lexer: AbstractJsonLexer
) {
private val isLenient = configuration.isLenient
private var stackDepth = 0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,13 @@ import kotlinx.serialization.modules.*
import kotlin.jvm.*

/**
* [JsonDecoder] which reads given JSON from [JsonLexer] field by field.
* [JsonDecoder] which reads given JSON from [AbstractJsonLexer] field by field.
*/
@OptIn(ExperimentalSerializationApi::class, ExperimentalUnsignedTypes::class)
internal open class StreamingJsonDecoder(
final override val json: Json,
private val mode: WriteMode,
@JvmField internal val lexer: JsonLexer,
@JvmField internal val lexer: AbstractJsonLexer,
descriptor: SerialDescriptor
) : JsonDecoder, AbstractDecoder() {

Expand Down Expand Up @@ -256,7 +256,7 @@ internal open class StreamingJsonDecoder(
@OptIn(ExperimentalSerializationApi::class)
@ExperimentalUnsignedTypes
internal class JsonDecoderForUnsignedTypes(
private val lexer: JsonLexer,
private val lexer: AbstractJsonLexer,
json: Json
) : AbstractDecoder() {
override val serializersModule: SerializersModule = json.serializersModule
Expand All @@ -268,7 +268,7 @@ internal class JsonDecoderForUnsignedTypes(
override fun decodeShort(): Short = lexer.parseString("UShort") { toUShort().toShort() }
}

private inline fun <T> JsonLexer.parseString(expectedType: String, block: String.() -> T): T {
private inline fun <T> AbstractJsonLexer.parseString(expectedType: String, block: String.() -> T): T {
val input = consumeStringLenient()
try {
return input.block()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ private sealed class AbstractJsonTreeDecoder(

@OptIn(ExperimentalUnsignedTypes::class)
override fun decodeTaggedInline(tag: String, inlineDescriptor: SerialDescriptor): Decoder =
if (inlineDescriptor.isUnsignedNumber) JsonDecoderForUnsignedTypes(JsonLexer(getPrimitiveValue(tag).content), json)
if (inlineDescriptor.isUnsignedNumber) JsonDecoderForUnsignedTypes(StringJsonLexer(getPrimitiveValue(tag).content), json)
else super.decodeTaggedInline(tag, inlineDescriptor)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@

package kotlinx.serialization.json.internal

import kotlinx.serialization.json.internal.*
import kotlinx.serialization.json.internal.CharMappings.CHAR_TO_TOKEN
import kotlinx.serialization.json.internal.CharMappings.ESCAPE_2_CHAR
import kotlin.jvm.JvmField
import kotlin.js.*
import kotlin.jvm.*

internal const val lenientHint = "Use 'isLenient = true' in 'Json {}` builder to accept non-compliant JSON."
internal const val coerceInputValuesHint = "Use 'coerceInputValues = true' in 'Json {}` builder to coerce nulls to default values."
Expand Down Expand Up @@ -118,60 +120,47 @@ internal fun charToTokenClass(c: Char) = if (c.code < CTC_MAX) CHAR_TO_TOKEN[c.c

/**
 * Looks up the character stored in [ESCAPE_2_CHAR] for the escape code [c].
 * Codes that are not below [ESC2C_MAX] yield [INVALID].
 */
internal fun escapeToChar(c: Int): Char {
    if (c >= ESC2C_MAX) return INVALID
    return ESCAPE_2_CHAR[c]
}

// Streaming JSON reader
internal open class JsonLexer(@JvmField protected var source: CharSequence) {
/**
* The base class that reads the JSON from the given char sequence source.
* It has two implementations: one over the raw [String] instance, [StringJsonLexer],
* and one over an arbitrary stream of data, [ReaderJsonLexer] (JVM-only).
*
* [AbstractJsonLexer] contains the base implementation of cold or otherwise not performance-sensitive
* methods on top of [CharSequence], while [StringJsonLexer] overrides some
* of them for performance reasons (devirtualization of [CharSequence] and avoidance
* of additional spills).
*/
internal abstract class AbstractJsonLexer {

protected abstract val source: CharSequence

@JvmField
protected var currentPosition: Int = 0 // position in source

// Intentionally empty by default; declared `open` so that a subclass can override it.
// NOTE(review): presumably overridden by a stream-backed lexer to load more input — confirm.
open fun ensureHaveChars() {}

/**
 * Consumes the next token and reports a failure unless it is [TC_EOF].
 * The failure message quotes the character located just before [currentPosition].
 */
fun expectEof() {
    if (consumeNextToken() == TC_EOF) return
    fail("Expected EOF, but had ${source[currentPosition - 1]} instead")
}
// Used as bound check in loops
abstract fun definitelyNotEof(position: Int): Int

// should be used inside loops instead of range checks
/**
 * Bounds check expressed as a value: returns [position] unchanged while it is
 * inside [source], and -1 once it reaches or passes the end of [source].
 */
protected open fun definitelyNotEof(position: Int): Int {
    if (position >= source.length) return -1
    return position
}
abstract fun tryConsumeComma(): Boolean

abstract fun canConsumeValue(): Boolean

/**
 * Tries to consume a single ',' located at the position returned by [skipWhitespaces].
 *
 * Returns `true` and increments [currentPosition] when that character is a comma.
 * Returns `false`, without incrementing, when the position is -1, lies at or past
 * the end of [source], or holds any other character.
 */
fun tryConsumeComma(): Boolean {
    val position = skipWhitespaces()
    val outOfInput = position >= source.length || position == -1
    if (outOfInput || source[position] != ',') return false
    ++currentPosition
    return true
}

/**
 * Checks whether a value can start at the next non-whitespace character.
 *
 * Starting at [currentPosition], skips ' ', '\n', '\r' and '\t', stores the position of the
 * first other character into [currentPosition] and returns [isValidValueStart] for it.
 * Returns `false` when [definitelyNotEof] yields -1 before such a character is found;
 * on that path [currentPosition] is assigned the -1 value as well.
 */
fun canConsumeValue(): Boolean {
ensureHaveChars()
// Work on a local copy of the position; the field is written only right before returning.
var current = currentPosition
while (true) {
current = definitelyNotEof(current)
if (current == -1) break // could be inline function but KT-1436
val c = source[current]
// Inlined skipWhitespaces without field spill and nested loop. Also faster than char2TokenClass
if (c == ' ' || c == '\n' || c == '\r' || c == '\t') {
++current
continue
}
// First non-whitespace character: publish its position and classify it.
currentPosition = current
return isValidValueStart(c)
}
// Loop was left because definitelyNotEof returned -1.
currentPosition = current
return false
}
abstract fun consumeNextToken(): Byte

private fun isValidValueStart(c: Char): Boolean {
/**
 * Tells whether [c] may begin a value: every character qualifies except
 * the closing brackets '}' and ']', the colon and the comma.
 */
protected fun isValidValueStart(c: Char): Boolean {
    val isStructuralTerminator = c == '}' || c == ']' || c == ':' || c == ','
    return !isStructuralTerminator
}

/**
 * Consumes the next token and reports a failure unless it is [TC_EOF].
 * The failure message quotes the character located just before [currentPosition].
 */
fun expectEof() {
    if (consumeNextToken() == TC_EOF) return
    fail("Expected EOF, but had ${source[currentPosition - 1]} instead")
}

/*
* Peeked string for coerced enums.
* If the value was picked, 'consumeString' will take it without scanning the source.
Expand All @@ -188,7 +177,7 @@ internal open class JsonLexer(@JvmField protected var source: CharSequence) {
return token
}

fun consumeNextToken(expected: Char) {
open fun consumeNextToken(expected: Char) {
ensureHaveChars()
val source = source
var cpos = currentPosition
Expand All @@ -205,15 +194,15 @@ internal open class JsonLexer(@JvmField protected var source: CharSequence) {
unexpectedToken(expected) // EOF
}

private fun unexpectedToken(expected: Char) {
/**
 * Reports that the token [expected] was not found at the current position.
 *
 * Steps [currentPosition] back by one and, when a string was expected, reads the input
 * with [consumeStringLenient]: if that read equals [NULL], fails with a dedicated message
 * that includes [coerceInputValuesHint] and points at `currentPosition - 4`.
 * In every other case the failure is delegated to `fail` with the token class of [expected].
 */
protected fun unexpectedToken(expected: Char) {
--currentPosition // To properly handle null
// The lenient read runs only when a string was expected (short-circuit `&&`).
// NOTE(review): the offset of 4 presumably matches the length of the 'null' literal that was just read — confirm.
if (expected == STRING && consumeStringLenient() == NULL) {
fail("Expected string literal but 'null' literal was found.\n$coerceInputValuesHint", currentPosition - 4)
}
fail(charToTokenClass(expected))
}

private fun fail(expectedToken: Byte) {
protected fun fail(expectedToken: Byte) {
// We know that the token was consumed prior to this call
// Slow path, never called in normal code, can avoid optimizing it
val expected = when (expectedToken) {
Expand Down Expand Up @@ -248,26 +237,6 @@ internal open class JsonLexer(@JvmField protected var source: CharSequence) {
return TC_EOF
}

/**
 * Reads the next token class, skipping characters classified as [TC_WHITESPACE].
 *
 * On success [currentPosition] is set to the index right after the consumed character and
 * its token class is returned. When [definitelyNotEof] yields -1 first, [currentPosition]
 * receives that value and [TC_EOF] is returned.
 */
fun consumeNextToken(): Byte {
    ensureHaveChars()
    val input = source
    var position = currentPosition
    while (true) {
        position = definitelyNotEof(position)
        if (position == -1) break
        val tokenClass = charToTokenClass(input[position++])
        if (tokenClass == TC_WHITESPACE) continue
        currentPosition = position
        return tokenClass
    }
    currentPosition = position
    return TC_EOF
}

/**
* Tries to consume `null` token from input.
* Returns `true` if the next 4 chars in input are not `null`,
Expand All @@ -291,7 +260,7 @@ internal open class JsonLexer(@JvmField protected var source: CharSequence) {
return false
}

private fun skipWhitespaces(): Int {
open fun skipWhitespaces(): Int {
var current = currentPosition
// Skip whitespaces
while (true) {
Expand Down Expand Up @@ -329,33 +298,7 @@ internal open class JsonLexer(@JvmField protected var source: CharSequence) {
* This method is a copy of consumeString, but it is used for the keys of JSON objects, so there
* is no need to look up the peeked string.
*/
fun consumeKeyString(): String {
/*
 * For strings we assume that escaped symbols are rather an exception, so firstly
 * we optimistically scan for closing quote via intrinsified and blazing-fast 'indexOf',
 * then do our pessimistic check for backslash and fallback to slow-path if necessary.
 */
consumeNextToken(STRING)
var current = currentPosition
val closingQuote = indexOf('"', current)
// No closing quote was found when searching from `current`.
if (closingQuote == -1) {
current = definitelyNotEof(current)
if (current == -1) fail(TC_STRING)
// it's also possible just to resize buffer,
// instead of falling back to slow path,
// not sure what is better
else return consumeString(currentPosition, current)
}
// Now we _optimistically_ know where the string ends (it might have been an escaped quote)
for (i in current until closingQuote) {
// Encountered escape sequence, should fallback to "slow" path and symbolic scanning
if (source[i] == STRING_ESC) {
return consumeString(currentPosition, i)
}
}
// Fast path: no escape character before the quote, so move past the quote
// and return the characters between `current` and the quote.
this.currentPosition = closingQuote + 1
return substring(current, closingQuote)
}
abstract fun consumeKeyString(): String

fun consumeString(): String {
if (peekedString != null) {
Expand All @@ -365,10 +308,10 @@ internal open class JsonLexer(@JvmField protected var source: CharSequence) {
return consumeKeyString()
}

private fun consumeString(startPosition: Int, current: Int): String {
@JsName("consumeString2") // WA for JS issue
protected fun consumeString(source: CharSequence, startPosition: Int, current: Int): String {
var currentPosition = current
var lastPosition = startPosition
var source = source
var char = source[currentPosition] // Avoid two range checks visible in the profiler
var usedAppend = false
while (char != STRING) {
Expand All @@ -383,7 +326,6 @@ internal open class JsonLexer(@JvmField protected var source: CharSequence) {
currentPosition = definitelyNotEof(currentPosition)
if (currentPosition == -1)
fail("EOF", currentPosition)
source = this.source
lastPosition = currentPosition
}
char = source[currentPosition]
Expand Down Expand Up @@ -453,11 +395,13 @@ internal open class JsonLexer(@JvmField protected var source: CharSequence) {
if (current >= source.length) {
usedAppend = true
appendRange(currentPosition, current)
current = definitelyNotEof(current)
if (current == -1) {
val eof = definitelyNotEof(current)
if (eof == -1) {
// to handle plain lenient strings, such as top-level
currentPosition = current
return decodedString(0, 0)
} else {
current = eof
}
}
}
Expand Down Expand Up @@ -647,6 +591,7 @@ internal open class JsonLexer(@JvmField protected var source: CharSequence) {
return result
}

@JsName("consumeBoolean2") // WA for JS issue
private fun consumeBoolean(start: Int): Boolean {
/*
* In ASCII representation, upper and lower case letters are different
Expand Down
Loading

0 comments on commit bb0dc8f

Please sign in to comment.