-
Notifications
You must be signed in to change notification settings - Fork 620
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added ability to read buffered huge strings in custom KSerializers (#2012)
Added stream-friendly version of decodeString for new ChunkedDecoder interface. Fixes #1987. Co-authored-by: Leonid Startsev <sandwwraith@users.noreply.github.com>
- Loading branch information
1 parent
623dcad
commit 90113a9
Showing
7 changed files
with
275 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
51 changes: 51 additions & 0 deletions
51
core/commonMain/src/kotlinx/serialization/encoding/ChunkedDecoder.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
package kotlinx.serialization.encoding | ||
|
||
import kotlinx.serialization.ExperimentalSerializationApi | ||
|
||
/**
 * This interface indicates that a decoder supports consuming large strings by chunks via the
 * [decodeStringChunked] method.
 * Currently, only streaming json decoder implements this interface.
 * Please note that this interface is only applicable to streaming decoders. That means that it is not possible to use
 * some JsonTreeDecoder features like polymorphism with this interface.
 */
@ExperimentalSerializationApi
public interface ChunkedDecoder {
    /**
     * Method allows decoding a string value by fixed-size chunks.
     * Usable for handling very large strings that may not fit in memory.
     * Chunk size is guaranteed to not exceed 16384 chars (but it may be smaller than that).
     * Feeds string chunks to the provided consumer.
     *
     * Example usage:
     * ```
     * @Serializable(with = LargeStringSerializer::class)
     * data class LargeStringData(val largeString: String)
     *
     * @Serializable
     * data class ClassWithLargeStringDataField(val largeStringField: LargeStringData)
     *
     * object LargeStringSerializer : KSerializer<LargeStringData> {
     *     override val descriptor: SerialDescriptor = PrimitiveSerialDescriptor("LargeStringContent", PrimitiveKind.STRING)
     *
     *     override fun deserialize(decoder: Decoder): LargeStringData {
     *         require(decoder is ChunkedDecoder) { "Only chunked decoder supported" }
     *
     *         val tmpFile = createTempFile()
     *         FileWriter(tmpFile.toFile()).use { writer ->
     *             decoder.decodeStringChunked { chunk ->
     *                 writer.append(chunk)
     *             }
     *         }
     *         return LargeStringData("file://${tmpFile.absolutePathString()}")
     *     }
     *
     *     // serialize(...) is omitted here for brevity
     * }
     * ```
     *
     * In this sample, we need to be able to handle a huge string coming from json. Instead of storing it in memory,
     * we offload it into a file and return the file name instead.
     *
     * @param consumeChunk lambda function to handle string chunks
     */
    @ExperimentalSerializationApi
    public fun decodeStringChunked(consumeChunk: (chunk: String) -> Unit)
}
74 changes: 74 additions & 0 deletions
74
formats/json-tests/commonTest/src/kotlinx/serialization/json/JsonChunkedStringDecoderTest.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
package kotlinx.serialization.json | ||
|
||
import kotlinx.serialization.* | ||
import kotlinx.serialization.Serializable | ||
import kotlinx.serialization.descriptors.* | ||
import kotlinx.serialization.encoding.* | ||
import kotlinx.serialization.test.assertFailsWithMessage | ||
import kotlin.test.* | ||
|
||
|
||
/**
 * Test fixture: wraps a single string and is (de)serialized with [LargeStringSerializer].
 */
@Serializable(with = LargeStringSerializer::class)
data class LargeStringData(
    val largeString: String
)
|
||
/**
 * Test fixture: a serializable holder with one [LargeStringData] property.
 */
@Serializable
data class ClassWithLargeStringDataField(
    val largeStringField: LargeStringData
)
|
||
|
||
/**
 * Serializer for [LargeStringData] that reads the string through [ChunkedDecoder.decodeStringChunked]
 * and writes it with a plain [Encoder.encodeString] call.
 */
object LargeStringSerializer : KSerializer<LargeStringData> {
    override val descriptor: SerialDescriptor =
        PrimitiveSerialDescriptor("LargeStringContent", PrimitiveKind.STRING)

    /** Writes the wrapped string as a single string value. */
    override fun serialize(encoder: Encoder, value: LargeStringData) = encoder.encodeString(value.largeString)

    /**
     * Reassembles the string from the chunks handed over by the decoder.
     *
     * @throws IllegalArgumentException if [decoder] is not a [ChunkedDecoder]
     */
    override fun deserialize(decoder: Decoder): LargeStringData {
        require(decoder is ChunkedDecoder) { "Only chunked decoder supported" }

        // Concatenate the chunks in the order they are delivered.
        val content = buildString {
            decoder.decodeStringChunked { chunk -> append(chunk) }
        }
        return LargeStringData(content)
    }
}
|
||
/**
 * Checks that strings longer than 16k chars are decoded correctly through [LargeStringSerializer]
 * in the testing modes available to common tests.
 */
open class JsonChunkedStringDecoderTest : JsonTestBase() {

    @Test
    fun decodePlainLenientString() {
        // 4 * 8192 chars: make the string more than 16k
        val payload = "abcd".repeat(8192)
        val expected = ClassWithLargeStringDataField(LargeStringData(payload))
        val lenientJson = Json { isLenient = true }
        // The value is deliberately unquoted: lenient mode is what is being exercised here.
        val input = "{\"largeStringField\": $payload }"
        testDecodeInAllModes(lenientJson, input, expected)
    }

    @Test
    fun decodePlainString() {
        // Two 16k-char halves joined by a quote that has to be escaped in JSON
        val half = "abcd".repeat(4096)
        val payloadWithEscape = "$half\"$half"
        val expected = ClassWithLargeStringDataField(LargeStringData(payloadWithEscape))
        testDecodeInAllModes(Json, Json.encodeToString(expected), expected)
    }

    /**
     * Decodes [serializedObject] with [json] in every testing mode except JAVA_STREAMS.
     * TREE mode is expected to fail with the serializer's "Only chunked decoder supported" message;
     * every other mode must produce a value equal to [sourceObject]'s field.
     */
    private fun testDecodeInAllModes(
        json: Json, serializedObject: String, sourceObject: ClassWithLargeStringDataField
    ) {
        /* Java Streams mode is filtered out in common tests; it is tested separately in java tests */
        val commonModes = JsonTestingMode.values().filter { it != JsonTestingMode.JAVA_STREAMS }
        for (mode in commonModes) {
            if (mode != JsonTestingMode.TREE) {
                val decoded = json.decodeFromString<ClassWithLargeStringDataField>(serializedObject, mode)
                assertEquals(sourceObject.largeStringField, decoded.largeStringField)
                continue
            }
            assertFailsWithMessage<IllegalArgumentException>(
                "Only chunked decoder supported", "Shouldn't decode JSON in TREE mode"
            ) {
                json.decodeFromString<ClassWithLargeStringDataField>(serializedObject, mode)
            }
        }
    }
}
85 changes: 85 additions & 0 deletions
85
formats/json-tests/jvmTest/src/kotlinx/serialization/json/JsonChunkedBase64DecoderTest.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
package kotlinx.serialization.json | ||
|
||
import kotlinx.serialization.* | ||
import kotlinx.serialization.Serializable | ||
import kotlinx.serialization.descriptors.* | ||
import kotlinx.serialization.encoding.* | ||
import kotlinx.serialization.test.assertFailsWithMessage | ||
import org.junit.Test | ||
import java.io.* | ||
import java.util.* | ||
import kotlin.random.Random | ||
import kotlin.test.* | ||
|
||
|
||
/**
 * Test fixture: wraps a byte array and is (de)serialized with [LargeBase64StringSerializer].
 * Equality and hashing are based on the array contents rather than the array identity.
 */
@Serializable(with = LargeBase64StringSerializer::class)
data class LargeBinaryData(val binaryData: ByteArray) {
    override fun equals(other: Any?): Boolean = when {
        this === other -> true
        // Covers both a null argument and an instance of a different class.
        javaClass != other?.javaClass -> false
        else -> binaryData.contentEquals((other as LargeBinaryData).binaryData)
    }

    override fun hashCode(): Int = binaryData.contentHashCode()
}
|
||
/**
 * Test fixture: a serializable holder with one [LargeBinaryData] property.
 */
@Serializable
data class ClassWithBinaryDataField(
    val binaryField: LargeBinaryData
)
|
||
/**
 * Serializer for [LargeBinaryData] that stores the bytes as a Base64 string and decodes that string
 * incrementally through [ChunkedDecoder.decodeStringChunked].
 */
object LargeBase64StringSerializer : KSerializer<LargeBinaryData> {
    private val b64Decoder: Base64.Decoder = Base64.getDecoder()
    private val b64Encoder: Base64.Encoder = Base64.getEncoder()
    override val descriptor: SerialDescriptor = PrimitiveSerialDescriptor("LargeStringContent", PrimitiveKind.STRING)

    /**
     * Decodes the Base64 string chunk by chunk into a byte array.
     *
     * @throws IllegalArgumentException if [decoder] is not a [ChunkedDecoder], or if the Base64 decoder
     * rejects the text
     */
    override fun deserialize(decoder: Decoder): LargeBinaryData {
        require(decoder is ChunkedDecoder) { "Only chunked decoder supported" }

        // Chunk boundaries need not fall on a 4-char Base64 quantum, so the incomplete tail of each
        // chunk is carried over and prepended to the next one.
        var remainder = ""
        val decodedBytes = ByteArrayOutputStream().use { bos ->
            decoder.decodeStringChunked { chunk ->
                val actualChunk = remainder + chunk
                val alignedLength = actualChunk.length - actualChunk.length % 4
                remainder = actualChunk.substring(alignedLength)
                bos.write(b64Decoder.decode(actualChunk.substring(0, alignedLength)))
            }
            // Unpadded input can leave a tail after the last chunk; decode it instead of dropping it.
            if (remainder.isNotEmpty()) {
                bos.write(b64Decoder.decode(remainder))
            }
            bos.toByteArray()
        }

        return LargeBinaryData(decodedBytes)
    }

    /** Writes the bytes as one Base64-encoded string value. */
    override fun serialize(encoder: Encoder, value: LargeBinaryData) {
        encoder.encodeString(b64Encoder.encodeToString(value.binaryData))
    }
}
|
||
/**
 * Checks that a large Base64 payload is decoded correctly through [LargeBase64StringSerializer]
 * in every testing mode.
 */
class JsonChunkedBase64DecoderTest : JsonTestBase() {

    @Test
    fun decodeBase64String() {
        // After encoding to Base64 the payload will be larger than 16k (JsonLexer#BATCH_SIZE)
        val payload = LargeBinaryData(Random.nextBytes(16 * 1024))
        val sourceObject = ClassWithBinaryDataField(payload)
        val serializedObject = Json.encodeToString(sourceObject)

        for (mode in JsonTestingMode.values()) {
            if (mode != JsonTestingMode.TREE) {
                val decoded = Json.decodeFromString<ClassWithBinaryDataField>(serializedObject, mode)
                assertEquals(sourceObject.binaryField, decoded.binaryField)
                continue
            }
            // TREE mode must fail with the serializer's own message.
            assertFailsWithMessage<IllegalArgumentException>(
                "Only chunked decoder supported", "Shouldn't decode JSON in TREE mode"
            ) {
                Json.decodeFromString<ClassWithBinaryDataField>(serializedObject, mode)
            }
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters