Skip to content

Commit

Permalink
Add managed writer for Ion 1.1 with basic round-trip testing (#830)
Browse files Browse the repository at this point in the history
  • Loading branch information
popematt authored and tgregg committed Jun 28, 2024
1 parent 39f41ec commit 85b03c1
Show file tree
Hide file tree
Showing 23 changed files with 1,164 additions and 125 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
package com.amazon.ion.impl

import com.amazon.ion.impl.bin.*
import com.amazon.ion.util.*
import java.io.OutputStream

/**
* A [_Private_FastAppendable] that buffers data to blocks of memory which are managed by a [BlockAllocator]. Only when
* [flush] is called are the blocks written to the wrapped [OutputStream].
*
* This is necessary for cases where an [IonManagedWriter_1_1] over Ion text needs to emit encoding directives that are
* not known in advance. The [OutputStreamFastAppendable] class only buffers a fixed amount of data, so it is not safe
* to use if there are system values to be written. For a sufficiently large user value, an [OutputStreamFastAppendable]
* can end up flushing partial or whole user values flushing to the [OutputStream] before the [IonManagedWriter_1_1] can
* write the system value that it depends on.
*
* Once [IonManagedWriter_1_1] supports an auto-flush feature, then this class will have very little practical
* difference from [OutputStreamFastAppendable] for the case where no system values are needed.
*
* TODO:
* - Add proper tests
*/
internal class BlockBufferingOutputStreamFastAppendable(
private val out: OutputStream,
private val allocator: BlockAllocator,
/**
* The minimum utilization of a block before a longer value
* can skip the end of a block and just start a new block.
*/
minBlockUtilization: Double = 1.0,
) : OutputStream(), _Private_FastAppendable {

init {
// 0.0 would have the possibility of wasting entire blocks.
// 0.5 is somewhat arbitrary, but at least sensible that you should use at least
// half of a block before moving on to the next block.
require(minBlockUtilization in 0.5..1.0) { "minBlockUtilization must be between 0.5 and 1" }
require(allocator.blockSize > 10)
}

private val maxBlockWaste: Int = (allocator.blockSize * (1.0 - minBlockUtilization)).toInt()

private var index = -1
private val blocks = mutableListOf<Block>()
private var current: Block = nextBlock()

private fun nextBlock(): Block {
index++
if (index < 0) throw IllegalStateException("This output stream is closed.")
if (index >= blocks.size) blocks.add(allocator.allocateBlock())
return blocks[index]
}

override fun close() {
flush()
blocks.onEach { it.close() }.clear()
index = Int.MIN_VALUE
}

override fun flush() {
blocks.forEach { block ->
out.write(block.data, 0, block.limit)
block.reset()
}
index = 0
current = blocks[index]
}

override fun write(b: Int) {
if (current.remaining() < 1) current = nextBlock()
val block = current
block.data[block.limit] = b.toByte()
block.limit++
}

override fun write(b: ByteArray, off: Int, len: Int) {
if (len > current.remaining()) {
if (current.remaining() < maxBlockWaste && len < allocator.blockSize) {
current = nextBlock()
} else {
writeBytesSlow(b, off, len)
return
}
}
val block = current
System.arraycopy(b, off, block.data, block.limit, len)
block.limit += len
}

// slow in the sense that we do all kind of block boundary checking
private fun writeBytesSlow(bytes: ByteArray, _off: Int, _len: Int) {
var off = _off
var len = _len
while (len > 0) {
val block = current
val amount = Math.min(len, block.remaining())
System.arraycopy(bytes, off, block.data, block.limit, amount)
block.limit += amount
off += amount
len -= amount
if (block.remaining() == 0) {
current = nextBlock()
}
}
}

override fun append(c: Char): Appendable = apply { if (c.code < 0x80) appendAscii(c) else appendUtf16(c) }

override fun append(csq: CharSequence): Appendable = apply { append(csq, 0, csq.length) }

override fun append(csq: CharSequence, start: Int, end: Int): Appendable {
for (i in start until end) {
append(csq[i])
}
return this
}

override fun appendAscii(c: Char) {
assert(c.code < 0x80)
write(c.code)
}

override fun appendAscii(csq: CharSequence) = appendAscii(csq, 0, csq.length)

override fun appendAscii(csq: CharSequence, start: Int, end: Int) {
if (csq is String) {
// Using deprecated String.getBytes intentionally, since it is
// correct behavior in this case, and much faster.
var pos = start
val len = end - start
if (len > current.remaining() && current.remaining() < maxBlockWaste && len < allocator.blockSize) {
current = nextBlock()
}
while (true) {
val copyAmount = minOf(current.remaining(), end - pos)
csq.copyBytes(pos, pos + copyAmount, current.data, current.limit)
current.limit += copyAmount
pos += copyAmount
if (pos >= end) return
current = nextBlock()
}
} else {
append(csq, start, end)
}
}

override fun appendUtf16(c: Char) {
assert(c.code >= 0x80)
if (current.remaining() < 3) {
current = nextBlock()
}
if (c.code < 0x800) {
current.data[current.limit++] = (0xff and (0xC0 or (c.code shr 6))).toByte()
current.data[current.limit++] = (0xff and (0x80 or (c.code and 0x3F))).toByte()
} else if (c.code < 0x10000) {
current.data[current.limit++] = (0xff and (0xE0 or (c.code shr 12))).toByte()
current.data[current.limit++] = (0xff and (0x80 or (c.code shr 6 and 0x3F))).toByte()
current.data[current.limit++] = (0xff and (0x80 or (c.code and 0x3F))).toByte()
}
}

override fun appendUtf16Surrogate(leadSurrogate: Char, trailSurrogate: Char) {
// Here we must convert a UTF-16 surrogate pair to UTF-8 bytes.
val c = _Private_IonConstants.makeUnicodeScalar(leadSurrogate.code, trailSurrogate.code)
assert(c >= 0x10000)
if (current.remaining() < 4) {
current = nextBlock()
}
current.data[current.limit++] = (0xff and (0xF0 or (c shr 18))).toByte()
current.data[current.limit++] = (0xff and (0x80 or (c shr 12 and 0x3F))).toByte()
current.data[current.limit++] = (0xff and (0x80 or (c shr 6 and 0x3F))).toByte()
current.data[current.limit++] = (0xff and (0x80 or (c and 0x3F))).toByte()
}

/** Helper function to wrap [java.lang.String.getBytes]. */
private fun String.copyBytes(srcBegin: Int, srcEnd: Int, dst: ByteArray, dstBegin: Int) {
(this as java.lang.String).getBytes(srcBegin, srcEnd, dst, dstBegin)
}
}
17 changes: 15 additions & 2 deletions src/main/java/com/amazon/ion/impl/IonRawTextWriter_1_1.kt
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import java.time.Instant
* - Never writes using "long string" syntax in order to simplify the writer.
* - Uses `[: ... ]` for expression groups.
* - Does not try to resolve symbol tokens. That is the concern of the managed writer.
* - To make it easier to concatenate streams, this eagerly emits a top-level separator after each top-level syntax item.
*/
class IonRawTextWriter_1_1 internal constructor(
private val options: _Private_IonTextWriterBuilder,
Expand Down Expand Up @@ -113,7 +114,12 @@ class IonRawTextWriter_1_1 internal constructor(

private inline fun closeValue(valueWriterExpression: () -> Unit) {
valueWriterExpression()
isPendingSeparator = true
if (currentContainer == Top) {
output.appendAscii(options.topLevelSeparator())
isPendingSeparator = false
} else {
isPendingSeparator = true
}
isPendingLeadingWhitespace = false
currentContainerHasValues = true
}
Expand Down Expand Up @@ -141,7 +147,8 @@ class IonRawTextWriter_1_1 internal constructor(
confirm(currentContainer == Top) { "IVM can only be written at the top level of an Ion stream." }
confirm(numAnnotations == 0) { "Cannot write an IVM with annotations" }
output.appendAscii(IVM)
isPendingSeparator = true
output.appendAscii(options.topLevelSeparator())
isPendingSeparator = false
}

override fun isInStruct(): Boolean = currentContainer == Struct
Expand Down Expand Up @@ -200,6 +207,12 @@ class IonRawTextWriter_1_1 internal constructor(
numAnnotations += annotations.size
}

override fun _private_clearAnnotations() {
numAnnotations = 0
}

override fun _private_hasFieldName(): Boolean = hasFieldName

override fun writeFieldName(sid: Int) {
confirm(currentContainer == Struct) { "Cannot write field name outside of a struct." }
confirm(!hasFieldName) { "Field name already set." }
Expand Down
16 changes: 16 additions & 0 deletions src/main/java/com/amazon/ion/impl/IonRawWriter_1_1.kt
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,17 @@ interface IonRawWriter_1_1 {
*/
fun writeIVM()

/**
* *Attempts* to reset the current annotations. This is not guaranteed to succeed, and will
* throw an [IllegalStateException] if annotations have eagerly been written to the output
* buffer.
*
* TODO: Decide if this is something that should be public. It seems advantageous for the 1.1
* writer if we reserve the ability for it to eagerly write annotations, and if we want
* to keep this behavior, then it's best _not_ to expose this method.
*/
fun _private_clearAnnotations()

/**
* Writes one annotation for the next value.
* [writeAnnotations] may be called more than once to build up a list of annotations.
Expand Down Expand Up @@ -84,6 +95,11 @@ interface IonRawWriter_1_1 {
*/
fun writeAnnotations(annotations: Array<CharSequence>)

/**
* TODO: Consider making this a public method. It's probably safe to do so.
*/
fun _private_hasFieldName(): Boolean

/**
* Writes the field name for the next value. Must be called while in a struct and must be called before [writeAnnotations].
* @throws com.amazon.ion.IonException if annotations are already written for the value or if not in a struct.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1187,7 +1187,7 @@ private static double readFloat16(ByteBuffer byteBuffer) {
int fraction = bits & FLOAT_16_FRACTION_MASK;
if (exponent == 0) {
if (fraction == 0) {
return sign == 0 ? -0e0 : 0e0;
return sign == 0 ? 0e0 : -0e0;
}
// Denormalized
throw new UnsupportedOperationException("Support for denormalized half-precision floats not yet added.");
Expand Down
20 changes: 4 additions & 16 deletions src/main/java/com/amazon/ion/impl/IonReaderTextUserX.java
Original file line number Diff line number Diff line change
@@ -1,18 +1,5 @@
/*
* Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/

// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
package com.amazon.ion.impl;

import static com.amazon.ion.SystemSymbols.ION_1_0;
Expand Down Expand Up @@ -146,7 +133,8 @@ private final boolean has_next_user_value()
String version = symbolValue().getText();
if (isIonVersionMarker(version))
{
if (ION_1_0.equals(version))
// TODO: Determine if Ion 1.0 and 1.1 need separate branches here.
if (ION_1_0.equals(version) || "$ion_1_1".equals(version))
{
if (_value_keyword != IonTokenConstsX.KEYWORD_sid)
{
Expand Down
23 changes: 5 additions & 18 deletions src/main/java/com/amazon/ion/impl/LocalSymbolTable.java
Original file line number Diff line number Diff line change
@@ -1,18 +1,5 @@
/*
* Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/

// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
package com.amazon.ion.impl;

import static com.amazon.ion.SystemSymbols.IMPORTS;
Expand Down Expand Up @@ -49,11 +36,11 @@
* <p>
* Instances of this class are safe for use by multiple threads.
*/
class LocalSymbolTable
public class LocalSymbolTable
implements _Private_LocalSymbolTable
{

static class Factory implements _Private_LocalSymbolTableFactory
public static class Factory implements _Private_LocalSymbolTableFactory
{

private Factory(){} // Should be accessed through the singleton
Expand Down Expand Up @@ -88,7 +75,7 @@ public SymbolTable newLocalSymtab(SymbolTable defaultSystemSymtab,

}

static final Factory DEFAULT_LST_FACTORY = new Factory();
public static final Factory DEFAULT_LST_FACTORY = new Factory();

/**
* The initial length of {@link #mySymbolNames}.
Expand Down
19 changes: 3 additions & 16 deletions src/main/java/com/amazon/ion/impl/bin/Block.java
Original file line number Diff line number Diff line change
@@ -1,18 +1,5 @@
/*
* Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/

// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
package com.amazon.ion.impl.bin;

import java.io.Closeable;
Expand All @@ -24,7 +11,7 @@
* <p>
* This class and its implementations are <b>not</b> thread-safe.
*/
/*package*/ abstract class Block implements Closeable
public abstract class Block implements Closeable
{
/** The data backing this block. */
public final byte[] data;
Expand Down
Loading

0 comments on commit 85b03c1

Please sign in to comment.