Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add managed writer for Ion 1.1 with basic round-trip testing #830

Merged
merged 3 commits into from
May 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
package com.amazon.ion.impl

import com.amazon.ion.impl.bin.*
import com.amazon.ion.util.*
import java.io.OutputStream

/**
* A [_Private_FastAppendable] that buffers data to blocks of memory which are managed by a [BlockAllocator]. Only when
* [flush] is called are the blocks written to the wrapped [OutputStream].
*
* This is necessary for cases where an [IonManagedWriter_1_1] over Ion text needs to emit encoding directives that are
* not known in advance. The [OutputStreamFastAppendable] class only buffers a fixed amount of data, so it is not safe
* to use if there are system values to be written. For a sufficiently large user value, an [OutputStreamFastAppendable]
* can end up flushing partial or whole user values flushing to the [OutputStream] before the [IonManagedWriter_1_1] can
* write the system value that it depends on.
*
* Once [IonManagedWriter_1_1] supports an auto-flush feature, then this class will have very little practical
* difference from [OutputStreamFastAppendable] for the case where no system values are needed.
*
* TODO:
* - Add proper tests
*/
internal class BlockBufferingOutputStreamFastAppendable(
private val out: OutputStream,
private val allocator: BlockAllocator,
/**
* The minimum utilization of a block before a longer value
* can skip the end of a block and just start a new block.
*/
minBlockUtilization: Double = 1.0,
) : OutputStream(), _Private_FastAppendable {

init {
// 0.0 would have the possibility of wasting entire blocks.
// 0.5 is somewhat arbitrary, but at least sensible that you should use at least
// half of a block before moving on to the next block.
require(minBlockUtilization in 0.5..1.0) { "minBlockUtilization must be between 0.5 and 1" }
require(allocator.blockSize > 10)
}

private val maxBlockWaste: Int = (allocator.blockSize * (1.0 - minBlockUtilization)).toInt()

private var index = -1
private val blocks = mutableListOf<Block>()
private var current: Block = nextBlock()

private fun nextBlock(): Block {
index++
if (index < 0) throw IllegalStateException("This output stream is closed.")
if (index >= blocks.size) blocks.add(allocator.allocateBlock())
return blocks[index]
}

override fun close() {
flush()
blocks.onEach { it.close() }.clear()
index = Int.MIN_VALUE
}

override fun flush() {
blocks.forEach { block ->
out.write(block.data, 0, block.limit)
block.reset()
}
index = 0
current = blocks[index]
}

override fun write(b: Int) {
if (current.remaining() < 1) current = nextBlock()
val block = current
block.data[block.limit] = b.toByte()
block.limit++
}

override fun write(b: ByteArray, off: Int, len: Int) {
if (len > current.remaining()) {
if (current.remaining() < maxBlockWaste && len < allocator.blockSize) {
current = nextBlock()
} else {
writeBytesSlow(b, off, len)
return
}
}
val block = current
System.arraycopy(b, off, block.data, block.limit, len)
block.limit += len
}

// slow in the sense that we do all kind of block boundary checking
private fun writeBytesSlow(bytes: ByteArray, _off: Int, _len: Int) {
var off = _off
var len = _len
while (len > 0) {
val block = current
val amount = Math.min(len, block.remaining())
System.arraycopy(bytes, off, block.data, block.limit, amount)
block.limit += amount
off += amount
len -= amount
if (block.remaining() == 0) {
current = nextBlock()
}
}
}

override fun append(c: Char): Appendable = apply { if (c.code < 0x80) appendAscii(c) else appendUtf16(c) }

override fun append(csq: CharSequence): Appendable = apply { append(csq, 0, csq.length) }

override fun append(csq: CharSequence, start: Int, end: Int): Appendable {
for (i in start until end) {
append(csq[i])
}
return this
}

override fun appendAscii(c: Char) {
assert(c.code < 0x80)
write(c.code)
}

override fun appendAscii(csq: CharSequence) = appendAscii(csq, 0, csq.length)

override fun appendAscii(csq: CharSequence, start: Int, end: Int) {
if (csq is String) {
// Using deprecated String.getBytes intentionally, since it is
// correct behavior in this case, and much faster.
var pos = start
val len = end - start
if (len > current.remaining() && current.remaining() < maxBlockWaste && len < allocator.blockSize) {
current = nextBlock()
}
while (true) {
val copyAmount = minOf(current.remaining(), end - pos)
csq.copyBytes(pos, pos + copyAmount, current.data, current.limit)
current.limit += copyAmount
pos += copyAmount
if (pos >= end) return
current = nextBlock()
}
} else {
append(csq, start, end)
}
}

override fun appendUtf16(c: Char) {
assert(c.code >= 0x80)
if (current.remaining() < 3) {
current = nextBlock()
}
if (c.code < 0x800) {
current.data[current.limit++] = (0xff and (0xC0 or (c.code shr 6))).toByte()
current.data[current.limit++] = (0xff and (0x80 or (c.code and 0x3F))).toByte()
} else if (c.code < 0x10000) {
current.data[current.limit++] = (0xff and (0xE0 or (c.code shr 12))).toByte()
current.data[current.limit++] = (0xff and (0x80 or (c.code shr 6 and 0x3F))).toByte()
current.data[current.limit++] = (0xff and (0x80 or (c.code and 0x3F))).toByte()
}
}

override fun appendUtf16Surrogate(leadSurrogate: Char, trailSurrogate: Char) {
// Here we must convert a UTF-16 surrogate pair to UTF-8 bytes.
val c = _Private_IonConstants.makeUnicodeScalar(leadSurrogate.code, trailSurrogate.code)
assert(c >= 0x10000)
if (current.remaining() < 4) {
current = nextBlock()
}
current.data[current.limit++] = (0xff and (0xF0 or (c shr 18))).toByte()
current.data[current.limit++] = (0xff and (0x80 or (c shr 12 and 0x3F))).toByte()
current.data[current.limit++] = (0xff and (0x80 or (c shr 6 and 0x3F))).toByte()
current.data[current.limit++] = (0xff and (0x80 or (c and 0x3F))).toByte()
}

/** Helper function to wrap [java.lang.String.getBytes]. */
private fun String.copyBytes(srcBegin: Int, srcEnd: Int, dst: ByteArray, dstBegin: Int) {
(this as java.lang.String).getBytes(srcBegin, srcEnd, dst, dstBegin)
}
}
17 changes: 15 additions & 2 deletions src/main/java/com/amazon/ion/impl/IonRawTextWriter_1_1.kt
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import java.time.Instant
* - Never writes using "long string" syntax in order to simplify the writer.
* - Uses `[: ... ]` for expression groups.
* - Does not try to resolve symbol tokens. That is the concern of the managed writer.
* - To make it easier to concatenate streams, this eagerly emits a top-level separator after each top-level syntax item.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🗺️ I think this is something that we can probably live with.

*/
class IonRawTextWriter_1_1 internal constructor(
private val options: _Private_IonTextWriterBuilder,
Expand Down Expand Up @@ -113,7 +114,12 @@ class IonRawTextWriter_1_1 internal constructor(

private inline fun closeValue(valueWriterExpression: () -> Unit) {
valueWriterExpression()
isPendingSeparator = true
if (currentContainer == Top) {
output.appendAscii(options.topLevelSeparator())
isPendingSeparator = false
} else {
isPendingSeparator = true
}
isPendingLeadingWhitespace = false
currentContainerHasValues = true
}
Expand Down Expand Up @@ -141,7 +147,8 @@ class IonRawTextWriter_1_1 internal constructor(
confirm(currentContainer == Top) { "IVM can only be written at the top level of an Ion stream." }
confirm(numAnnotations == 0) { "Cannot write an IVM with annotations" }
output.appendAscii(IVM)
isPendingSeparator = true
output.appendAscii(options.topLevelSeparator())
isPendingSeparator = false
}

override fun isInStruct(): Boolean = currentContainer == Struct
Expand Down Expand Up @@ -200,6 +207,12 @@ class IonRawTextWriter_1_1 internal constructor(
numAnnotations += annotations.size
}

override fun _private_clearAnnotations() {
numAnnotations = 0
}

override fun _private_hasFieldName(): Boolean = hasFieldName

override fun writeFieldName(sid: Int) {
confirm(currentContainer == Struct) { "Cannot write field name outside of a struct." }
confirm(!hasFieldName) { "Field name already set." }
Expand Down
16 changes: 16 additions & 0 deletions src/main/java/com/amazon/ion/impl/IonRawWriter_1_1.kt
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,17 @@ interface IonRawWriter_1_1 {
*/
fun writeIVM()

/**
* *Attempts* to reset the current annotations. This is not guaranteed to succeed, and will
* throw an [IllegalStateException] if annotations have eagerly been written to the output
* buffer.
*
* TODO: Decide if this is something that should be public. It seems advantageous for the 1.1
* writer if we reserve the ability for it to eagerly write annotations, and if we want
* to keep this behavior, then it's best _not_ to expose this method.
*/
fun _private_clearAnnotations()

/**
* Writes one annotation for the next value.
* [writeAnnotations] may be called more than once to build up a list of annotations.
Expand Down Expand Up @@ -84,6 +95,11 @@ interface IonRawWriter_1_1 {
*/
fun writeAnnotations(annotations: Array<CharSequence>)

/**
* TODO: Consider making this a public method. It's probably safe to do so.
*/
fun _private_hasFieldName(): Boolean

/**
* Writes the field name for the next value. Must be called while in a struct and must be called before [writeAnnotations].
* @throws com.amazon.ion.IonException if annotations are already written for the value or if not in a struct.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1187,7 +1187,7 @@ private static double readFloat16(ByteBuffer byteBuffer) {
int fraction = bits & FLOAT_16_FRACTION_MASK;
if (exponent == 0) {
if (fraction == 0) {
return sign == 0 ? -0e0 : 0e0;
return sign == 0 ? 0e0 : -0e0;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🗺️ I'm pretty sure that if I'm understanding it right, when the sign bit is 0, the float is positive. Either way, I needed to make this change to get the Ion 1.1 round trip tests to pass.

}
// Denormalized
throw new UnsupportedOperationException("Support for denormalized half-precision floats not yet added.");
Expand Down
20 changes: 4 additions & 16 deletions src/main/java/com/amazon/ion/impl/IonReaderTextUserX.java
Original file line number Diff line number Diff line change
@@ -1,18 +1,5 @@
/*
* Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/

// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
package com.amazon.ion.impl;

import static com.amazon.ion.SystemSymbols.ION_1_0;
Expand Down Expand Up @@ -146,7 +133,8 @@ private final boolean has_next_user_value()
String version = symbolValue().getText();
if (isIonVersionMarker(version))
{
if (ION_1_0.equals(version))
// TODO: Determine if Ion 1.0 and 1.1 need separate branches here.
if (ION_1_0.equals(version) || "$ion_1_1".equals(version))
{
if (_value_keyword != IonTokenConstsX.KEYWORD_sid)
{
Expand Down
23 changes: 5 additions & 18 deletions src/main/java/com/amazon/ion/impl/LocalSymbolTable.java
Original file line number Diff line number Diff line change
@@ -1,18 +1,5 @@
/*
* Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/

// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
package com.amazon.ion.impl;

import static com.amazon.ion.SystemSymbols.IMPORTS;
Expand Down Expand Up @@ -49,11 +36,11 @@
* <p>
* Instances of this class are safe for use by multiple threads.
*/
class LocalSymbolTable
public class LocalSymbolTable
implements _Private_LocalSymbolTable
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🗺️ Just visibility changes in this file.

{

static class Factory implements _Private_LocalSymbolTableFactory
public static class Factory implements _Private_LocalSymbolTableFactory
{

private Factory(){} // Should be accessed through the singleton
Expand Down Expand Up @@ -88,7 +75,7 @@ public SymbolTable newLocalSymtab(SymbolTable defaultSystemSymtab,

}

static final Factory DEFAULT_LST_FACTORY = new Factory();
public static final Factory DEFAULT_LST_FACTORY = new Factory();

/**
* The initial length of {@link #mySymbolNames}.
Expand Down
19 changes: 3 additions & 16 deletions src/main/java/com/amazon/ion/impl/bin/Block.java
Original file line number Diff line number Diff line change
@@ -1,18 +1,5 @@
/*
* Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/

// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
package com.amazon.ion.impl.bin;

import java.io.Closeable;
Expand All @@ -24,7 +11,7 @@
* <p>
* This class and its implementations are <b>not</b> thread-safe.
*/
/*package*/ abstract class Block implements Closeable
public abstract class Block implements Closeable
{
/** The data backing this block. */
public final byte[] data;
Expand Down
Loading
Loading