diff --git a/src/main/java/com/amazon/ion/MacroAwareIonReader.kt b/src/main/java/com/amazon/ion/MacroAwareIonReader.kt new file mode 100644 index 000000000..b05a7b09d --- /dev/null +++ b/src/main/java/com/amazon/ion/MacroAwareIonReader.kt @@ -0,0 +1,41 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion + +import java.io.Closeable +import java.io.IOException + +/** + * An enhancement to an Ion reader that supports macro-aware transcoding. + */ +interface MacroAwareIonReader : Closeable { + + /** + * Performs a macro-aware transcode of the stream being read by this reader. + * For Ion 1.0 streams, this functions similarly to providing a system-level + * [IonReader] to [IonWriter.writeValues]. For Ion 1.1 streams, the transcoded + * stream will include the same symbol tables, encoding directives, and + * e-expression invocations as the source stream. In both cases, the + * transcoded stream will be data-model equivalent to the source stream. + * + * The following limitations should be noted: + * 1. Encoding directives with no effect on the encoding context may be + * elided from the transcoded stream. An example would be an encoding + * directive that re-exports the existing context but adds no new + * macros or new symbols. + * 2. When transcoding from text to text, comments will not be preserved. + * 3. Open content in encoding directives (e.g. macro invocations that + * expand to nothing) will not be preserved. + * 4. Granular details of the binary encoding, like inlining vs. interning + * for a particular symbol or length-prefixing vs. delimiting for a + * particular container, may not be preserved. It is up to the user + * to provide a writer configured to match these details if important. + * + * To get a [MacroAwareIonReader] use `_Private_IonReaderBuilder.buildMacroAware`. 
+ * To get a [MacroAwareIonWriter] use [IonEncodingVersion.textWriterBuilder] or + * [IonEncodingVersion.binaryWriterBuilder]. + * @param writer the writer to which the reader's stream will be transcoded. + */ + @Throws(IOException::class) + fun transcodeTo(writer: MacroAwareIonWriter) +} diff --git a/src/main/java/com/amazon/ion/MacroAwareIonWriter.kt b/src/main/java/com/amazon/ion/MacroAwareIonWriter.kt index e6610b016..633931d01 100644 --- a/src/main/java/com/amazon/ion/MacroAwareIonWriter.kt +++ b/src/main/java/com/amazon/ion/MacroAwareIonWriter.kt @@ -13,6 +13,35 @@ import com.amazon.ion.impl.macro.* */ interface MacroAwareIonWriter : IonWriter { + /** + * Starts a new encoding segment with an Ion version marker, flushing + * the previous segment (if any) and resetting the encoding context. + */ + fun startEncodingSegmentWithIonVersionMarker() + + /** + * Starts a new encoding segment with an encoding directive, flushing + * the previous segment (if any). + * @param macros the macros added in the new segment. + * @param isMacroTableAppend true if the macros from the previous segment + * are to remain available. + * @param symbols the symbols added in the new segment. + * @param isSymbolTableAppend true if the symbols from the previous + * segment are to remain available. + * @param encodingDirectiveAlreadyWritten true if the encoding directive + * that begins the new segment has already been written to this writer. + * If false, the writer will write an encoding directive consistent + * with the arguments provided to this method, using verbose + * s-expression syntax. + */ + fun startEncodingSegmentWithEncodingDirective( + macros: Map, + isMacroTableAppend: Boolean, + symbols: List, + isSymbolTableAppend: Boolean, + encodingDirectiveAlreadyWritten: Boolean + ) + /** * Starts writing a macro invocation, adding it to the macro table, if needed. 
*/ diff --git a/src/main/java/com/amazon/ion/impl/IonCursorBinary.java b/src/main/java/com/amazon/ion/impl/IonCursorBinary.java index bd7917cad..09f76c519 100644 --- a/src/main/java/com/amazon/ion/impl/IonCursorBinary.java +++ b/src/main/java/com/amazon/ion/impl/IonCursorBinary.java @@ -1444,6 +1444,8 @@ private void uncheckedReadFieldName_1_1() { fieldSid = (int) uncheckedReadFlexSym_1_1(fieldTextMarker); } else { fieldSid = (int) uncheckedReadFlexUInt_1_1(); + fieldTextMarker.startIndex = -1; + fieldTextMarker.endIndex = fieldSid; } } } @@ -1794,6 +1796,8 @@ private boolean slowReadFieldName_1_1() { return slowReadFieldNameFlexSym_1_1(); } else { fieldSid = (int) slowReadFlexUInt_1_1(); + fieldTextMarker.startIndex = -1; + fieldTextMarker.endIndex = fieldSid; return fieldSid < 0; } } diff --git a/src/main/java/com/amazon/ion/impl/IonReaderContinuableApplicationBinary.java b/src/main/java/com/amazon/ion/impl/IonReaderContinuableApplicationBinary.java index 30da18c2f..1a0d6f62c 100644 --- a/src/main/java/com/amazon/ion/impl/IonReaderContinuableApplicationBinary.java +++ b/src/main/java/com/amazon/ion/impl/IonReaderContinuableApplicationBinary.java @@ -943,33 +943,6 @@ private enum State { // The current state. private State state = State.READING_VALUE; - /** - * @return true if current value has a sequence of annotations that begins with `$ion_symbol_table`; otherwise, - * false. - */ - boolean startsWithIonSymbolTable() { - if (minorVersion == 0 && annotationSequenceMarker.startIndex >= 0) { - long savedPeekIndex = peekIndex; - peekIndex = annotationSequenceMarker.startIndex; - int sid = readVarUInt_1_0(); - peekIndex = savedPeekIndex; - return ION_SYMBOL_TABLE_SID == sid; - } else if (minorVersion == 1) { - Marker marker = annotationTokenMarkers.get(0); - return matchesSystemSymbol_1_1(marker, SystemSymbols_1_1.ION_SYMBOL_TABLE); - } - return false; - } - - /** - * @return true if the reader is positioned on a symbol table; otherwise, false. 
- */ - private boolean isPositionedOnSymbolTable() { - return hasAnnotations && - super.getType() == IonType.STRUCT && - startsWithIonSymbolTable(); - } - @Override public Event nextValue() { Event event; diff --git a/src/main/java/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java b/src/main/java/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java index 8f6056262..58bc5abd1 100644 --- a/src/main/java/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java +++ b/src/main/java/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java @@ -7,7 +7,10 @@ import com.amazon.ion.IonBufferConfiguration; import com.amazon.ion.IonCursor; import com.amazon.ion.IonException; +import com.amazon.ion.IonReader; import com.amazon.ion.IonType; +import com.amazon.ion.MacroAwareIonReader; +import com.amazon.ion.MacroAwareIonWriter; import com.amazon.ion.SymbolTable; import com.amazon.ion.SymbolToken; import com.amazon.ion.Timestamp; @@ -21,6 +24,7 @@ import com.amazon.ion.impl.macro.EncodingContext; import com.amazon.ion.impl.macro.Expression; import com.amazon.ion.impl.macro.EExpressionArgsReader; +import com.amazon.ion.impl.macro.IonReaderFromReaderAdapter; import com.amazon.ion.impl.macro.Macro; import com.amazon.ion.impl.macro.MacroCompiler; import com.amazon.ion.impl.macro.MacroTable; @@ -32,6 +36,7 @@ import com.amazon.ion.impl.macro.MacroRef; import com.amazon.ion.impl.macro.SystemMacro; +import java.io.IOException; import java.io.InputStream; import java.math.BigDecimal; import java.math.BigInteger; @@ -41,13 +46,14 @@ import java.util.Arrays; import java.util.Collections; import java.util.Date; -import java.util.HashMap; import java.util.Iterator; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.function.Consumer; import static com.amazon.ion.SystemSymbols.ION_ENCODING; +import static com.amazon.ion.SystemSymbols.ION_SYMBOL_TABLE_SID; import static 
com.amazon.ion.impl.IonReaderContinuableApplicationBinary.SYMBOLS_LIST_INITIAL_CAPACITY; import static com.amazon.ion.impl.IonTypeID.SYSTEM_SYMBOL_VALUE; import static com.amazon.ion.impl.bin.Ion_1_1_Constants.*; @@ -55,7 +61,7 @@ /** * An IonCursor capable of raw parsing of binary Ion streams. */ -class IonReaderContinuableCoreBinary extends IonCursorBinary implements IonReaderContinuableCore { +class IonReaderContinuableCoreBinary extends IonCursorBinary implements IonReaderContinuableCore, MacroAwareIonReader { // The UTF-8 bytes that represent the text "$ion_encoding" for quick byte-by-byte comparisons. private static final byte[] ION_ENCODING_UTF8 = ION_ENCODING.getBytes(StandardCharsets.UTF_8); @@ -132,6 +138,9 @@ class IonReaderContinuableCoreBinary extends IonCursorBinary implements IonReade // Adapts this reader for use in code that supports multiple reader types. private final ReaderAdapter readerAdapter = new ReaderAdapterContinuable(this); + // Adapts this reader for use in code that supports IonReader. + private final IonReader asIonReader = new IonReaderFromReaderAdapter(readerAdapter); + // Reads encoding directives from the stream. private final EncodingDirectiveReader encodingDirectiveReader = new EncodingDirectiveReader(); @@ -1105,7 +1114,7 @@ boolean startsWithIonEncoding() { public String getSymbol(int sid) { // Only symbol IDs declared in Ion 1.1 encoding directives (not Ion 1.0 symbol tables) are resolved by the // core reader. In Ion 1.0, 'symbols' is never populated by the core reader. 
- if (sid - 1 <= localSymbolMaxOffset) { + if (sid > 0 && sid - 1 <= localSymbolMaxOffset) { return symbols[sid - 1]; } return null; @@ -1217,7 +1226,7 @@ private class EncodingDirectiveReader { boolean isSymbolTableAppend = false; boolean isMacroTableAppend = false; List newSymbols = new ArrayList<>(8); - Map newMacros = new HashMap<>(); + Map newMacros = new LinkedHashMap<>(); MacroCompiler macroCompiler = new MacroCompiler(this::resolveMacro, readerAdapter); boolean isSymbolTableAlreadyClassified = false; @@ -1490,6 +1499,10 @@ void readEncodingDirective() { state = State.COMPILING_MACRO; Macro newMacro = macroCompiler.compileMacro(); newMacros.put(MacroRef.byId(++localMacroMaxOffset), newMacro); + String macroName = macroCompiler.getMacroName(); + if (macroName != null) { + newMacros.put(MacroRef.byName(macroName), newMacro); + } state = State.IN_MACRO_TABLE_SEXP; break; default: @@ -1733,6 +1746,33 @@ protected void stepOutOfEExpression() { } } + /** + * @return true if current value has a sequence of annotations that begins with `$ion_symbol_table`; otherwise, + * false. + */ + protected boolean startsWithIonSymbolTable() { + if (minorVersion == 0 && annotationSequenceMarker.startIndex >= 0) { + long savedPeekIndex = peekIndex; + peekIndex = annotationSequenceMarker.startIndex; + int sid = readVarUInt_1_0(); + peekIndex = savedPeekIndex; + return ION_SYMBOL_TABLE_SID == sid; + } else if (minorVersion == 1) { + Marker marker = annotationTokenMarkers.get(0); + return matchesSystemSymbol_1_1(marker, SystemSymbols_1_1.ION_SYMBOL_TABLE); + } + return false; + } + + /** + * @return true if the reader is positioned on a symbol table; otherwise, false. + */ + protected boolean isPositionedOnSymbolTable() { + return hasAnnotations && + getEncodingType() == IonType.STRUCT && + startsWithIonSymbolTable(); + } + /** * Consumes the next value (if any) from the MacroEvaluator, setting `event` based on the result. 
* @return true if evaluation of the current invocation has completed; otherwise, false. @@ -1758,6 +1798,97 @@ private boolean evaluateNext() { return false; } + @Override + public void transcodeTo(MacroAwareIonWriter writer) throws IOException { + registerIvmNotificationConsumer((major, minor) -> { + resetEncodingContext(); + // Which IVM to write is inherent to the writer implementation. + // We don't have a single implementation that writes both formats. + writer.startEncodingSegmentWithIonVersionMarker(); + }); + while (transcodeNextTo(writer) != Event.NEEDS_DATA); + } + + /** + * Transcodes the next value, and any encoding directives that may precede it, + * to the given writer. + * @param writer the writer to which the value will be transcoded. + * @return the result of the operation. + * @throws IOException if thrown during writing. + */ + Event transcodeNextTo(MacroAwareIonWriter writer) throws IOException { + // NOTE: this method is structured very similarly to nextValue(). During performance analysis, we should + // see if the methods can be unified without sacrificing hot path performance. Performance of this method + // is not considered critical. + lobBytesRead = 0; + while (true) { + if (parent == null || state != State.READING_VALUE) { + if (state != State.READING_VALUE && state != State.COMPILING_MACRO) { + boolean isEncodingDirectiveFromEExpression = isEvaluatingEExpression; + encodingDirectiveReader.readEncodingDirective(); + if (state != State.READING_VALUE) { + throw new IonException("Unexpected EOF when writing encoding-level value."); + } + // If the encoding directive was expanded from an e-expression, that expression has already been + // written. In that case, just make sure the writer is using the new context. Otherwise, also write + // the encoding directive. 
+ writer.startEncodingSegmentWithEncodingDirective( + encodingDirectiveReader.newMacros, + encodingDirectiveReader.isMacroTableAppend, + encodingDirectiveReader.newSymbols, + encodingDirectiveReader.isSymbolTableAppend, + isEncodingDirectiveFromEExpression + ); + } + if (isEvaluatingEExpression) { + if (evaluateNext()) { + continue; + } + } else { + event = super.nextValue(); + } + if (minorVersion == 1 && parent == null && isPositionedOnEncodingDirective()) { + encodingDirectiveReader.resetState(); + state = State.ON_ION_ENCODING_SEXP; + continue; + } + } else if (isEvaluatingEExpression) { + if (evaluateNext()) { + continue; + } + } else { + event = super.nextValue(); + } + if (valueTid != null && valueTid.isMacroInvocation) { + expressionArgsReader.beginEvaluatingMacroInvocation(macroEvaluator); + macroEvaluatorIonReader.transcodeArgumentsTo(writer); + isEvaluatingEExpression = true; + if (evaluateNext()) { + continue; + } + if (parent == null && isPositionedOnEvaluatedEncodingDirective()) { + encodingDirectiveReader.resetState(); + state = State.ON_ION_ENCODING_SEXP; + continue; + } + } + if (isEvaluatingEExpression) { + // EExpressions are not expanded and provided to the writer; only the raw encoding is transferred. + continue; + } + break; + } + if (event != Event.NEEDS_DATA) { + if (minorVersion > 0 && isPositionedOnSymbolTable()) { + // TODO finalize handling of Ion 1.0-style symbol tables in Ion 1.1: https://github.com/amazon-ion/ion-java/issues/1002 + throw new IonException("Macro-aware transcoding of Ion 1.1 data containing Ion 1.0-style symbol tables not yet supported."); + } + // The reader is now positioned on an actual encoding value. Write the value. + writer.writeValue(asIonReader); + } + return event; + } + @Override public Event nextValue() { lobBytesRead = 0; @@ -2511,7 +2642,7 @@ IntList getAnnotationSidList() { * @return a SymbolToken. 
*/ protected SymbolToken getSymbolToken(int sid) { - return new SymbolTokenImpl(sid); + return new SymbolTokenImpl(getSymbol(sid), sid); } protected final SymbolToken getSystemSymbolToken(Marker marker) { diff --git a/src/main/java/com/amazon/ion/impl/_Private_IonReaderBuilder.java b/src/main/java/com/amazon/ion/impl/_Private_IonReaderBuilder.java index c7e56f8ab..3344b4732 100644 --- a/src/main/java/com/amazon/ion/impl/_Private_IonReaderBuilder.java +++ b/src/main/java/com/amazon/ion/impl/_Private_IonReaderBuilder.java @@ -1,6 +1,5 @@ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 - package com.amazon.ion.impl; import com.amazon.ion.IonCatalog; @@ -8,6 +7,7 @@ import com.amazon.ion.IonReader; import com.amazon.ion.IonTextReader; import com.amazon.ion.IonValue; +import com.amazon.ion.MacroAwareIonReader; import com.amazon.ion.system.IonReaderBuilder; import com.amazon.ion.util.IonStreamUtils; @@ -352,4 +352,16 @@ public IonTextReader build(String ionText) { return makeReaderText(validateCatalog(), ionText, lstFactory); } + /** + * Creates a new {@link MacroAwareIonReader} over the given data. + * @param ionData the data to read. + * @return a new MacroAwareIonReader instance. + */ + public MacroAwareIonReader buildMacroAware(byte[] ionData) { + // TODO make this work for text too. 
+ if (!IonStreamUtils.isIonBinary(ionData)) { + throw new UnsupportedOperationException("MacroAwareIonReader is not yet implemented for text data."); + } + return new IonReaderContinuableCoreBinary(getBufferConfiguration(), ionData, 0, ionData.length); + } } diff --git a/src/main/java/com/amazon/ion/impl/bin/IonManagedBinaryWriter.java b/src/main/java/com/amazon/ion/impl/bin/IonManagedBinaryWriter.java index 61f5cbc17..dccbd3beb 100644 --- a/src/main/java/com/amazon/ion/impl/bin/IonManagedBinaryWriter.java +++ b/src/main/java/com/amazon/ion/impl/bin/IonManagedBinaryWriter.java @@ -1,23 +1,11 @@ -/* - * Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ - +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.impl.bin; import static com.amazon.ion.IonType.LIST; import static com.amazon.ion.IonType.STRUCT; import static com.amazon.ion.SystemSymbols.IMPORTS_SID; +import static com.amazon.ion.SystemSymbols.ION_1_0; import static com.amazon.ion.SystemSymbols.ION_1_0_MAX_ID; import static com.amazon.ion.SystemSymbols.ION_SYMBOL_TABLE_SID; import static com.amazon.ion.SystemSymbols.MAX_ID_SID; @@ -1033,8 +1021,14 @@ public void writeSymbol(String content) throws IOException writeSymbolToken(intern(content)); } - private boolean handleIVM(int sid) throws IOException { - if (user.isIVM(sid)) + private boolean handleIVM(SymbolToken symbol) throws IOException { + if (getDepth() != 0 || user.hasAnnotations()) { + return false; + } + // A symbol's text always takes precedence over its symbol ID. Only symbols with unknown text are compared + // against SID 2. + String text = symbol.getText(); + if (ION_1_0.equals(text) || (text == null && user.isIVM(symbol.getSid()))) { if (user.hasWrittenValuesSinceFinished()) { @@ -1054,7 +1048,7 @@ private boolean handleIVM(int sid) throws IOException { public void writeSymbolToken(SymbolToken token) throws IOException { - if (token != null && handleIVM(token.getSid())) + if (token != null && handleIVM(token)) { return; } diff --git a/src/main/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1.kt b/src/main/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1.kt index 1278f7dbf..8b4596a0e 100644 --- a/src/main/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1.kt +++ b/src/main/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1.kt @@ -124,6 +124,10 @@ internal class IonManagedWriter_1_1( private var macroNames = ArrayList() /** Macro definitions by user-space address, including new macros. */ private var macrosById = ArrayList() + /** The first symbol ID in the current encoding context. 
*/ + private var firstLocalSid: Int = 0 + /** True if the current encoding context contains the system symbols. */ + private var areSystemSymbolsInScope = true /** * Transformer for symbol IDs encountered during writeValues. Can be used to upgrade Ion 1.0 symbol IDs to the @@ -146,7 +150,7 @@ internal class IonManagedWriter_1_1( sid = newSymbols[text] if (sid != null) return sid // Add to the to-be-appended symbols - sid = symbolTable.size + newSymbols.size + 1 + sid = firstLocalSid + symbolTable.size + newSymbols.size + 1 newSymbols[text] = sid return sid } @@ -244,6 +248,60 @@ internal class IonManagedWriter_1_1( return assignMacroAddress(macro) } + override fun startEncodingSegmentWithIonVersionMarker() { + if (!newSymbols.isEmpty() || !newMacros.isEmpty()) { + throw IonException("Cannot start a new encoding segment while the previous segment is active.") + } + needsIVM = false + flush() + systemData.writeIVM() + resetEncodingContext() + } + + override fun startEncodingSegmentWithEncodingDirective( + macros: Map, + isMacroTableAppend: Boolean, + symbols: List, + isSymbolTableAppend: Boolean, + encodingDirectiveAlreadyWritten: Boolean + ) { + // It is assumed that the IVM is written manually (e.g. via startEncodingSegmentWithIonVersionMarker) when using this method. + needsIVM = false + // First, flush the previous segment. This method begins a new segment. 
+ flush() + firstLocalSid = if (isSymbolTableAppend) { + if (areSystemSymbolsInScope) SystemSymbols_1_1.size() else 0 + } else { + symbolTable.clear() + areSystemSymbolsInScope = false + 0 + } + for (symbol in symbols) { + intern(symbol) + } + if (!isMacroTableAppend) { + macroNames.clear() + macrosById.clear() + macroTable.clear() + newMacros.clear() + } + for (entry in macros.entries) { + when (entry.key) { + is MacroRef.ByName -> getOrAssignMacroAddressAndName((entry.key as MacroRef.ByName).name, entry.value) + is MacroRef.ById -> getOrAssignMacroAddress(entry.value) + } + } + if (encodingDirectiveAlreadyWritten) { + // This prevents another encoding directive from being written for this context. + symbolTable.putAll(newSymbols) + newSymbols.clear() + macroTable.putAll(newMacros) + newMacros.clear() + } else { + writeVerboseEncodingDirective() + } + } + /** Unconditionally adds a macro to the macro table data structures and returns the new address. */ private fun assignMacroAddress(macro: Macro): Int { val address = macrosById.size @@ -263,6 +321,8 @@ internal class IonManagedWriter_1_1( newMacros.clear() needsIVM = true + firstLocalSid = 0 + areSystemSymbolsInScope = true } /** Helper function for writing encoding directives */ @@ -299,7 +359,27 @@ internal class IonManagedWriter_1_1( } /** - * Writes the `(symbol_table ...)` clause into the encoding expression. + * Writes an encoding directive for the current encoding context using the verbose `$ion_encoding::(...)` syntax, + * and updates internal state accordingly. This always appends to the current encoding context. If there is nothing + * to append, calling this function is a no-op. 
+ */ + private fun writeVerboseEncodingDirective() { + if (newSymbols.isEmpty() && newMacros.isEmpty()) return + + systemData.writeAnnotations(SystemSymbols_1_1.ION_ENCODING) + writeSystemSexp { + writeVerboseSymbolTableClause() + writeVerboseMacroTableClause() + } + symbolTable.putAll(newSymbols) + newSymbols.clear() + macroTable.putAll(newMacros) + newMacros.clear() + } + + /** + * Writes the `(symbol_table ...)` clause into the encoding expression by invoking + * the `add_symbols` or `set_symbols` system macro. * If the symbol table would be empty, writes nothing, which is equivalent * to an empty symbol table. */ @@ -321,7 +401,41 @@ internal class IonManagedWriter_1_1( } /** - * Writes the `(macro_table ...)` clause into the encoding expression. + * Writes the `(symbol_table ...)` clause into the encoding expression using the + * verbose s-expression syntax. + * If the symbol table would be empty, writes nothing, which is equivalent + * to an empty symbol table. + */ + private fun writeVerboseSymbolTableClause() { + val hasSymbolsToAdd = newSymbols.isNotEmpty() + val hasSymbolsToRetain = symbolTable.isNotEmpty() + if (!hasSymbolsToAdd && !hasSymbolsToRetain) return + + writeSystemSexp { + forceNoNewlines(true) + systemData.writeSymbol(SystemSymbols_1_1.SYMBOL_TABLE) + + // Add previous symbol table + if (hasSymbolsToRetain) { + if (newSymbols.size > 0) forceNoNewlines(false) + writeSymbol(SystemSymbols_1_1.ION_ENCODING) + } + + // Add new symbols + if (hasSymbolsToAdd) { + stepInList(usingLengthPrefix = false) + if (newSymbols.size <= MAX_SYMBOLS_IN_SINGLE_LINE_SYMBOL_TABLE) forceNoNewlines(true) + newSymbols.forEach { (text, _) -> writeString(text) } + stepOut() + } + forceNoNewlines(true) + } + systemData.forceNoNewlines(false) + } + + /** + * Writes the `(macro_table ...)` clause into the encoding expression by invoking + * the `add_macros` or `set_macros` system macro. 
* If the macro table would be empty, writes nothing, which is equivalent * to an empty macro table. */ @@ -352,6 +466,42 @@ internal class IonManagedWriter_1_1( systemData.forceNoNewlines(false) } + /** + * Writes the `(macro_table ...)` clause into the encoding expression using the + * verbose s-expression syntax. + * If the macro table would be empty, writes nothing, which is equivalent + * to an empty macro table. + */ + private fun writeVerboseMacroTableClause() { + val hasMacrosToAdd = newMacros.isNotEmpty() + val hasMacrosToRetain = macroTable.isNotEmpty() + if (!hasMacrosToAdd && !hasMacrosToRetain) return + + writeSystemSexp { + forceNoNewlines(true) + writeSymbol(SystemSymbols_1_1.MACRO_TABLE) + if (newMacros.size > 0) forceNoNewlines(false) + if (hasMacrosToRetain) { + writeSymbol(SystemSymbols_1_1.ION_ENCODING) + } + forceNoNewlines(false) + newMacros.forEach { (macro, address) -> + val name = macroNames[address] + when (macro) { + is TemplateMacro -> writeMacroDefinition(name, macro) + is SystemMacro -> { + if (name != macro.macroName) { + exportSystemMacro(macro, name) + } + // Else, no need to export the macro since it's already known by the desired name + } + } + } + forceNoNewlines(true) + } + systemData.forceNoNewlines(false) + } + private fun exportSystemMacro(macro: SystemMacro, alias: String?) { writeSystemSexp { forceNoNewlines(true) @@ -741,7 +891,13 @@ internal class IonManagedWriter_1_1( // TODO: Can't use reader.fieldId, reader.fieldName because it will throw UnknownSymbolException. // However, this might mean we're unnecessarily constructing `SymbolToken` instances. val fieldName = reader.fieldNameSymbol - handleSymbolToken(fieldName.sid, fieldName.text, SymbolKind.FIELD_NAME, userData, preserveEncoding = true) + // If there is no field name, it still may have been set externally, e.g. 
+ // writer.setFieldName(...); writer.writeValue(reader); + // This occurs when serializing a sequence of Expressions, which hold field names separate from + // values. + if (fieldName != null) { + handleSymbolToken(fieldName.sid, fieldName.text, SymbolKind.FIELD_NAME, userData, preserveEncoding = true) + } } if (reader.isNullValue) { @@ -794,8 +950,11 @@ internal class IonManagedWriter_1_1( private fun IonReader.isCurrentValueAnIvm(): Boolean { if (depth != 0 || type != IonType.SYMBOL || typeAnnotationSymbols.isNotEmpty()) return false val symbol = symbolValue() ?: return false - if (symbol.sid == 2) return true - symbol.text ?: return false + if (symbol.text == null) { + // TODO FIX: Ion 1.1 system symbols can be removed from the encoding context, so an IVM may not always + // have symbol ID 2. + return symbol.sid == 2 + } return ION_VERSION_MARKER_REGEX.matches(symbol.assumeText()) } @@ -828,7 +987,8 @@ internal class IonManagedWriter_1_1( startSystemMacro(macro) } else { val address = getOrAssignMacroAddress(macro) - startMacro(null, address, macro) + // Note: macroNames[address] will be null if the macro is unnamed. + startMacro(macroNames[address], address, macro) } } diff --git a/src/main/java/com/amazon/ion/impl/macro/IonReaderFromReaderAdapter.kt b/src/main/java/com/amazon/ion/impl/macro/IonReaderFromReaderAdapter.kt new file mode 100644 index 000000000..79853e928 --- /dev/null +++ b/src/main/java/com/amazon/ion/impl/macro/IonReaderFromReaderAdapter.kt @@ -0,0 +1,85 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl.macro + +import com.amazon.ion.* +import java.lang.UnsupportedOperationException +import java.math.BigDecimal +import java.math.BigInteger +import java.util.* + +/** + * An [IonReader] that delegates to a [ReaderAdapter]. + */ +internal class IonReaderFromReaderAdapter(val reader: ReaderAdapter) : IonReader { + + override fun close() { + // Do nothing. 
ReaderAdapter does not implement close(). + } + + override fun asFacet(facetType: Class?): T { + throw UnsupportedOperationException() + } + + override fun hasNext(): Boolean { + throw UnsupportedOperationException() + } + + override fun next(): IonType? = if (reader.nextValue()) reader.encodingType()!! else null + + override fun stringValue(): String = reader.stringValue() + + override fun intValue(): Int = reader.intValue() + + override fun bigDecimalValue(): BigDecimal = reader.decimalValue() + + override fun decimalValue(): Decimal = reader.ionDecimalValue() + + override fun dateValue(): Date = TODO("Not yet implemented") + + override fun doubleValue(): Double = reader.doubleValue() + + override fun stepIn() = reader.stepIntoContainer() + + override fun stepOut() = reader.stepOutOfContainer() + + override fun getDepth(): Int = reader.getDepth() + + override fun getSymbolTable(): SymbolTable = TODO("Not yet implemented") + + override fun getType(): IonType? = reader.encodingType() + + override fun getTypeAnnotationSymbols(): Array = reader.getTypeAnnotationSymbols().toTypedArray() + + override fun iterateTypeAnnotations(): MutableIterator = TODO("Not yet implemented") + + override fun getFieldId(): Int = TODO("Not yet implemented") + + override fun getFieldName(): String = TODO("Not yet implemented") + + override fun booleanValue(): Boolean = reader.booleanValue() + + override fun isNullValue(): Boolean = reader.isNullValue() + + override fun longValue(): Long = reader.longValue() + + override fun bigIntegerValue(): BigInteger = reader.bigIntegerValue() + + override fun timestampValue(): Timestamp = reader.timestampValue() + + override fun newBytes(): ByteArray = reader.newBytes() + + override fun getBytes(buffer: ByteArray?, offset: Int, len: Int): Int = TODO("Not yet implemented") + + override fun symbolValue(): SymbolToken = reader.symbolValue() + + override fun byteSize(): Int = TODO("Not yet implemented") + + override fun getIntegerSize(): IntegerSize = 
reader.getIntegerSize() + + override fun getTypeAnnotations(): Array = TODO("Not yet implemented") + + override fun getFieldNameSymbol(): SymbolToken = reader.getFieldNameSymbol() + + override fun isInStruct(): Boolean = reader.isInStruct() +} diff --git a/src/main/java/com/amazon/ion/impl/macro/MacroEvaluator.kt b/src/main/java/com/amazon/ion/impl/macro/MacroEvaluator.kt index 2a345cead..b0e167645 100644 --- a/src/main/java/com/amazon/ion/impl/macro/MacroEvaluator.kt +++ b/src/main/java/com/amazon/ion/impl/macro/MacroEvaluator.kt @@ -427,6 +427,13 @@ class MacroEvaluator { pushExpansion(ExpansionKind.Values, 0, encodingExpressions.size, Environment.EMPTY, encodingExpressions) } + /** + * Returns the e-expression argument expressions that this MacroEvaluator would evaluate. + */ + fun getArguments(): List { + return expansionStack.peek().expressions!! + } + /** * Evaluate the macro expansion until the next [DataModelExpression] can be returned. * Returns null if at the end of a container or at the end of the expansion. 
/**
 * Transcodes the e-expression argument expressions provided to this MacroEvaluator
 * without evaluation.
 *
 * The arguments form a flat list. Every expression that opens a scope on the writer
 * (data-model container, e-expression, expression group) records the index at which it
 * ends, and the matching `stepOut` calls are issued when the walk reaches that index.
 *
 * @param writer the writer to which the expressions will be transcoded.
 * @throws IllegalStateException if an argument is of an expression kind not handled here.
 */
fun transcodeArgumentsTo(writer: MacroAwareIonWriter) {
    val arguments: List<Expression> = evaluator.getArguments()
    // Slot i counts the scopes whose exclusive end is expression index i. The extra slot at
    // arguments.size covers scopes that run to the very end of the argument list.
    val numberOfContainerEndsAtExpressionIndex = IntArray(arguments.size + 1)

    for (index in arguments.indices) {
        // Close every scope that ends right before this expression.
        repeat(numberOfContainerEndsAtExpressionIndex[index]) { writer.stepOut() }
        when (val argument = arguments[index]) {
            is Expression.DataModelContainer -> {
                // Point the reader at the container before consulting the annotation accessors;
                // otherwise they would still describe whichever scalar was written last.
                // NOTE(review): assumes hasAnnotations()/typeAnnotationSymbols read
                // currentValueExpression, as the scalar branch below relies on - confirm.
                currentValueExpression = argument
                if (hasAnnotations()) {
                    writer.setTypeAnnotationSymbols(*typeAnnotationSymbols!!)
                }
                writer.stepIn(argument.type)
                numberOfContainerEndsAtExpressionIndex[argument.endExclusive]++
            }
            is Expression.DataModelValue -> {
                // The writer pulls the scalar (type, annotations, value) through this reader.
                currentValueExpression = argument
                writer.writeValue(this)
            }
            is Expression.FieldName -> {
                queuedFieldName = argument
                writer.setFieldNameSymbol(argument.value)
            }
            is Expression.EExpression -> {
                writer.startMacro(argument.macro)
                numberOfContainerEndsAtExpressionIndex[argument.endExclusive]++
            }
            is Expression.ExpressionGroup -> {
                writer.startExpressionGroup()
                numberOfContainerEndsAtExpressionIndex[argument.endExclusive]++
            }
            else -> throw IllegalStateException("Unexpected branch")
        }
    }
    // Close the scopes that end together with the argument list.
    repeat(numberOfContainerEndsAtExpressionIndex[arguments.size]) { writer.stepOut() }
}
/**
 * Reads the current value as an Ion [Decimal]. [prepareValue] runs first, exactly as in the
 * other scalar accessors of this adapter.
 */
override fun ionDecimalValue(): Decimal {
    prepareValue()
    val decimal: Decimal = reader.decimalValue()
    return decimal
}
/** Reports the depth of the wrapped [IonReader]. */
override fun getDepth(): Int {
    return reader.depth
}
// Macro-aware Ion 1.1 transcode

@Nested
inner class BinaryMacroAwareTranscode_ReaderNonContinuableBufferDefault {

    // TODO refactor the following method into a base class and add nested inner class implementations to exercise
    // all combinations of reading from [ByteArray, InputStream] in [Text, Binary], and to [Text, Binary].
    /**
     * Transcodes binary input to Ion 1.1 text with a macro-aware reader/writer pair and checks
     * that the transcoded text yields the same values as the source. Non-binary inputs are
     * skipped.
     *
     * @param name display name of the test case.
     * @param ion the source data.
     */
    @ParameterizedTest(name = "{0}")
    @MethodSource("com.amazon.ion.Ion_1_1_RoundTripTest#testData")
    fun testEncodingDirectivesAndMacroInvocationsArePreservedWhenPerformingLowLevelTranscode(name: String, ion: ByteArray) {
        if (!ion.isIonBinary()) {
            return
        }
        val actual = StringBuilder()
        val reader: MacroAwareIonReader = (IonReaderBuilder.standard() as _Private_IonReaderBuilder).buildMacroAware(ion)
        // `use` closes both resources even when building the writer or transcoding throws.
        // The writer is closed first, so its output is complete before `actual` is read.
        reader.use {
            val writer: MacroAwareIonWriter = ION_1_1.textWriterBuilder().build(actual) as MacroAwareIonWriter
            writer.use {
                reader.transcodeTo(writer)
            }
        }

        assertReadersHaveEquivalentValues(
            ION.newReader(ion),
            ION.newReader(actual.toString())
        )
    }
}
/**
 * Base class holding the round-trip test cases. Each case copies the source data through a
 * writer created by [writerFn] and then compares the values readers produce for the source
 * and for the copy.
 */
abstract class Ion_1_1_RoundTripBase {

    /** Creates the writer that the data is copied to. */
    abstract val writerFn: (OutputStream) -> IonWriter
    /** Creates the reader used when comparing source and copy. */
    abstract val readerFn: (ByteArray) -> IonReader
    /** Creates a reader through `ION::newSystemReader`. */
    val systemReaderFn: (ByteArray) -> IonReader = ION::newSystemReader

    /** Copies the data by iterating the source and calling `writeTo` on every value. */
    @ParameterizedTest(name = "{0}")
    @MethodSource("com.amazon.ion.Ion_1_1_RoundTripTest#testData")
    fun testUserValuesArePreservedWhenTransferringUserValues(name: String, ion: ByteArray) {

        // Read and compare the data.
        val actual = roundTripToByteArray { w -> newReader(ion).let(::iterate).forEach { it.writeTo(w) } }

        printDebugInfo(ion, actual)

        assertReadersHaveEquivalentValues(
            readerFn(ion),
            readerFn(actual)
        )
    }

    /** Copies the data by handing the positioned reader to `writeValue` for every top-level value. */
    @ParameterizedTest(name = "{0}")
    @MethodSource("com.amazon.ion.Ion_1_1_RoundTripTest#testData")
    fun testUserValuesArePreservedWhenTransferringUserValuesUsingWriteValueForReader(name: String, ion: ByteArray) {

        // Read and compare the data.
        val actual = roundTripToByteArray { w -> newReader(ion).let { r -> while (r.next() != null) w.writeValue(r) } }

        printDebugInfo(ion, actual)

        assertReadersHaveEquivalentValues(
            readerFn(ion),
            readerFn(actual)
        )
    }

    /** Copies the data by iterating the source and passing every value to `writeValue`. */
    @ParameterizedTest(name = "{0}")
    @MethodSource("com.amazon.ion.Ion_1_1_RoundTripTest#testData")
    fun testUserValuesArePreservedWhenTransferringUserValuesUsingWriteValueForIonValue(name: String, ion: ByteArray) {
        // Read and compare the data.
        val actual = roundTripToByteArray { w -> newReader(ion).let(::iterate).forEach { w.writeValue(it) } }

        printDebugInfo(ion, actual)

        assertReadersHaveEquivalentValues(
            readerFn(ion),
            readerFn(actual)
        )
    }

    /**
     * Copies the data from a system reader via `writeValues`, then compares the values seen
     * through [readerFn]. Currently disabled.
     */
    @ParameterizedTest(name = "{0}")
    @MethodSource("com.amazon.ion.Ion_1_1_RoundTripTest#testData")
    @Disabled("Re-interpreting system directives is not supported yet.")
    open fun testUserValuesArePreservedWhenTransferringSystemValues(name: String, ion: ByteArray) {

        // Read and compare the data.
        val actual = roundTripToByteArray { w ->
            w as _Private_IonWriter
            // NOTE(review): the mapping lambda shifts its argument by 9; presumably a symbol-ID adjustment - confirm.
            w.writeValues(newSystemReader(ion)) { x -> x - 9 }
        }

        printDebugInfo(ion, actual)

        // Check the user values
        assertReadersHaveEquivalentValues(
            readerFn(ion),
            readerFn(actual)
        )
    }

    /**
     * Copies the data from a system reader via `writeValues`, then compares the values seen
     * through [systemReaderFn]. Currently disabled.
     */
    @ParameterizedTest(name = "{0}")
    @MethodSource("com.amazon.ion.Ion_1_1_RoundTripTest#testData")
    @Disabled("Re-interpreting system directives is not supported yet.")
    open fun testSystemValuesArePreservedWhenTransferringSystemValues(name: String, ion: ByteArray) {

        // Read and compare the data.
        val actual = roundTripToByteArray { w ->
            w as _Private_IonWriter
            // NOTE(review): the mapping lambda shifts its argument by 9; presumably a symbol-ID adjustment - confirm.
            w.writeValues(newSystemReader(ion)) { x -> x - 9 }
        }

        printDebugInfo(ion, actual)

        // Check the system values
        assertReadersHaveEquivalentValues(
            systemReaderFn(ion),
            // Skip the initial IVM since it ends up being doubled when we're copying.
            systemReaderFn(actual).apply { next() }
        )
    }

    /**
     * Runs [block] against a writer created by [writerFn] over an in-memory stream, closes the
     * writer and returns the bytes written.
     *
     * @param block the copy routine; it receives `ION` as its receiver and the writer as its argument.
     * @return the bytes produced by the writer.
     */
    private fun roundTripToByteArray(block: _Private_IonSystem.(IonWriter) -> Unit): ByteArray {
        // Create a new copy of the data in Ion 1.1
        // The stream asserts that it is closed at most once.
        val baos = object : ByteArrayOutputStream() {
            var closed = false
            override fun close() {
                assertFalse(closed)
                closed = true
                super.close()
            }
        }
        val writer = writerFn(baos)
        block(ION, writer)
        writer.close()
        return baos.toByteArray()
    }
}
READER_NON_CONTINUABLE_STREAM_DEFAULT: (ByteArray) -> IonReader = { IonReaderBuilder.standard().build(ByteArrayInputStream(it)) } + @JvmStatic protected val READER_NON_CONTINUABLE_BUFFER_16: (ByteArray) -> IonReader = IonReaderBuilder.standard().withBufferConfiguration(BUFFER_CONFIGURATION_INITIAL_SIZE_16)::build + @JvmStatic protected val READER_NON_CONTINUABLE_STREAM_16: (ByteArray) -> IonReader = { IonReaderBuilder.standard().withBufferConfiguration(BUFFER_CONFIGURATION_INITIAL_SIZE_16).build(ByteArrayInputStream(it)) } + @JvmStatic protected val READER_CONTINUABLE_BUFFER_DEFAULT: (ByteArray) -> IonReader = IonReaderBuilder.standard().withIncrementalReadingEnabled(true)::build + @JvmStatic protected val READER_CONTINUABLE_STREAM_DEFAULT: (ByteArray) -> IonReader = { IonReaderBuilder.standard().withIncrementalReadingEnabled(true).build(ByteArrayInputStream(it)) } + @JvmStatic protected val READER_CONTINUABLE_BUFFER_16: (ByteArray) -> IonReader = IonReaderBuilder.standard().withIncrementalReadingEnabled(true).withBufferConfiguration(BUFFER_CONFIGURATION_INITIAL_SIZE_16)::build + @JvmStatic protected val READER_CONTINUABLE_STREAM_16: (ByteArray) -> IonReader = { IonReaderBuilder.standard().withIncrementalReadingEnabled(true).withBufferConfiguration(BUFFER_CONFIGURATION_INITIAL_SIZE_16).build(ByteArrayInputStream(it)) } @@ -463,23 +454,79 @@ abstract class Ion_1_1_RoundTripBase { protected val WRITER_INTERNED_PREFIXED: (OutputStream) -> IonWriter = ION_1_1.binaryWriterBuilder() .withSymbolInliningStrategy(SymbolInliningStrategy.NEVER_INLINE) .withLengthPrefixStrategy(LengthPrefixStrategy.ALWAYS_PREFIXED)::build + @JvmStatic protected val WRITER_INLINE_PREFIXED: (OutputStream) -> IonWriter = ION_1_1.binaryWriterBuilder() .withSymbolInliningStrategy(SymbolInliningStrategy.ALWAYS_INLINE) .withLengthPrefixStrategy(LengthPrefixStrategy.ALWAYS_PREFIXED)::build + @JvmStatic protected val WRITER_INTERNED_DELIMITED: (OutputStream) -> IonWriter = ION_1_1.binaryWriterBuilder() 
/**
 * Asserts that both readers produce the same sequence of values.
 *
 * A pair of un-annotated Ion version markers is accepted without comparison, because the two
 * streams may use different Ion versions. Both readers are closed before this method returns,
 * including when an assertion fails.
 *
 * @param expectedDataReader reader over the source data.
 * @param actualDataReader reader over the transcribed data.
 * @throws AssertionError if a value differs, the streams differ in length, or reading a value
 *  raises an [IonException].
 */
@JvmStatic
fun assertReadersHaveEquivalentValues(expectedDataReader: IonReader, actualDataReader: IonReader) {
    try {
        // Read and compare the data.
        val expectedData: Iterator<IonValue> = ION.iterate(expectedDataReader)
        val actualData: Iterator<IonValue> = ION.iterate(actualDataReader)

        var ie = 0
        while (expectedData.hasNext() && actualData.hasNext()) {
            val expected = expectedData.next()
            try {
                val actual = actualData.next()

                if (expected is IonSymbol && actual is IonSymbol) {
                    if (expected.typeAnnotationSymbols.isEmpty() &&
                        isIonVersionMarker(expected.symbolValue()) &&
                        actual.typeAnnotationSymbols.isEmpty() &&
                        isIonVersionMarker(actual.symbolValue())
                    ) {
                        // Both are IVMs. We won't actually compare them because we
                        // could be comparing data from different Ion versions
                        continue
                    }
                }

                assertEquals(expected, actual, "value $ie is different")
            } catch (e: IonException) {
                throw AssertionError("Encountered IonException when reading the transcribed version of value #$ie\nExpected: $expected", e)
            }
            ie++
        }

        // Make sure that both are fully consumed.
        var ia = ie
        while (expectedData.hasNext()) {
            expectedData.next(); ie++
        }
        while (actualData.hasNext()) {
            actualData.next(); ia++
        }

        assertEquals(ie, ia, "Data is unequal length")
    } finally {
        // Release both readers on the failure path too; the second close runs even if the first throws.
        try {
            expectedDataReader.close()
        } finally {
            actualDataReader.close()
        }
    }
}
org.junit.jupiter.params.provider.Arguments; import org.junit.jupiter.params.provider.MethodSource; @@ -30,6 +38,7 @@ import java.io.ByteArrayOutputStream; import java.io.OutputStream; import java.math.BigDecimal; +import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; @@ -39,10 +48,14 @@ import java.util.TreeMap; import java.util.function.Consumer; +import static com.amazon.ion.BitUtils.bytes; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.allOf; import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; /** @@ -228,6 +241,11 @@ void startMacroInvocationByName(IonRawWriter_1_1 writer, String name, Map newMacroTableByMacroRef(SortedMap macrosByNa void startMacroInvocationByName(IonRawWriter_1_1 writer, String name, Map macrosByName) { writer.stepInEExp(name); } + + @Override + MacroAwareIonWriter newMacroAwareWriter(OutputStream out) { + return (MacroAwareIonWriter) IonEncodingVersion.ION_1_1.textWriterBuilder().build(out); + } }; abstract IonRawWriter_1_1 newWriter(OutputStream out); abstract EncodingContext getEncodingContext(IonReader reader); abstract Map newMacroTableByMacroRef(SortedMap macrosByName); abstract void startMacroInvocationByName(IonRawWriter_1_1 writer, String name, Map macrosByName); + abstract MacroAwareIonWriter newMacroAwareWriter(OutputStream out); } public enum InputType { @@ -273,15 +297,26 @@ public enum InputType { IonReader newReader(byte[] input) { return IonReaderBuilder.standard().build(new ByteArrayInputStream(input)); } + + @Override + MacroAwareIonReader newMacroAwareReader(byte[] input) { + throw new 
UnsupportedOperationException("Building MacroAwareIonReader from InputStream not yet supported."); + } }, BYTE_ARRAY { @Override IonReader newReader(byte[] input) { return IonReaderBuilder.standard().build(input); } + + @Override + MacroAwareIonReader newMacroAwareReader(byte[] input) { + return ((_Private_IonReaderBuilder) IonReaderBuilder.standard()).buildMacroAware(input); + } }; abstract IonReader newReader(byte[] input); + abstract MacroAwareIonReader newMacroAwareReader(byte[] input); } public static Arguments[] allCombinations() { @@ -820,9 +855,7 @@ public void macroInvocationNestedWithinParameter(InputType inputType, StreamType } } - @ParameterizedTest(name = "{0},{1}") - @MethodSource("allCombinations") - public void macroInvocationsNestedWithinParameter(InputType inputType, StreamType streamType) throws Exception { + private byte[] macroInvocationsNestedWithinParameter(StreamType streamType) { ByteArrayOutputStream out = new ByteArrayOutputStream(); IonRawWriter_1_1 writer = streamType.newWriter(out); Macro expectedMacro = writeSimonSaysMacro(writer); @@ -844,8 +877,13 @@ public void macroInvocationsNestedWithinParameter(InputType inputType, StreamTyp writer.stepInList(true); writer.stepOut(); - byte[] data = getBytes(writer, out); + return getBytes(writer, out); + } + @ParameterizedTest(name = "{0},{1}") + @MethodSource("allCombinations") + public void macroInvocationsNestedWithinParameter(InputType inputType, StreamType streamType) throws Exception { + byte[] data = macroInvocationsNestedWithinParameter(streamType); try (IonReader reader = inputType.newReader(data)) { assertEquals(IonType.LIST, reader.next()); reader.stepIn(); @@ -867,6 +905,98 @@ public void macroInvocationsNestedWithinParameter(InputType inputType, StreamTyp } } + public static class SubstringCountMatcher extends TypeSafeMatcher { + int expectedCount; + String substring; + + private SubstringCountMatcher(String substring, int expectedCount) { + this.expectedCount = expectedCount; + 
this.substring = substring; + } + + @Override + protected boolean matchesSafely(String s) { + return countOccurrencesOfSubstring(s, substring) == expectedCount; + } + + @Override + public void describeTo(Description description) { + description.appendText("a String including " + expectedCount + " occurrences of " + substring); + } + + /** + * Counts the number of times the given substring occurs in the given string (non-overlapping). + * @param string the string. + * @param substring the substring. + * @return the number of occurrences. + */ + private static int countOccurrencesOfSubstring(String string, String substring) { + int lastMatchIndex = 0; + int count = 0; + while (lastMatchIndex >= 0) { + lastMatchIndex = string.indexOf(substring, lastMatchIndex); + if (lastMatchIndex >= 0) { + lastMatchIndex += substring.length(); + count++; + } + } + return count; + } + } + + static SubstringCountMatcher substringCount(String sub, int count) { + return new SubstringCountMatcher(sub, count); + } + + static SubstringCountMatcher substringCount(SystemSymbols_1_1 sub, int count) { + return new SubstringCountMatcher(sub.getText(), count); + } + + /** + * Performs a macro-aware transcode of the given data, verifying that the resulting stream has the + * characteristics described by the arguments to this method and that it is data-model equivalent + * to the source data. + * @param data the source data. + * @param inputType the InputType to test. + * @param streamType the StreamType to which the source data will be transcoded. + * @param expectations a list of expectations for the text representation of the transcoded data. + */ + private void verifyMacroAwareTranscode( + byte[] data, + InputType inputType, + StreamType streamType, + Matcher... 
expectations + ) throws Exception { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + try ( + MacroAwareIonReader reader = inputType.newMacroAwareReader(data); + MacroAwareIonWriter rewriter = streamType.newMacroAwareWriter(out); + ) { + reader.transcodeTo(rewriter); + } + if (streamType == StreamType.TEXT) { + String rewritten = out.toString(StandardCharsets.UTF_8.name()); + assertThat(rewritten, allOf(expectations)); + } + IonSystem system = IonSystemBuilder.standard().build(); + IonDatagram actual = system.getLoader().load(out.toByteArray()); + IonDatagram expected = system.getLoader().load(data); + assertEquals(expected, actual); + } + + @Test // TODO parameterize for all combinations once support for macro-aware text reading is added + public void macroInvocationsNestedWithinParameterMacroAwareTranscode() throws Exception { + byte[] data = macroInvocationsNestedWithinParameter(StreamType.BINARY); + verifyMacroAwareTranscode(data, InputType.BYTE_ARRAY, StreamType.TEXT, + substringCount("$ion_1_1", 1), + substringCount(SystemSymbols_1_1.ADD_SYMBOLS, 0), + substringCount(SystemSymbols_1_1.ADD_MACROS, 0), + substringCount(SystemSymbols_1_1.SET_SYMBOLS, 0), + substringCount(SystemSymbols_1_1.SET_MACROS, 0), + substringCount(SystemSymbols_1_1.ION_ENCODING, 2) + ); + } + @ParameterizedTest(name = "{0},{1}") @MethodSource("allCombinations") public void annotationInParameter(InputType inputType, StreamType streamType) throws Exception { @@ -964,9 +1094,7 @@ public void twoArgumentGroups(InputType inputType, StreamType streamType) throws } } - @ParameterizedTest(name = "{0},{1}") - @MethodSource("allCombinations") - public void macroInvocationInMacroDefinition(InputType inputType, StreamType streamType) throws Exception { + private byte[] macroInvocationInMacroDefinition(StreamType streamType) { ByteArrayOutputStream out = new ByteArrayOutputStream(); IonRawWriter_1_1 writer = streamType.newWriter(out); @@ -1008,8 +1136,13 @@ public void 
macroInvocationInMacroDefinition(InputType inputType, StreamType str writer.stepInEExp(1, false, expectedMacro); writer.stepOut(); - byte[] data = getBytes(writer, out); + return getBytes(writer, out); + } + @ParameterizedTest(name = "{0},{1}") + @MethodSource("allCombinations") + public void macroInvocationInMacroDefinition(InputType inputType, StreamType streamType) throws Exception { + byte[] data = macroInvocationInMacroDefinition(streamType); try (IonReader reader = inputType.newReader(data)) { assertEquals(IonType.INT, reader.next()); assertEquals(IntegerSize.INT, reader.getIntegerSize()); @@ -1018,6 +1151,19 @@ public void macroInvocationInMacroDefinition(InputType inputType, StreamType str } } + @Test // TODO parameterize for all combinations once support for macro-aware text reading is added + public void macroInvocationInMacroDefinitionMacroAwareTranscode() throws Exception { + byte[] data = macroInvocationInMacroDefinition(StreamType.BINARY); + verifyMacroAwareTranscode(data, InputType.BYTE_ARRAY, StreamType.TEXT, + substringCount("$ion_1_1", 1), + substringCount(SystemSymbols_1_1.ADD_SYMBOLS, 0), + substringCount(SystemSymbols_1_1.ADD_MACROS, 0), + substringCount(SystemSymbols_1_1.SET_SYMBOLS, 0), + substringCount(SystemSymbols_1_1.SET_MACROS, 0), + substringCount(SystemSymbols_1_1.ION_ENCODING, 2) + ); + } + @ParameterizedTest(name = "{0},{1}") @MethodSource("allCombinations") public void blobsAndClobs(InputType inputType, StreamType streamType) throws Exception { @@ -1171,9 +1317,7 @@ private static void writeSymbolTableAppendEExpression(IonRawWriter_1_1 writer, M } } - @ParameterizedTest(name = "{0},{1}") - @MethodSource("allCombinations") - public void macroInvocationsProduceEncodingDirectivesThatModifySymbolTable(InputType inputType, StreamType streamType) throws Exception { + private static byte[] macroInvocationsProduceEncodingDirectivesThatModifySymbolTable(StreamType streamType) { ByteArrayOutputStream out = new ByteArrayOutputStream(); 
IonRawWriter_1_1 writer = streamType.newWriter(out); writer.writeIVM(); @@ -1190,7 +1334,13 @@ public void macroInvocationsProduceEncodingDirectivesThatModifySymbolTable(Input writer.writeSymbol(FIRST_LOCAL_SYMBOL_ID); writer.writeSymbol(FIRST_LOCAL_SYMBOL_ID + 1); - byte[] data = getBytes(writer, out); + return getBytes(writer, out); + } + + @ParameterizedTest(name = "{0},{1}") + @MethodSource("allCombinations") + public void macroInvocationsProduceEncodingDirectivesThatModifySymbolTable(InputType inputType, StreamType streamType) throws Exception { + byte[] data = macroInvocationsProduceEncodingDirectivesThatModifySymbolTable(streamType); try (IonReader reader = inputType.newReader(data)) { assertEquals(IonType.SYMBOL, reader.next()); assertEquals("foo", reader.stringValue()); @@ -1209,13 +1359,24 @@ public void macroInvocationsProduceEncodingDirectivesThatModifySymbolTable(Input } } + @Test // TODO parameterize for all combinations once support for macro-aware text reading is added + public void macroInvocationsProduceEncodingDirectivesThatModifySymbolTableMacroAwareTranscode() throws Exception { + byte[] data = macroInvocationsProduceEncodingDirectivesThatModifySymbolTable(StreamType.BINARY); + verifyMacroAwareTranscode(data, InputType.BYTE_ARRAY, StreamType.TEXT, + substringCount("$ion_1_1", 1), + substringCount(SystemSymbols_1_1.ADD_SYMBOLS, 1), + substringCount(SystemSymbols_1_1.ADD_MACROS, 0), + substringCount(SystemSymbols_1_1.SET_SYMBOLS, 2), + substringCount(SystemSymbols_1_1.SET_MACROS, 0), + substringCount(SystemSymbols_1_1.ION_ENCODING, 0) + ); + } + private static Map systemSymbols() { return makeSymbolsMap(FIRST_LOCAL_SYMBOL_ID, SystemSymbols_1_1.allSymbolTexts().toArray(new String[0])); } - @ParameterizedTest(name = "{0},{1}") - @MethodSource("allCombinations") - public void macroInvocationsProduceEncodingDirectivesThatModifyMacroTable(InputType inputType, StreamType streamType) throws Exception { + private static byte[] 
macroInvocationsProduceEncodingDirectivesThatModifyMacroTable(StreamType streamType) { BigDecimal pi = new BigDecimal("3.14159"); SortedMap macroTable = new TreeMap<>(); macroTable.put("Pi", new TemplateMacro( @@ -1270,12 +1431,18 @@ public void macroInvocationsProduceEncodingDirectivesThatModifyMacroTable(InputT writer.stepOut(); writer.writeSymbol(FIRST_LOCAL_SYMBOL_ID + 1); // Still foo because AddMacros/SetMacros does not mutate the symbol table. - byte[] data = getBytes(writer, out); + return getBytes(writer, out); + } + + @ParameterizedTest(name = "{0},{1}") + @MethodSource("allCombinations") + public void macroInvocationsProduceEncodingDirectivesThatModifyMacroTable(InputType inputType, StreamType streamType) throws Exception { + byte[] data = macroInvocationsProduceEncodingDirectivesThatModifyMacroTable(streamType); try (IonReader reader = inputType.newReader(data)) { assertEquals(IonType.SYMBOL, reader.next()); assertEquals("Pi", reader.stringValue()); assertEquals(IonType.DECIMAL, reader.next()); - assertEquals(pi, reader.bigDecimalValue()); + assertEquals(new BigDecimal("3.14159"), reader.bigDecimalValue()); assertEquals(IonType.STRING, reader.next()); assertEquals("bar", reader.stringValue()); @@ -1291,6 +1458,19 @@ public void macroInvocationsProduceEncodingDirectivesThatModifyMacroTable(InputT } } + @Test // TODO parameterize for all combinations once support for macro-aware text reading is added + public void macroInvocationsProduceEncodingDirectivesThatModifyMacroTableMacroAwareTranscode() throws Exception { + byte[] data = macroInvocationsProduceEncodingDirectivesThatModifyMacroTable(StreamType.BINARY); + verifyMacroAwareTranscode(data, InputType.BYTE_ARRAY, StreamType.TEXT, + substringCount("$ion_1_1", 1), + substringCount(SystemSymbols_1_1.ADD_SYMBOLS, 1), + substringCount(SystemSymbols_1_1.ADD_MACROS, 2), + substringCount(SystemSymbols_1_1.SET_SYMBOLS, 1), + substringCount(SystemSymbols_1_1.SET_MACROS, 1), + 
substringCount(SystemSymbols_1_1.ION_ENCODING, 0) + ); + } + @ParameterizedTest(name = "{0},{1}") @MethodSource("allCombinations") public void multipleListsWithinSymbolTableDeclaration(InputType inputType, StreamType streamType) throws Exception { @@ -1324,9 +1504,7 @@ public void multipleListsWithinSymbolTableDeclaration(InputType inputType, Strea } } - @ParameterizedTest(name = "{0},{1}") - @MethodSource("allCombinations") - public void emptyMacroAppendToEmptyTable(InputType inputType, StreamType streamType) throws Exception { + private byte[] emptyMacroAppendToEmptyTable(StreamType streamType) { ByteArrayOutputStream out = new ByteArrayOutputStream(); IonRawWriter_1_1 writer = streamType.newWriter(out); writer.writeIVM(); @@ -1337,15 +1515,32 @@ public void emptyMacroAppendToEmptyTable(InputType inputType, StreamType streamT endMacroTable(writer); endEncodingDirective(writer); - byte[] data = getBytes(writer, out); + return getBytes(writer, out); + } + + @ParameterizedTest(name = "{0},{1}") + @MethodSource("allCombinations") + public void emptyMacroAppendToEmptyTable(InputType inputType, StreamType streamType) throws Exception { + byte[] data = emptyMacroAppendToEmptyTable(streamType); try (IonReader reader = inputType.newReader(data)) { assertNull(reader.next()); } } - @ParameterizedTest(name = "{0},{1}") - @MethodSource("allCombinations") - public void emptyMacroAppendToNonEmptyTable(InputType inputType, StreamType streamType) throws Exception { + @Test // TODO parameterize for all combinations once support for macro-aware text reading is added + public void emptyMacroAppendToEmptyTableMacroAwareTranscode() throws Exception { + byte[] data = emptyMacroAppendToEmptyTable(StreamType.BINARY); + verifyMacroAwareTranscode(data, InputType.BYTE_ARRAY, StreamType.TEXT, + substringCount("$ion_1_1", 1), + substringCount(SystemSymbols_1_1.ADD_SYMBOLS, 0), + substringCount(SystemSymbols_1_1.ADD_MACROS, 0), + substringCount(SystemSymbols_1_1.SET_SYMBOLS, 0), + 
substringCount(SystemSymbols_1_1.SET_MACROS, 0), + substringCount(SystemSymbols_1_1.ION_ENCODING, 0) // The empty append to an empty table has no effect, and it is not transcoded. This is a known limitation. + ); + } + + private byte[] emptyMacroAppendToNonEmptyTable(StreamType streamType) { ByteArrayOutputStream out = new ByteArrayOutputStream(); IonRawWriter_1_1 writer = streamType.newWriter(out); writer.writeIVM(); @@ -1378,28 +1573,45 @@ public void emptyMacroAppendToNonEmptyTable(InputType inputType, StreamType stre writer.writeSymbol(1); } writer.stepOut(); - byte[] data = getBytes(writer, out); + return getBytes(writer, out); + } + + @ParameterizedTest(name = "{0},{1}") + @MethodSource("allCombinations") + public void emptyMacroAppendToNonEmptyTable(InputType inputType, StreamType streamType) throws Exception { + byte[] data = emptyMacroAppendToNonEmptyTable(streamType); try (IonReader reader = inputType.newReader(data)) { assertEquals(IonType.SYMBOL, reader.next()); assertEquals("bar", reader.stringValue()); } } - @ParameterizedTest(name = "{0},{1}") - @MethodSource("allCombinations") - public void invokeUnqualifiedSystemMacroInTDL(InputType inputType, StreamType streamType) throws Exception { + @Test // TODO parameterize for all combinations once support for macro-aware text reading is added + public void emptyMacroAppendToNonEmptyTableMacroAwareTranscode() throws Exception { + byte[] data = emptyMacroAppendToNonEmptyTable(StreamType.BINARY); + verifyMacroAwareTranscode(data, InputType.BYTE_ARRAY, StreamType.TEXT, + substringCount("$ion_1_1", 1), + substringCount(SystemSymbols_1_1.ADD_SYMBOLS, 0), + substringCount(SystemSymbols_1_1.ADD_MACROS, 0), + substringCount(SystemSymbols_1_1.SET_SYMBOLS, 0), + substringCount(SystemSymbols_1_1.SET_MACROS, 0), + substringCount(SystemSymbols_1_1.ION_ENCODING, 3) // Two encoding directives, plus one $ion_encoding symbol to denote the macro table append. 
+ ); + } + + private byte[] invokeUnqualifiedSystemMacroInTDL(StreamType streamType) { ByteArrayOutputStream out = new ByteArrayOutputStream(); IonRawWriter_1_1 writer = streamType.newWriter(out); writer.writeIVM(); SortedMap macroTable = new TreeMap<>(); macroTable.put("foo", new TemplateMacro( - Collections.singletonList(new Macro.Parameter("x", Macro.ParameterEncoding.Tagged, Macro.ParameterCardinality.ZeroOrMore)), - Arrays.asList( - new Expression.MacroInvocation(SystemMacro.Default, 0, 3), - new Expression.VariableRef(0), - new Expression.StringValue(Collections.emptyList(), "hello world") - ) + Collections.singletonList(new Macro.Parameter("x", Macro.ParameterEncoding.Tagged, Macro.ParameterCardinality.ZeroOrMore)), + Arrays.asList( + new Expression.MacroInvocation(SystemMacro.Default, 0, 3), + new Expression.VariableRef(0), + new Expression.StringValue(Collections.emptyList(), "hello world") + ) )); Map symbols = Collections.emptyMap(); @@ -1421,13 +1633,76 @@ public void invokeUnqualifiedSystemMacroInTDL(InputType inputType, StreamType st writer.stepInEExp(0, true, macroTable.get("foo")); { } writer.stepOut(); - byte[] data = getBytes(writer, out); + return getBytes(writer, out); + } + + @ParameterizedTest(name = "{0},{1}") + @MethodSource("allCombinations") + public void invokeUnqualifiedSystemMacroInTDL(InputType inputType, StreamType streamType) throws Exception { + byte[] data = invokeUnqualifiedSystemMacroInTDL(streamType); try (IonReader reader = inputType.newReader(data)) { assertEquals(IonType.STRING, reader.next()); assertEquals("hello world", reader.stringValue()); } } + @Test // TODO parameterize for all combinations once support for macro-aware text reading is added + public void invokeUnqualifiedSystemMacroInTDLMacroAwareTranscode() throws Exception { + byte[] data = invokeUnqualifiedSystemMacroInTDL(StreamType.BINARY); + verifyMacroAwareTranscode(data, InputType.BYTE_ARRAY, StreamType.TEXT, + substringCount("$ion_1_1", 1), + 
substringCount(SystemSymbols_1_1.ADD_SYMBOLS, 0), + substringCount(SystemSymbols_1_1.ADD_MACROS, 0), + substringCount(SystemSymbols_1_1.SET_SYMBOLS, 0), + substringCount(SystemSymbols_1_1.SET_MACROS, 0), + substringCount(SystemSymbols_1_1.ION_ENCODING, 1) + ); + } + + @Test // TODO parameterize for all combinations once support for macro-aware text reading is added + public void multipleIonVersionMarkersMacroAwareTranscode() throws Exception { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + IonRawWriter_1_1 writer = StreamType.BINARY.newWriter(out); + Map symbols = new HashMap<>(); + writer.writeIVM(); + writeSymbolTableAppendEExpression(writer, symbols, "foo"); + writer.writeSymbol(SystemSymbols_1_1.size() + FIRST_LOCAL_SYMBOL_ID); // foo + writer.writeIVM(); + writeSymbolTableAppendEExpression(writer, symbols, "bar"); // bar + writer.writeSymbol(SystemSymbols_1_1.size() + FIRST_LOCAL_SYMBOL_ID); + byte[] data = getBytes(writer, out); + verifyMacroAwareTranscode(data, InputType.BYTE_ARRAY, StreamType.TEXT, + substringCount("$ion_1_1", 2), + substringCount(SystemSymbols_1_1.ADD_SYMBOLS, 2), + substringCount(SystemSymbols_1_1.ADD_MACROS, 0), + substringCount(SystemSymbols_1_1.SET_SYMBOLS, 0), + substringCount(SystemSymbols_1_1.SET_MACROS, 0), + substringCount(SystemSymbols_1_1.ION_ENCODING, 0) + ); + } + + @Test // TODO finalize handling of Ion 1.0-style symbol tables in Ion 1.1: https://github.com/amazon-ion/ion-java/issues/1002 + public void ion10SymbolTableMacroAwareTranscode() throws Exception { + byte[] data = bytes( + 0xE0, 0x01, 0x01, 0xEA, // Ion 1.1 IVM + 0xE4, 0x07, // $ion_symbol_table:: + 0xD4, // { + 0x0F, // symbols: + 0xB2, // [ + 0x91, 'a', // "a" + // ]} + 0xE1, 0x01 + ); + ByteArrayOutputStream out = new ByteArrayOutputStream(); + try ( + MacroAwareIonReader reader = InputType.BYTE_ARRAY.newMacroAwareReader(data); + MacroAwareIonWriter rewriter = StreamType.BINARY.newMacroAwareWriter(out); + ) { + // This may at some point be 
supported. + assertThrows(IonException.class, () -> reader.transcodeTo(rewriter)); + } + } + // TODO cover every Ion type // TODO annotations in macro definition (using 'annotate' system macro) // TODO test error conditions diff --git a/src/test/java/com/amazon/ion/impl/IonReaderContinuableCoreBinaryTest.java b/src/test/java/com/amazon/ion/impl/IonReaderContinuableCoreBinaryTest.java index 7fecebedd..37487506a 100644 --- a/src/test/java/com/amazon/ion/impl/IonReaderContinuableCoreBinaryTest.java +++ b/src/test/java/com/amazon/ion/impl/IonReaderContinuableCoreBinaryTest.java @@ -4,10 +4,15 @@ import com.amazon.ion.IntegerSize; import com.amazon.ion.IonCursor; +import com.amazon.ion.IonDatagram; +import com.amazon.ion.IonEncodingVersion; import com.amazon.ion.IonException; import com.amazon.ion.IonReader; +import com.amazon.ion.IonSystem; import com.amazon.ion.IonType; +import com.amazon.ion.MacroAwareIonWriter; import com.amazon.ion.TestUtils; +import com.amazon.ion.system.IonSystemBuilder; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.CsvSource; @@ -1146,4 +1151,57 @@ public void systemReaderWrapperReadsEncodingDirectiveWithAppend(boolean construc assertNull(systemReader.next()); } } + + /** + * Performs a macro-aware transcode of the given data, verifying that the resulting stream is data-model equivalent + * to the source data. + * @param data the source data. + * @param constructFromBytes true if the reader is to be backed by a byte array; otherwise, the reader will be + * backed by an InputStream. 
+ */ + private void assertMacroAwareTranscribeProducesEquivalentStream(byte[] data, boolean constructFromBytes) throws Exception { + StringBuilder sb = new StringBuilder(); + try ( + IonReaderContinuableCoreBinary reader = initializeReader(constructFromBytes, data); + MacroAwareIonWriter writer = (MacroAwareIonWriter) IonEncodingVersion.ION_1_1.textWriterBuilder().build(sb); + ) { + reader.transcodeTo(writer); + } + IonSystem system = IonSystemBuilder.standard().build(); + IonDatagram actual = system.getLoader().load(sb.toString()); + IonDatagram expected = system.getLoader().load(data); + assertEquals(expected, actual); + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void encodingLevelTranscodeOfSystemMacroInvocation(boolean constructFromBytes) throws Exception { + byte[] data = withIvm(1, bytes( + 0xEF, 0x0C, // system macro add_symbols + 0x02, // AEB: 0b------aa; a=10, expression group + 0x01, // FlexInt 0, a delimited expression group + 0x93, 0x61, 0x62, 0x63, // 3-byte string, utf-8 "abc" + 0xF0, // delimited end... 
of expression group + 0xE1, // SID single byte + 0x42 // SID $66 + )); + assertMacroAwareTranscribeProducesEquivalentStream(data, constructFromBytes); + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void encodingLevelTranscodeOfIon10SymbolTable(boolean constructFromBytes) throws Exception { + byte[] data = withIvm(0, bytes( + 0xEA, 0x81, 0x83, // $ion_symbol_table + 0xD7, // { + 0x87, // symbols: + 0xB5, // [ + 0x84, 'a', 'b', 'c', 'd', // "abcd" -> $10 + // ]} + 0xC4, // ( + 0xE3, 0x81, 0x8A, // abcd:: + 0x20 // 0 + )); + assertMacroAwareTranscribeProducesEquivalentStream(data, constructFromBytes); + } } diff --git a/src/test/java/com/amazon/ion/impl/IonReaderContinuableTopLevelBinaryTest.java b/src/test/java/com/amazon/ion/impl/IonReaderContinuableTopLevelBinaryTest.java index 79ef1ef4a..330910915 100644 --- a/src/test/java/com/amazon/ion/impl/IonReaderContinuableTopLevelBinaryTest.java +++ b/src/test/java/com/amazon/ion/impl/IonReaderContinuableTopLevelBinaryTest.java @@ -6086,5 +6086,28 @@ public void readIon11SymbolTableAppendUsingSystemSymbolValue(boolean constructFr closeAndCount(); } + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void readIon11SymbolTableWithFlexUIntFieldNames(boolean constructFromBytes) throws Exception { + reader = readerForIon11( + bytes( + 0xE7, 0x01, 0x63, // One FlexSym annotation, with opcode, opcode 63 = system symbol 3 = $ion_symbol_table + 0xD7, // { + 0x0D, // FlexUInt 6 = imports + 0xEE, 0x03, // System symbol value 3 = $ion_symbol_table (denoting symbol table append) + 0x0F, // FlexUInt 7 = symbols + 0xB2, 0x91, 'a', // ["a"] + 0xE1, SystemSymbols_1_1.size() + 1 // first user symbol = a + ), + constructFromBytes + ); + assertSequence( + next(IonType.SYMBOL), + stringValue("a"), + next(null) + ); + closeAndCount(); + } + // TODO Ion 1.1 symbol tables with all kinds of annotation encodings (opcodes E4 - E9, 
inline and SID) } diff --git a/src/test/java/com/amazon/ion/impl/bin/IonManagedBinaryWriterTest.java b/src/test/java/com/amazon/ion/impl/bin/IonManagedBinaryWriterTest.java index 1b0b1815f..293d8fed4 100644 --- a/src/test/java/com/amazon/ion/impl/bin/IonManagedBinaryWriterTest.java +++ b/src/test/java/com/amazon/ion/impl/bin/IonManagedBinaryWriterTest.java @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.impl.bin; +import com.amazon.ion.FakeSymbolToken; import com.amazon.ion.IonDatagram; import com.amazon.ion.IonInt; import com.amazon.ion.IonLoader; @@ -13,7 +14,9 @@ import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.nio.charset.StandardCharsets; +import com.amazon.ion.TestUtils; import com.amazon.ion.system.IonBinaryWriterBuilder; import com.amazon.ion.system.IonSystemBuilder; import org.junit.Before; @@ -457,15 +460,41 @@ public void testNestedEmptyAnnotatedContainer() throws Exception assertValue("{bar: foo::[]}"); } + @Test + public void testSymbolWithKnownTextAndSid2IsNotConsideredIvm() throws Exception { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + IonWriter writer = IonBinaryWriterBuilder.standard().build(out); + writer.writeSymbol("foo"); + // Should not be an IVM even though SID 2 is present because known text always takes precedence. + writer.writeSymbolToken(new FakeSymbolToken("abc", 2)); + // If the previous symbol were interpreted as an IVM, then the following symbol IDs would be out of range. + writer.writeSymbolToken(new FakeSymbolToken(null, 10)); + writer.writeSymbolToken(new FakeSymbolToken(null, 11)); + writer.close(); + assertEquivalentDataModel( + out.toByteArray(), + TestUtils.ensureBinary(system(), "foo abc foo abc".getBytes(StandardCharsets.UTF_8)) + ); + } + /** * Asserts equivalence of ion data model between two provided data streams. - * @param actual represents the serialized data streams when auto-flush is enabled. 
- * @param expected represents the expected data streams. + * @param actual represents the actual data stream. + * @param expected represents the expected data stream. */ private void assertEquivalentDataModel(ByteArrayOutputStream actual, ByteArrayOutputStream expected) { + assertEquivalentDataModel(actual.toByteArray(), expected.toByteArray()); + } + + /** + * Asserts equivalence of ion data model between two provided data streams. + * @param actual represents the actual data stream. + * @param expected represents the expected data stream. + */ + private void assertEquivalentDataModel(byte[] actual, byte[] expected) { IonLoader loader = IonSystemBuilder.standard().build().newLoader(); - IonDatagram actualDatagram = loader.load(actual.toByteArray()); - IonDatagram expectedDatagram = loader.load(expected.toByteArray()); + IonDatagram actualDatagram = loader.load(actual); + IonDatagram expectedDatagram = loader.load(expected); assertEquals(expectedDatagram, actualDatagram); } }