Skip to content

Commit

Permalink
Fixes gaps in macro-aware transcoding identified while integrating wi…
Browse files Browse the repository at this point in the history
…th ion-java-benchmark-cli.
  • Loading branch information
tgregg committed Nov 27, 2024
1 parent 9138aca commit 7482237
Show file tree
Hide file tree
Showing 9 changed files with 132 additions and 52 deletions.
4 changes: 4 additions & 0 deletions src/main/java/com/amazon/ion/impl/IonCursorBinary.java
Original file line number Diff line number Diff line change
Expand Up @@ -1444,6 +1444,8 @@ private void uncheckedReadFieldName_1_1() {
fieldSid = (int) uncheckedReadFlexSym_1_1(fieldTextMarker);
} else {
fieldSid = (int) uncheckedReadFlexUInt_1_1();
fieldTextMarker.startIndex = -1;
fieldTextMarker.endIndex = fieldSid;
}
}
}
Expand Down Expand Up @@ -1794,6 +1796,8 @@ private boolean slowReadFieldName_1_1() {
return slowReadFieldNameFlexSym_1_1();
} else {
fieldSid = (int) slowReadFlexUInt_1_1();
fieldTextMarker.startIndex = -1;
fieldTextMarker.endIndex = fieldSid;
return fieldSid < 0;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -943,33 +943,6 @@ private enum State {
// The current state.
private State state = State.READING_VALUE;

/**
* @return true if current value has a sequence of annotations that begins with `$ion_symbol_table`; otherwise,
* false.
*/
boolean startsWithIonSymbolTable() {
if (minorVersion == 0 && annotationSequenceMarker.startIndex >= 0) {
long savedPeekIndex = peekIndex;
peekIndex = annotationSequenceMarker.startIndex;
int sid = readVarUInt_1_0();
peekIndex = savedPeekIndex;
return ION_SYMBOL_TABLE_SID == sid;
} else if (minorVersion == 1) {
Marker marker = annotationTokenMarkers.get(0);
return matchesSystemSymbol_1_1(marker, SystemSymbols_1_1.ION_SYMBOL_TABLE);
}
return false;
}

/**
* @return true if the reader is positioned on a symbol table; otherwise, false.
*/
private boolean isPositionedOnSymbolTable() {
return hasAnnotations &&
super.getType() == IonType.STRUCT &&
startsWithIonSymbolTable();
}

@Override
public Event nextValue() {
Event event;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
import java.util.function.Consumer;

import static com.amazon.ion.SystemSymbols.ION_ENCODING;
import static com.amazon.ion.SystemSymbols.ION_SYMBOL_TABLE_SID;
import static com.amazon.ion.impl.IonReaderContinuableApplicationBinary.SYMBOLS_LIST_INITIAL_CAPACITY;
import static com.amazon.ion.impl.IonTypeID.SYSTEM_SYMBOL_VALUE;
import static com.amazon.ion.impl.bin.Ion_1_1_Constants.*;
Expand Down Expand Up @@ -1747,6 +1748,33 @@ protected void stepOutOfEExpression() {
}
}

/**
* @return true if current value has a sequence of annotations that begins with `$ion_symbol_table`; otherwise,
* false.
*/
protected boolean startsWithIonSymbolTable() {
if (minorVersion == 0 && annotationSequenceMarker.startIndex >= 0) {
long savedPeekIndex = peekIndex;
peekIndex = annotationSequenceMarker.startIndex;
int sid = readVarUInt_1_0();
peekIndex = savedPeekIndex;
return ION_SYMBOL_TABLE_SID == sid;
} else if (minorVersion == 1) {
Marker marker = annotationTokenMarkers.get(0);
return matchesSystemSymbol_1_1(marker, SystemSymbols_1_1.ION_SYMBOL_TABLE);
}
return false;
}

/**
* @return true if the reader is positioned on a symbol table; otherwise, false.
*/
protected boolean isPositionedOnSymbolTable() {
return hasAnnotations &&
getEncodingType() == IonType.STRUCT &&
startsWithIonSymbolTable();
}

/**
* Consumes the next value (if any) from the MacroEvaluator, setting `event` based on the result.
* @return true if evaluation of the current invocation has completed; otherwise, false.
Expand Down Expand Up @@ -1851,6 +1879,10 @@ Event transcodeNextTo(MacroAwareIonWriter writer) throws IOException {
break;
}
if (event != Event.NEEDS_DATA) {
if (minorVersion > 0 && isPositionedOnSymbolTable()) {
// TODO finalize handling of Ion 1.0-style symbol tables in Ion 1.1: https://github.com/amazon-ion/ion-java/issues/1002
throw new IonException("Macro-aware transcoding of Ion 1.1 data containing Ion 1.0-style symbol tables not yet supported.");
}
// The reader is now positioned on an actual encoding value. Write the value.
writer.writeValue(asIonReader);
}
Expand Down
30 changes: 12 additions & 18 deletions src/main/java/com/amazon/ion/impl/bin/IonManagedBinaryWriter.java
Original file line number Diff line number Diff line change
@@ -1,23 +1,11 @@
/*
* Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/

// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
package com.amazon.ion.impl.bin;

import static com.amazon.ion.IonType.LIST;
import static com.amazon.ion.IonType.STRUCT;
import static com.amazon.ion.SystemSymbols.IMPORTS_SID;
import static com.amazon.ion.SystemSymbols.ION_1_0;
import static com.amazon.ion.SystemSymbols.ION_1_0_MAX_ID;
import static com.amazon.ion.SystemSymbols.ION_SYMBOL_TABLE_SID;
import static com.amazon.ion.SystemSymbols.MAX_ID_SID;
Expand Down Expand Up @@ -1033,8 +1021,14 @@ public void writeSymbol(String content) throws IOException
writeSymbolToken(intern(content));
}

private boolean handleIVM(int sid) throws IOException {
if (user.isIVM(sid))
private boolean handleIVM(SymbolToken symbol) throws IOException {
if (getDepth() != 0 || user.hasAnnotations()) {
return false;
}
// A symbol's text always takes precedence over its symbol ID. Only symbols with unknown text are compared
// against SID 2.
String text = symbol.getText();
if (ION_1_0.equals(text) || (text == null && user.isIVM(symbol.getSid())))
{
if (user.hasWrittenValuesSinceFinished())
{
Expand All @@ -1054,7 +1048,7 @@ private boolean handleIVM(int sid) throws IOException {

public void writeSymbolToken(SymbolToken token) throws IOException
{
if (token != null && handleIVM(token.getSid()))
if (token != null && handleIVM(token))
{
return;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ class MacroEvaluatorAsIonReader(
?: return Collections.emptyIterator()
}

override fun isInStruct(): Boolean = TODO("Not yet implemented")
override fun isInStruct(): Boolean = containerStack.peek()?.container?.type == IonType.STRUCT

override fun getFieldId(): Int = currentFieldName?.value?.sid ?: 0
override fun getFieldName(): String? = currentFieldName?.value?.text
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import com.amazon.ion.IntegerSize;
import com.amazon.ion.IonDatagram;
import com.amazon.ion.IonEncodingVersion;
import com.amazon.ion.IonException;
import com.amazon.ion.IonLoader;
import com.amazon.ion.IonReader;
import com.amazon.ion.IonSystem;
Expand Down Expand Up @@ -44,10 +45,12 @@
import java.util.TreeMap;
import java.util.function.Consumer;

import static com.amazon.ion.BitUtils.bytes;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;

/**
Expand Down Expand Up @@ -1684,6 +1687,28 @@ public void multipleIonVersionMarkersMacroAwareTranscode() throws Exception {
);
}

@Test // TODO finalize handling of Ion 1.0-style symbol tables in Ion 1.1: https://github.com/amazon-ion/ion-java/issues/1002
public void ion10SymbolTableMacroAwareTranscode() throws Exception {
byte[] data = bytes(
0xE0, 0x01, 0x01, 0xEA, // Ion 1.1 IVM
0xE4, 0x07, // $ion_symbol_table::
0xD4, // {
0x0F, // symbols:
0xB2, // [
0x91, 'a', // "a"
// ]}
0xE1, 0x01
);
ByteArrayOutputStream out = new ByteArrayOutputStream();
try (
MacroAwareIonReader reader = InputType.BYTE_ARRAY.newMacroAwareReader(data);
MacroAwareIonWriter rewriter = StreamType.BINARY.newMacroAwareWriter(out);
) {
// This may at some point be supported.
assertThrows(IonException.class, () -> reader.transcodeTo(rewriter));
}
}

// TODO cover every Ion type
// TODO annotations in macro definition (using 'annotate' system macro)
// TODO test error conditions
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1175,7 +1175,7 @@ private void assertMacroAwareTranscribeProducesEquivalentStream(byte[] data, boo

@ParameterizedTest(name = "constructFromBytes={0}")
@ValueSource(booleans = {true, false})
public void encodingLevelTranscribeOfSystemMacroInvocation(boolean constructFromBytes) throws Exception {
public void encodingLevelTranscodeOfSystemMacroInvocation(boolean constructFromBytes) throws Exception {
byte[] data = withIvm(1, bytes(
0xEF, 0x0C, // system macro add_symbols
0x02, // AEB: 0b------aa; a=10, expression group
Expand All @@ -1190,7 +1190,7 @@ public void encodingLevelTranscribeOfSystemMacroInvocation(boolean constructFrom

@ParameterizedTest(name = "constructFromBytes={0}")
@ValueSource(booleans = {true, false})
public void encodingLevelTranscribeOfIon10SymbolTable(boolean constructFromBytes) throws Exception {
public void encodingLevelTranscodeOfIon10SymbolTable(boolean constructFromBytes) throws Exception {
byte[] data = withIvm(0, bytes(
0xEA, 0x81, 0x83, // $ion_symbol_table
0xD7, // {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6086,5 +6086,28 @@ public void readIon11SymbolTableAppendUsingSystemSymbolValue(boolean constructFr
closeAndCount();
}

@ParameterizedTest(name = "constructFromBytes={0}")
@ValueSource(booleans = {true, false})
public void readIon11SymbolTableWithFlexUIntFieldNames(boolean constructFromBytes) throws Exception {
reader = readerForIon11(
bytes(
0xE7, 0x01, 0x63, // One FlexSym annotation, with opcode, opcode 63 = system symbol 3 = $ion_symbol_table
0xD7, // {
0x0D, // FlexUInt 6 = imports
0xEE, 0x03, // System symbol value 3 = $ion_symbol_table (denoting symbol table append)
0x0F, // FlexUInt 7 = symbols
0xB2, 0x91, 'a', // ["a"]
0xE1, SystemSymbols_1_1.size() + 1 // first user symbol = a
),
constructFromBytes
);
assertSequence(
next(IonType.SYMBOL),
stringValue("a"),
next(null)
);
closeAndCount();
}

// TODO Ion 1.1 symbol tables with all kinds of annotation encodings (opcodes E4 - E9, inline and SID)
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// SPDX-License-Identifier: Apache-2.0
package com.amazon.ion.impl.bin;

import com.amazon.ion.FakeSymbolToken;
import com.amazon.ion.IonDatagram;
import com.amazon.ion.IonInt;
import com.amazon.ion.IonLoader;
Expand All @@ -13,7 +14,9 @@

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;

import com.amazon.ion.TestUtils;
import com.amazon.ion.system.IonBinaryWriterBuilder;
import com.amazon.ion.system.IonSystemBuilder;
import org.junit.Before;
Expand Down Expand Up @@ -457,15 +460,41 @@ public void testNestedEmptyAnnotatedContainer() throws Exception
assertValue("{bar: foo::[]}");
}

@Test
public void testSymbolWithKnownTextAndSid2IsNotConsideredIvm() throws Exception {
ByteArrayOutputStream out = new ByteArrayOutputStream();
IonWriter writer = IonBinaryWriterBuilder.standard().build(out);
writer.writeSymbol("foo");
// Should not be an IVM even though SID 2 is present because known text always takes precedence.
writer.writeSymbolToken(new FakeSymbolToken("abc", 2));
// If the previous symbol were interpreted as an IVM, then the following symbol IDs would be out of range.
writer.writeSymbolToken(new FakeSymbolToken(null, 10));
writer.writeSymbolToken(new FakeSymbolToken(null, 11));
writer.close();
assertEquivalentDataModel(
out.toByteArray(),
TestUtils.ensureBinary(system(), "foo abc foo abc".getBytes(StandardCharsets.UTF_8))
);
}

/**
* Asserts equivalence of ion data model between two provided data streams.
* @param actual represents the serialized data streams when auto-flush is enabled.
* @param expected represents the expected data streams.
* @param actual represents the actual data stream.
* @param expected represents the expected data stream.
*/
private void assertEquivalentDataModel(ByteArrayOutputStream actual, ByteArrayOutputStream expected) {
assertEquivalentDataModel(actual.toByteArray(), expected.toByteArray());
}

/**
* Asserts equivalence of ion data model between two provided data streams.
* @param actual represents the actual data stream.
* @param expected represents the expected data stream.
*/
private void assertEquivalentDataModel(byte[] actual, byte[] expected) {
IonLoader loader = IonSystemBuilder.standard().build().newLoader();
IonDatagram actualDatagram = loader.load(actual.toByteArray());
IonDatagram expectedDatagram = loader.load(expected.toByteArray());
IonDatagram actualDatagram = loader.load(actual);
IonDatagram expectedDatagram = loader.load(expected);
assertEquals(expectedDatagram, actualDatagram);
}
}

0 comments on commit 7482237

Please sign in to comment.