Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds support for eexps with 12- and 20-bit addresses #825

Merged
merged 3 commits into from
Aug 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 78 additions & 20 deletions src/lazy/binary/raw/v1_1/immutable_buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ use crate::lazy::expanded::macro_table::MacroRef;
use crate::lazy::expanded::template::ParameterEncoding;
use crate::lazy::expanded::EncodingContextRef;
use crate::lazy::text::raw::v1_1::arg_group::EExpArgExpr;
use crate::lazy::text::raw::v1_1::reader::MacroAddress;
use crate::result::IonFailure;
use crate::{v1_1, IonError, IonResult};

Expand Down Expand Up @@ -396,12 +397,11 @@ impl<'a> ImmutableBuffer<'a> {

use OpcodeType::*;
let result = match opcode.opcode_type {
EExpressionWithAddress => {
ParseValueExprResult::EExp(self.read_eexp_with_address_in_opcode(opcode))
}
EExpressionAddressFollows => todo!("eexp address follows"),
EExpressionWithLengthPrefix => {
ParseValueExprResult::EExp(self.read_eexp_with_length_prefix(opcode))
EExpressionWith6BitAddress
| EExpressionWith12BitAddress
| EExpressionWith20BitAddress
| EExpressionWithLengthPrefix => {
ParseValueExprResult::EExp(self.read_e_expression(opcode))
}
AnnotationFlexSym | AnnotationSymAddress => {
ParseValueExprResult::Value(self.read_annotated_value(opcode))
Expand Down Expand Up @@ -873,21 +873,43 @@ impl<'a> ImmutableBuffer<'a> {
#[inline]
pub fn read_e_expression(self, opcode: Opcode) -> ParseResult<'a, BinaryEExpression_1_1<'a>> {
use OpcodeType::*;
match opcode.opcode_type {
EExpressionWithAddress => return self.read_eexp_with_address_in_opcode(opcode),
EExpressionAddressFollows => todo!("e-expr with trailing address; {opcode:#0x?}",),
let (macro_address, input_after_address) = match opcode.opcode_type {
EExpressionWith6BitAddress => (opcode.byte as usize, self.consume(1)),
EExpressionWith12BitAddress => {
if self.len() < 2 {
return IonResult::incomplete("parsing a 12-bit e-exp address", self.offset);
}

let bias = ((opcode.byte as usize & 0x0F) << 8) + 64;
let fixed_uint = self.bytes()[1] as usize;
let address = fixed_uint + bias;
(address, self.consume(2))
}
EExpressionWith20BitAddress => {
if self.len() < 3 {
return IonResult::incomplete("parsing a 20-bit e-exp address", self.offset);
}
let bias = ((opcode.byte as usize & 0x0F) << 16) + 4160;
let (fixed_uint, input_after_opcode) = self.consume(1).read_fixed_uint(2)?;
let address = fixed_uint.value().expect_usize()? + bias;
(address, input_after_opcode)
}
// Length-prefixed is a special case.
EExpressionWithLengthPrefix => return self.read_eexp_with_length_prefix(opcode),
_ => unreachable!("read_e_expression called with invalid opcode"),
};
self.read_eexp_with_address(input_after_address, macro_address)
}

fn read_eexp_with_address_in_opcode(
/// Reads a complete e-expression whose address is provided.
///
/// `self` begins at the opcode, `input_after_address` begins after the opcode and any address
/// bytes, and `macro_address` is the address of the invoked macro.
fn read_eexp_with_address(
self,
opcode: Opcode,
input_after_address: ImmutableBuffer<'a>,
macro_address: MacroAddress,
) -> ParseResult<'a, BinaryEExpression_1_1<'a>> {
let input_after_opcode = self.consume(1);
let macro_address = opcode.byte as usize;

// Get a reference to the macro that lives at that address
let macro_ref = self
.context
Expand All @@ -907,9 +929,9 @@ impl<'a> ImmutableBuffer<'a> {
let bitmap_size_in_bytes = signature.bitmap_size_in_bytes();

let (bitmap_bits, input_after_bitmap) = if signature.num_variadic_params() == 0 {
(0, input_after_opcode)
(0, input_after_address)
} else {
input_after_opcode.read_eexp_bitmap(bitmap_size_in_bytes)?
input_after_address.read_eexp_bitmap(bitmap_size_in_bytes)?
};

let bitmap = ArgGroupingBitmap::new(signature.num_variadic_params(), bitmap_bits);
Expand All @@ -927,7 +949,7 @@ impl<'a> ImmutableBuffer<'a> {
let matched_eexp_bytes = self.slice(0, eexp_total_length);
let remaining_input = self.consume(matched_eexp_bytes.len());

let bitmap_offset = input_after_opcode.offset() - self.offset();
let bitmap_offset = input_after_address.offset() - self.offset();
let args_offset = input_after_bitmap.offset() - self.offset();
Ok((
{
Expand Down Expand Up @@ -973,7 +995,7 @@ impl<'a> ImmutableBuffer<'a> {
input_after_length.read_eexp_bitmap(macro_ref.signature().bitmap_size_in_bytes())?;
let args_offset = bitmap_offset + macro_ref.signature().bitmap_size_in_bytes() as u8;
let remaining_input = self.consume(total_length);
return Ok((
Ok((
BinaryEExpression_1_1::new(
MacroRef::new(macro_address, macro_ref),
bitmap_bits,
Expand All @@ -982,7 +1004,7 @@ impl<'a> ImmutableBuffer<'a> {
args_offset,
),
remaining_input,
));
))
}

fn read_eexp_bitmap(self, bitmap_size_in_bytes: usize) -> ParseResult<'a, u64> {
Expand Down Expand Up @@ -1103,7 +1125,7 @@ mod tests {
use crate::lazy::expanded::EncodingContext;
use crate::lazy::text::raw::v1_1::reader::{MacroAddress, MacroIdRef};
use crate::v1_0::RawValueRef;
use crate::{Element, ElementReader, Reader};
use crate::{AnyEncoding, Element, ElementReader, Reader, SequenceWriter, Writer};

use super::*;

Expand Down Expand Up @@ -1510,6 +1532,42 @@ mod tests {
Ok(())
}

#[test]
fn roundtrip_macro_addresses_up_to_20_bits() -> IonResult<()> {
use std::fmt::Write;

// This is a large enough value that many macros will be encoded using 20 bits.
// However, it is not large enough to fully exercise the 20-bit encoding space. To do that,
// we would need approximately 1 million macros, which takes too much time to execute in a
// debug build.
const MAX_TEST_MACRO_ADDRESS: usize = 6_000;

// Construct an encoding directive that defines this number of macros. Each macro will expand
// to its own address.
let mut macro_definitions = String::from("$ion_encoding::(\n (macro_table\n");
for address in MacroTable::FIRST_USER_MACRO_ID..MAX_TEST_MACRO_ADDRESS {
writeln!(macro_definitions, " (macro m{address} () {address})")?;
}
macro_definitions.push_str(" )\n)\n");
let encoding_directive = Element::read_one(macro_definitions)?;
let mut writer = Writer::new(v1_1::Binary, Vec::new())?;
writer.write(&encoding_directive)?;

// Invoke each of the macros we just defined in order.
for address in MacroTable::FIRST_USER_MACRO_ID..MAX_TEST_MACRO_ADDRESS {
writer.eexp_writer(address)?.close()?;
}
let data = writer.close()?;

// Read from the resulting stream and confirm that we get each value back in the expected order.
let mut reader = Reader::new(AnyEncoding, data)?;
for expected in MacroTable::FIRST_USER_MACRO_ID..MAX_TEST_MACRO_ADDRESS {
let actual = reader.expect_next()?.read()?.expect_int()?.expect_usize()?;
assert_eq!(actual, expected, "actual {actual} != expected {expected}");
}
Ok(())
}

#[test]
fn read_length_prefixed_eexp_with_star_parameter_empty() -> IonResult<()> {
let macro_source = r#"
Expand Down
25 changes: 13 additions & 12 deletions src/lazy/binary/raw/v1_1/type_code.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,19 @@ use crate::IonType;
/// * Whether the next type code is reserved.
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
pub enum OpcodeType {
EExpressionWithAddress, // 0x00-0x50 -
EExpressionAddressFollows, // 0x50-0x5F -
Integer, // 0x60-0x68 - Integer up to 8 bytes wide
Float, // 0x6A-0x6D -
Boolean, // 0x6E-0x6F -
Decimal, // 0x70-0x7F -
TimestampShort, // 0x80-0x8F -
String, // 0x90-0x9F -
InlineSymbol, // 0xA0-0xAF -
List, // 0xB0-0xBF -
SExpression, // 0xC0-0xCF -
StructEmpty, // 0xD0 -
EExpressionWith6BitAddress, // 0x00-0x3F -
EExpressionWith12BitAddress, // 0x40-0x4F -
EExpressionWith20BitAddress, // 0x50-0x5F -
Integer, // 0x60-0x68 - Integer up to 8 bytes wide
Float, // 0x6A-0x6D -
Boolean, // 0x6E-0x6F -
Decimal, // 0x70-0x7F -
TimestampShort, // 0x80-0x8F -
String, // 0x90-0x9F -
InlineSymbol, // 0xA0-0xAF -
List, // 0xB0-0xBF -
SExpression, // 0xC0-0xCF -
StructEmpty, // 0xD0 -
// 0xD1 reserved
Struct, // 0xD2-0xDF -
IonVersionMarker, // 0xE0 -
Expand Down
10 changes: 7 additions & 3 deletions src/lazy/binary/raw/v1_1/type_descriptor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,9 @@ impl Opcode {
use OpcodeType::*;

let (opcode_type, length_code, ion_type) = match (high_nibble, low_nibble) {
(0x0..=0x4, _) => (EExpressionWithAddress, low_nibble, None),
(0x5, _) => (EExpressionAddressFollows, low_nibble, None),
(0x0..=0x3, _) => (EExpressionWith6BitAddress, low_nibble, None),
(0x4, _) => (EExpressionWith12BitAddress, low_nibble, None),
(0x5, _) => (EExpressionWith20BitAddress, low_nibble, None),
(0x6, 0x0..=0x8) => (Integer, low_nibble, Some(IonType::Int)),
(0x6, 0xA..=0xD) => (Float, low_nibble, Some(IonType::Float)),
(0x6, 0xE..=0xF) => (Boolean, low_nibble, Some(IonType::Bool)),
Expand Down Expand Up @@ -118,7 +119,10 @@ impl Opcode {
use OpcodeType::*;
matches!(
self.opcode_type,
EExpressionWithAddress | EExpressionAddressFollows | EExpressionWithLengthPrefix
EExpressionWith6BitAddress
| EExpressionWith12BitAddress
| EExpressionWith20BitAddress
| EExpressionWithLengthPrefix
)
}

Expand Down
45 changes: 42 additions & 3 deletions src/lazy/encoder/binary/v1_1/value_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -672,19 +672,58 @@ impl<'value, 'top> BinaryValueWriter_1_1<'value, 'top> {
self,
macro_id: impl Into<MacroIdRef<'a>>,
) -> IonResult<<Self as ValueWriter>::EExpWriter> {
// Thresholds above which an address must be encoded using each available encoding.
const MIN_4_BIT_ADDRESS: usize = 0;
const MAX_4_BIT_ADDRESS: usize = 63;

const MIN_12_BIT_ADDRESS: usize = 64;
const MAX_12_BIT_ADDRESS: usize = 4_159;

const MIN_20_BIT_ADDRESS: usize = 4_160;
const MAX_20_BIT_ADDRESS: usize = 1_052_735;

match macro_id.into() {
MacroIdRef::LocalName(_name) => {
// This would be handled by the system writer
todo!("macro invocation by name");
return IonResult::encoding_error(
"the raw binary writer cannot encode macros by name",
);
}
MacroIdRef::LocalAddress(address) if address < 64 => {
MacroIdRef::LocalAddress(address) if address <= MAX_4_BIT_ADDRESS => {
// Invoke this ID with a one-byte opcode
self.encoding_buffer.push(address as u8);
}
MacroIdRef::LocalAddress(address) if address <= MAX_12_BIT_ADDRESS => {
// Encode as an opcode with trailing 1-byte FixedUInt
const BIAS: usize = MIN_12_BIT_ADDRESS;
let biased = address - BIAS;
let low_opcode_nibble = biased >> 8;
self.encoding_buffer.extend_from_slice_copy(&[
// Opcode with low nibble setting bias
0x40 | low_opcode_nibble as u8,
// Remaining byte of magnitude in biased address
biased as u8,
]);
}
MacroIdRef::LocalAddress(address) if address <= MAX_20_BIT_ADDRESS => {
// Encode as an opcode with trailing 2-byte FixedUInt
const BIAS: usize = MIN_20_BIT_ADDRESS;
let biased = address - BIAS;
let low_opcode_nibble = biased >> 16;
let le_bytes = (biased as u16).to_le_bytes();
self.encoding_buffer.extend_from_slice_copy(&[
// Opcode with low nibble setting bias
0x50 | low_opcode_nibble as u8,
// Remaining bytes of magnitude in biased address
le_bytes[0],
le_bytes[1],
]);
}
MacroIdRef::LocalAddress(_address) => {
todo!("macros with addresses higher than 64");
todo!("macros with addresses greater than {MAX_20_BIT_ADDRESS}");
}
};

Ok(BinaryEExpWriter_1_1::new(
self.allocator,
self.encoding_buffer,
Expand Down
Loading