Skip to content

Adds read/write support for system eexps, placeholders for remaining system macros #862

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 19 commits into from
Dec 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ base64 = "0.12"
# chrono < 0.5 brings in a deprecated version of the `time` crate via `oldtime` feature by default
# this makes it explicitly not do this as there is an advisory warning against this:
# See: https://github.com/chronotope/chrono/issues/602
compact_str = "0.8.0"
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🗺️ There are many crates offering an optimized small string type--here's a comparison of the available options.

This one is actively developed and stack-allocates strings up to 24 bytes long, which is enough to cover most parameter and macro names.

chrono = { version = "0.4", default-features = false, features = ["clock", "std", "wasmbind"] }
delegate = "0.12.0"
thiserror = "1.0"
Expand Down
78 changes: 40 additions & 38 deletions benches/read_many_structs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,23 +29,23 @@ fn maximally_compact_1_1_data(num_values: usize) -> TestData_1_1 {
let template_definition_text: String = r#"
(macro event (timestamp thread_id thread_name client_num host_id parameters*)
{
'timestamp': timestamp,
'threadId': thread_id,
'threadName': (make_string "scheduler-thread-" thread_name),
'loggerName': "com.example.organization.product.component.ClassName",
'logLevel': (literal INFO),
'format': "Request status: {} Client ID: {} Client Host: {} Client Region: {} Timestamp: {}",
'parameters': [
timestamp: (%timestamp),
threadId: (%thread_id),
threadName: (.make_string "scheduler-thread-" (%thread_name)),
loggerName: "com.example.organization.product.component.ClassName",
logLevel: INFO,
format: "Request status: {} Client ID: {} Client Host: {} Client Region: {} Timestamp: {}",
parameters: [
"SUCCESS",
(make_string "example-client-" client_num),
(make_string "aws-us-east-5f-" host_id),
parameters
(.make_string "example-client-" (%client_num)),
(.make_string "aws-us-east-5f-" (%host_id)),
(%parameters)
]
}
)
"#.to_owned();

let text_1_1_data = r#"(:event 1670446800245 418 "6" "1" "abc123" (: "region 4" "2022-12-07T20:59:59.744000Z"))"#.repeat(num_values);
let text_1_1_data = r#"(:event 1670446800245 418 "6" "1" "abc123" (:: "region 4" "2022-12-07T20:59:59.744000Z"))"#.repeat(num_values);

let mut binary_1_1_data = vec![0xE0u8, 0x01, 0x01, 0xEA]; // IVM
#[rustfmt::skip]
Expand Down Expand Up @@ -90,23 +90,23 @@ fn moderately_compact_1_1_data(num_values: usize) -> TestData_1_1 {
let template_definition_text = r#"
(macro event (timestamp thread_id thread_name client_num host_id parameters*)
{
'timestamp': timestamp,
'threadId': thread_id,
'threadName': thread_name,
'loggerName': "com.example.organization.product.component.ClassName",
'logLevel': (literal INFO),
'format': "Request status: {} Client ID: {} Client Host: {} Client Region: {} Timestamp: {}",
'parameters': [
timestamp: (%timestamp),
threadId: (%thread_id),
threadName: (%thread_name),
loggerName: "com.example.organization.product.component.ClassName",
logLevel: INFO,
format: "Request status: {} Client ID: {} Client Host: {} Client Region: {} Timestamp: {}",
parameters: [
"SUCCESS",
client_num,
host_id,
parameters
(%client_num),
(%host_id),
(%parameters)
]
}
)
"#;

let text_1_1_data = r#"(:event 1670446800245 418 "scheduler-thread-6" "example-client-1" "aws-us-east-5f-abc123" (: "region 4" "2022-12-07T20:59:59.744000Z"))"#.repeat(num_values);
let text_1_1_data = r#"(:event 1670446800245 418 "scheduler-thread-6" "example-client-1" "aws-us-east-5f-abc123" (:: "region 4" "2022-12-07T20:59:59.744000Z"))"#.repeat(num_values);
let mut binary_1_1_data = vec![0xE0u8, 0x01, 0x01, 0xEA]; // IVM
#[rustfmt::skip]
let mut binary_1_1_data_body: Vec<u8> = [MacroTable::FIRST_USER_MACRO_ID as u8, // Macro ID
Expand Down Expand Up @@ -159,23 +159,23 @@ fn length_prefixed_moderately_compact_1_1_data(num_values: usize) -> TestData_1_
let template_definition_text = r#"
(macro event (timestamp thread_id thread_name client_num host_id parameters*)
{
'timestamp': timestamp,
'threadId': thread_id,
'threadName': thread_name,
'loggerName': "com.example.organization.product.component.ClassName",
'logLevel': (literal INFO),
'format': "Request status: {} Client ID: {} Client Host: {} Client Region: {} Timestamp: {}",
'parameters': [
timestamp: (%timestamp),
threadId: (%thread_id),
threadName: (%thread_name),
loggerName: "com.example.organization.product.component.ClassName",
logLevel: INFO,
format: "Request status: {} Client ID: {} Client Host: {} Client Region: {} Timestamp: {}",
parameters: [
"SUCCESS",
client_num,
host_id,
parameters
(%client_num),
(%host_id),
(%parameters)
]
}
)
"#;

let text_1_1_data = r#"(:event 1670446800245 418 "scheduler-thread-6" "example-client-1" "aws-us-east-5f-abc123" (: "region 4" "2022-12-07T20:59:59.744000Z"))"#.repeat(num_values);
let text_1_1_data = r#"(:event 1670446800245 418 "scheduler-thread-6" "example-client-1" "aws-us-east-5f-abc123" (:: "region 4" "2022-12-07T20:59:59.744000Z"))"#.repeat(num_values);
let mut binary_1_1_data = vec![0xE0u8, 0x01, 0x01, 0xEA]; // IVM
#[rustfmt::skip]
let mut binary_1_1_data_body: Vec<u8> = [0xF5, // LP invocation
Expand Down Expand Up @@ -307,15 +307,15 @@ mod benchmark {

// Load the Ion 1.0 values into a Sequence. We'll compare our Ion 1.1 streams' data to this
// sequence to make sure that all of the tests are working on equivalent data.
let seq_1_0 = Reader::new(v1_1::Text, text_1_0_data.as_slice())
let seq_1_0 = Reader::new(v1_0::Text, text_1_0_data.as_slice())
.unwrap()
.read_all_elements()?;

let mut text_1_0_group = c.benchmark_group("text 1.0");
// Visit each top level value in the stream without reading it.
text_1_0_group.bench_function("scan all", |b| {
b.iter(|| {
let mut reader = Reader::new(v1_1::Text, text_1_0_data.as_slice()).unwrap();
let mut reader = Reader::new(v1_0::Text, text_1_0_data.as_slice()).unwrap();
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It wouldn't have mattered much if at all, but we were using a v1_1 text reader to read 1.0 text.

while let Some(item) = reader.next().unwrap() {
black_box(item);
}
Expand All @@ -324,7 +324,7 @@ mod benchmark {
// Read every value in the stream, however deeply nested.
text_1_0_group.bench_function("read all", |b| {
b.iter(|| {
let mut reader = Reader::new(v1_1::Text, text_1_0_data.as_slice()).unwrap();
let mut reader = Reader::new(v1_0::Text, text_1_0_data.as_slice()).unwrap();
let mut num_values = 0usize;
while let Some(item) = reader.next().unwrap() {
num_values += count_value_and_children(&item).unwrap();
Expand All @@ -335,7 +335,7 @@ mod benchmark {
// Read the 'format' field from each top-level struct in the stream.
text_1_0_group.bench_function("read 'format' field", |b| {
b.iter(|| {
let mut reader = Reader::new(v1_1::Text, text_1_0_data.as_slice()).unwrap();
let mut reader = Reader::new(v1_0::Text, text_1_0_data.as_slice()).unwrap();
let mut num_values = 0usize;
while let Some(value) = reader.next().unwrap() {
let s = value.read().unwrap().expect_struct().unwrap();
Expand Down Expand Up @@ -412,7 +412,9 @@ mod benchmark {
let seq_1_1 = reader_1_1.read_all_elements().unwrap();
assert!(
IonData::eq(seq_1_0, &seq_1_1),
"{name} binary Ion 1.1 sequence was not equal to the original Ion 1.0 sequence"
"{name} binary Ion 1.1 sequence was not equal to the original Ion 1.0 sequence: \n{:#?}\n !=\n{:#?}",
seq_1_0.elements().next().unwrap(),
seq_1_1.elements().next().unwrap(),
);

// === Text equivalence check ===
Expand Down
2 changes: 1 addition & 1 deletion src/lazy/binary/raw/v1_1/type_code.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ pub enum OpcodeType {
TypedNull, // 0xEB -
Nop, // 0xEC-0xED -
SystemSymbolAddress, // 0xEE
SystemMacroInvoke, // 0xEF -
SystemMacroEExpression, // 0xEF -
DelimitedContainerClose, // 0xF0
ListDelimited, // 0xF1
SExpDelimited, // 0xF2
Expand Down
1 change: 1 addition & 0 deletions src/lazy/binary/raw/v1_1/type_descriptor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ impl Opcode {
InOpcode(1),
OpcodeKind::Value(IonType::Symbol),
),
(0xE, 0xF) => (SystemMacroEExpression, Unknown, OpcodeKind::EExp),
(0xF, 0x0) => (DelimitedContainerClose, InOpcode(0), OpcodeKind::Control),
(0xF, 0x1) => (ListDelimited, Unknown, OpcodeKind::Value(IonType::List)),
(0xF, 0x2) => (SExpDelimited, Unknown, OpcodeKind::Value(IonType::SExp)),
Expand Down
26 changes: 26 additions & 0 deletions src/lazy/encoder/binary/v1_1/value_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -726,6 +726,10 @@ impl BinaryValueWriter_1_1<'_, '_> {
MacroIdRef::LocalAddress(_address) => {
todo!("macros with addresses greater than {MAX_20_BIT_ADDRESS}");
}
MacroIdRef::SystemAddress(address) => {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🗺️ Binary write logic for system macro invocations.

self.encoding_buffer
.extend_from_slice_copy(&[0xEF, address.as_u8()]); // System e-expression
}
};

Ok(BinaryEExpWriter_1_1::new(
Expand Down Expand Up @@ -955,6 +959,7 @@ mod tests {
use crate::lazy::encoder::value_writer::ValueWriter;
use crate::lazy::encoder::value_writer::{SequenceWriter, StructWriter};
use crate::lazy::encoder::write_as_ion::{WriteAsIon, WriteAsSExp};
use crate::lazy::text::raw::v1_1::reader::{system_macros, MacroIdRef};
use crate::raw_symbol_ref::AsRawSymbolRef;
use crate::types::float::{FloatRepr, SmallestFloatRepr};
use crate::{
Expand Down Expand Up @@ -2914,6 +2919,27 @@ mod tests {
Ok(())
}

#[test]
fn write_system_macro_invocation() -> IonResult<()> {
encoding_test(
|writer: &mut LazyRawBinaryWriter_1_1<&mut Vec<u8>>| {
let mut args =
writer.eexp_writer(MacroIdRef::SystemAddress(system_macros::MAKE_STRING))?;
args.write_symbol("foo")?
.write_symbol("bar")?
.write_symbol("baz")?;
args.close()
},
&[
0xEF, 0x03, // Invoke system macro address 3
0xA3, 0x66, 0x6f, 0x6f, // foo
0xA3, 0x62, 0x61, 0x72, // bar
0xA3, 0x62, 0x61, 0x7a, // baz
],
)?;
Ok(())
}

#[rstest]
#[case::boolean("true false")]
#[case::int("1 2 3 4 5")]
Expand Down
40 changes: 32 additions & 8 deletions src/lazy/encoder/text/v1_1/value_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,10 @@ pub struct TextAnnotatedValueWriter_1_1<'value, W: Write> {
}

impl<'value, W: Write + 'value> AnnotatableWriter for TextValueWriter_1_1<'value, W> {
type AnnotatedValueWriter<'a> = TextAnnotatedValueWriter_1_1<'a, W> where Self: 'a;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🗺️ New rustfmt release has new opinions.

type AnnotatedValueWriter<'a>
= TextAnnotatedValueWriter_1_1<'a, W>
where
Self: 'a;

fn with_annotations<'a>(
self,
Expand Down Expand Up @@ -83,14 +86,22 @@ impl<'value, W: Write + 'value> ValueWriter for TextValueWriter_1_1<'value, W> {
}

fn eexp_writer<'a>(self, macro_id: impl Into<MacroIdRef<'a>>) -> IonResult<Self::EExpWriter> {
let id = macro_id.into();
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🗺️ Text write logic for system macro invocations.

let opening_text = match id {
MacroIdRef::LocalName(name) => format!("(:{}", name),
MacroIdRef::LocalAddress(address) => format!("(:{}", address),
MacroIdRef::SystemAddress(system_address) => {
format!("(:$ion::{}", system_address.as_usize())
}
};
TextEExpWriter_1_1::new(
self.value_writer_1_0.writer,
self.value_writer_1_0.depth,
self.value_writer_1_0.parent_type,
// Pretend we're in a sexp for syntax purposes
ContainerType::SExp,
// TODO: Reusable buffer
format!("(:{}", macro_id.into()).as_str(),
opening_text.as_str(),
" ",
match self.value_writer_1_0.parent_type {
ParentType::Struct | ParentType::List => ",",
Expand All @@ -101,7 +112,10 @@ impl<'value, W: Write + 'value> ValueWriter for TextValueWriter_1_1<'value, W> {
}

impl<'value, W: Write + 'value> AnnotatableWriter for TextAnnotatedValueWriter_1_1<'value, W> {
type AnnotatedValueWriter<'a> = TextAnnotatedValueWriter_1_1<'a, W> where Self: 'a;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🗺️ From here on, the rest of the file is rustfmt changes.

type AnnotatedValueWriter<'a>
= TextAnnotatedValueWriter_1_1<'a, W>
where
Self: 'a;

fn with_annotations<'a>(
self,
Expand Down Expand Up @@ -167,7 +181,10 @@ pub struct TextListWriter_1_1<'value, W: Write> {
}

impl<W: Write> MakeValueWriter for TextListWriter_1_1<'_, W> {
type ValueWriter<'a> = TextValueWriter_1_1<'a, W> where Self: 'a;
type ValueWriter<'a>
= TextValueWriter_1_1<'a, W>
where
Self: 'a;

fn make_value_writer(&mut self) -> Self::ValueWriter<'_> {
TextValueWriter_1_1 {
Expand All @@ -189,7 +206,10 @@ pub struct TextSExpWriter_1_1<'value, W: Write> {
}

impl<W: Write> MakeValueWriter for TextSExpWriter_1_1<'_, W> {
type ValueWriter<'a> = TextValueWriter_1_1<'a, W> where Self: 'a;
type ValueWriter<'a>
= TextValueWriter_1_1<'a, W>
where
Self: 'a;

fn make_value_writer(&mut self) -> Self::ValueWriter<'_> {
TextValueWriter_1_1 {
Expand Down Expand Up @@ -217,7 +237,10 @@ impl<W: Write> FieldEncoder for TextStructWriter_1_1<'_, W> {
}

impl<W: Write> MakeValueWriter for TextStructWriter_1_1<'_, W> {
type ValueWriter<'a> = TextValueWriter_1_1<'a, W> where Self: 'a;
type ValueWriter<'a>
= TextValueWriter_1_1<'a, W>
where
Self: 'a;

fn make_value_writer(&mut self) -> Self::ValueWriter<'_> {
TextValueWriter_1_1 {
Expand Down Expand Up @@ -274,9 +297,10 @@ impl<'value, W: Write + 'value> SequenceWriter for TextEExpWriter_1_1<'value, W>
}

impl<'value, W: Write + 'value> MakeValueWriter for TextEExpWriter_1_1<'value, W> {
type ValueWriter<'a> = TextValueWriter_1_1<'a, W>
type ValueWriter<'a>
= TextValueWriter_1_1<'a, W>
where
Self: 'a,;
Self: 'a;

fn make_value_writer(&mut self) -> Self::ValueWriter<'_> {
TextValueWriter_1_1 {
Expand Down
24 changes: 21 additions & 3 deletions src/lazy/encoder/text/v1_1/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ impl<W: Write> SequenceWriter for LazyRawTextWriter_1_1<W> {
}

impl<W: Write> MakeValueWriter for LazyRawTextWriter_1_1<W> {
type ValueWriter<'a> = TextValueWriter_1_1<'a, W>
type ValueWriter<'a>
= TextValueWriter_1_1<'a, W>
where
Self: 'a;

Expand Down Expand Up @@ -117,11 +118,11 @@ mod tests {
use crate::lazy::expanded::compiler::TemplateCompiler;
use crate::lazy::expanded::macro_evaluator::RawEExpression;
use crate::lazy::expanded::EncodingContext;
use crate::lazy::text::raw::v1_1::reader::{LazyRawTextReader_1_1, MacroIdRef};
use crate::lazy::text::raw::v1_1::reader::{system_macros, LazyRawTextReader_1_1, MacroIdRef};
use crate::symbol_ref::AsSymbolRef;
use crate::{
v1_1, Annotatable, Decimal, ElementReader, IonData, IonResult, IonType, Null, RawSymbolRef,
Reader, Timestamp,
Reader, Timestamp, Writer,
};

#[test]
Expand Down Expand Up @@ -355,4 +356,21 @@ mod tests {
assert!(IonData::eq(&expected, &actual));
Ok(())
}

#[test]
fn test_system_eexp() -> IonResult<()> {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🗺️ Unit test roundtrips a system e-expression to make sure text reading and writing both work.

let mut writer = Writer::new(v1_1::Text, vec![])?;
let mut macro_args = writer.eexp_writer(system_macros::MAKE_STRING)?;
macro_args
.write("foo")?
.write("bar")?
.write("baz")?
.write_symbol("+++")?;
macro_args.close()?;
let encoded_bytes = writer.close()?;
let mut reader = Reader::new(v1_1::Text, encoded_bytes)?;
let element = reader.read_next_element()?;
assert_eq!("foobarbaz+++", element.unwrap().as_string().unwrap());
Ok(())
}
}
Loading
Loading