diff --git a/Cargo.toml b/Cargo.toml index ec2b346f..8abe0d84 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -71,8 +71,9 @@ num-integer = "0.1.44" num-traits = "0.2" arrayvec = "0.7" smallvec = {version ="1.9.0", features = ["const_generics"]} -bumpalo = {version = "3.14.0", features = ["collections", "std"]} +bumpalo = {version = "3.15.3", features = ["collections", "std"]} digest = { version = "0.9", optional = true } +ice_code = "0.1.4" sha2 = { version = "0.9", optional = true } serde = { version = "1.0", features = ["derive"], optional = true } serde_with = { version = "2.0", optional = true } @@ -86,11 +87,16 @@ test-generator = "0.3" memmap = "0.7.0" criterion = "0.5.1" rand = "0.8.5" +tempfile = "3.10.0" [[bench]] name = "read_many_structs" harness = false +[[bench]] +name = "write_many_structs" +harness = false + [[bench]] name = "encoding_primitives" harness = false diff --git a/benches/encoding_primitives.rs b/benches/encoding_primitives.rs index 661b8d77..2da47afb 100644 --- a/benches/encoding_primitives.rs +++ b/benches/encoding_primitives.rs @@ -142,7 +142,6 @@ pub fn criterion_benchmark(c: &mut Criterion) { } fn roundtrip_var_uint_test(unsigned_values: &[u64]) -> IonResult> { - println!("Roundtripping unsigned values as VarUInts to check for correctness."); let mut encoded_values_buffer = Vec::new(); for value in unsigned_values { VarUInt::write_u64(&mut encoded_values_buffer, *value)?; @@ -159,7 +158,6 @@ fn roundtrip_var_uint_test(unsigned_values: &[u64]) -> IonResult> { } fn roundtrip_var_int_test(signed_values: &[i64]) -> IonResult> { - println!("Roundtripping signed values as VarInts to check for correctness."); let mut encoded_values_buffer = Vec::new(); for value in signed_values { VarInt::write_i64(&mut encoded_values_buffer, *value)?; @@ -176,7 +174,6 @@ fn roundtrip_var_int_test(signed_values: &[i64]) -> IonResult> { } fn roundtrip_flex_uint_test(unsigned_values: &[u64]) -> IonResult> { - println!("Roundtripping unsigned values as FlexUInts to 
check for correctness."); let mut encoded_values_buffer = Vec::new(); for value in unsigned_values { FlexUInt::write_u64(&mut encoded_values_buffer, *value)?; @@ -193,7 +190,6 @@ fn roundtrip_flex_uint_test(unsigned_values: &[u64]) -> IonResult> { } fn roundtrip_flex_int_test(signed_values: &[i64]) -> IonResult> { - println!("Roundtripping signed values as FlexInts to check for correctness."); let mut encoded_values_buffer = Vec::new(); for value in signed_values { FlexInt::write_i64(&mut encoded_values_buffer, *value)?; diff --git a/benches/write_many_structs.rs b/benches/write_many_structs.rs new file mode 100644 index 00000000..fbb51418 --- /dev/null +++ b/benches/write_many_structs.rs @@ -0,0 +1,273 @@ +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use ion_rs::lazy::encoder::binary::v1_0::writer::LazyRawBinaryWriter_1_0; +use nom::AsBytes; + +use ion_rs::lazy::encoder::binary::v1_1::writer::LazyRawBinaryWriter_1_1; +use ion_rs::lazy::encoder::value_writer::{AnnotatableValueWriter, SequenceWriter}; +use ion_rs::lazy::encoder::value_writer::{StructWriter, ValueWriter}; +use ion_rs::RawSymbolTokenRef; + +fn write_struct_with_string_values(value_writer: impl ValueWriter) { + value_writer + .write_struct(|fields| { + fields + // $10 = timestamp + .write(10, black_box(1670446800245i64))? + // $11 = threadId + .write(11, black_box(418))? + // $12 = threadName + .write(12, black_box("scheduler-thread-6"))? + // $13 = loggerName + .write(13, black_box("com.example.organization.product.component.ClassName"))? + // $14 = logLevel + .write(14, black_box("INFO"))? + // $15 = format + .write(15, black_box("Request status: {} Client ID: {} Client Host: {} Client Region: {} Timestamp: {}"))? 
+ // $16 = parameters + .write(16, &[ + black_box("SUCCESS"), + black_box("example-client-1"), + black_box("aws-us-east-5f-18b4fa"), + black_box("region 4"), + black_box("2022-12-07T20:59:59.744000Z"), + ])?; + Ok(()) + }).unwrap(); +} + +fn write_struct_with_symbol_values(value_writer: impl ValueWriter) { + value_writer + .write_struct(|fields| { + fields + // $10 = timestamp + .write(10, black_box(1670446800245i64))? + // $11 = threadId + .write(11, black_box(418))? + // $12 = threadName, $17 = scheduler-thread-6 + .write(12, symbol_id(black_box(17)))? + // $13 = loggerName, $18 = com.example.organization.product.component.ClassName + .write(13, symbol_id(black_box(18)))? + // $14 = logLevel, $19 = INFO + .write(14, symbol_id(black_box(19)))? + // $15 = format, $20 = Request status: {} Client ID: {} Client Host: {} Client Region: {} Timestamp: {} + .write(15, symbol_id(black_box(20)))? + // $16 = parameters + .write( + 16, + &[ + // $21 = SUCCESS + symbol_id(black_box(21)), + // $22 = example-client-1 + symbol_id(black_box(22)), + // $23 = aws-us-east-5f-18b4fa + symbol_id(black_box(23)), + // $24 = region 4 + symbol_id(black_box(24)), + // $25 = 2022-12-07T20:59:59.744000Z (string, not timestamp) + symbol_id(black_box(25)), + ], + )?; + Ok(()) + }) + .unwrap(); +} + +fn write_eexp_with_symbol_values(value_writer: impl ValueWriter) { + value_writer + .write_eexp(0, |args| { + args.write(black_box(1670446800245i64))? // timestamp + .write(black_box(418))? // thread_id + // These are still strings because they're so short that using symbols to represent + // them wouldn't be beneficial. + .write(black_box("6"))? // thread_name + .write(black_box("1"))? // client_num + .write(symbol_id(black_box(10)))? // host_id: "18b4fa" ($10) + .value_writer() + .without_annotations() + .write_eexp(1, |args| { + args + // $11 = region 4 + .write(symbol_id(black_box(11)))? 
+ // $12 = "2022-12-07T20:59:59.744000Z" (string, not timestamp) + .write(symbol_id(black_box(12)))?; + Ok(()) + }) + .unwrap(); + Ok(()) + }) + .unwrap(); +} + +fn write_eexp_with_string_values(value_writer: impl ValueWriter) { + value_writer + .write_eexp(0, |args| { + args.write(black_box(1670446800245i64))? // timestamp + .write(black_box(418))? // thread_id + .write(black_box("6"))? // thread_name + .write(black_box("1"))? // client_num + .write(black_box("18b4fa"))? // host_id + .value_writer() + .without_annotations() + .write_eexp(1, |args| { + args.write(black_box("region 4"))? + .write(black_box("2022-12-07T20:59:59.744000Z"))?; + Ok(()) + })?; + Ok(()) + }) + .unwrap(); +} + +fn symbol_id(sid: usize) -> RawSymbolTokenRef<'static> { + RawSymbolTokenRef::SymbolId(sid) +} + +pub fn criterion_benchmark(c: &mut Criterion) { + let mut buffer = Vec::with_capacity(1024 * 1024); + + let mut binary_1_0_group = c.benchmark_group("binary 1.0"); + binary_1_0_group.bench_function("write structs with string values", |b| { + b.iter(|| { + buffer.clear(); + let mut writer = LazyRawBinaryWriter_1_0::new(&mut buffer).unwrap(); + write_struct_with_string_values(writer.value_writer().without_annotations()); + writer.flush().unwrap(); + black_box(buffer.as_bytes()); + }); + }); + // The runner allows the user to specify which benchmarks to run. If the benchmark above ran, + // then the buffer will not be empty. + // This print statement cannot live within the benchmark itself, as both `bench_function` and + // `iter` are called several times. 
+ if !buffer.is_empty() { + println!("\nencoded 1.0 size with string values: {}\n", buffer.len()); + buffer.clear(); + } + + binary_1_0_group.bench_function("write structs with symbol values", |b| { + b.iter(|| { + buffer.clear(); + let mut writer = LazyRawBinaryWriter_1_0::new(&mut buffer).unwrap(); + write_struct_with_symbol_values(writer.value_writer().without_annotations()); + writer.flush().unwrap(); + + black_box(buffer.as_bytes()); + }); + }); + if !buffer.is_empty() { + println!("\nencoded 1.0 size with symbol values: {}\n", buffer.len()); + buffer.clear() + } + binary_1_0_group.finish(); + + let mut binary_1_1_group = c.benchmark_group("binary 1.1"); + binary_1_1_group.bench_function("write structs with string values", |b| { + b.iter(|| { + buffer.clear(); + let mut writer = LazyRawBinaryWriter_1_1::new(&mut buffer).unwrap(); + write_struct_with_string_values(writer.value_writer().without_annotations()); + writer.flush().unwrap(); + black_box(buffer.as_bytes()); + }); + }); + if !buffer.is_empty() { + println!("\nencoded 1.1 size with string values: {}\n", buffer.len()); + buffer.clear() + } + + binary_1_1_group.bench_function("write structs with symbol values", |b| { + b.iter(|| { + buffer.clear(); + let mut writer = LazyRawBinaryWriter_1_1::new(&mut buffer).unwrap(); + write_struct_with_symbol_values(writer.value_writer().without_annotations()); + writer.flush().unwrap(); + + black_box(buffer.as_bytes()); + }); + }); + if !buffer.is_empty() { + println!("\nencoded 1.1 size with symbol values: {}\n", buffer.len()); + buffer.clear() + } + + binary_1_1_group.bench_function("write delimited structs with string values", |b| { + b.iter(|| { + buffer.clear(); + let mut writer = LazyRawBinaryWriter_1_1::new(&mut buffer).unwrap(); + write_struct_with_string_values( + writer + .value_writer() + .with_delimited_containers() + .without_annotations(), + ); + writer.flush().unwrap(); + black_box(buffer.as_bytes()); + }); + }); + if !buffer.is_empty() { + println!( +
"\nencoded 1.1 size, delimited structs with string values: {}\n", + buffer.len() + ); + buffer.clear() + } + + binary_1_1_group.bench_function("write delimited structs with symbol values", |b| { + b.iter(|| { + buffer.clear(); + let mut writer = LazyRawBinaryWriter_1_1::new(&mut buffer).unwrap(); + write_struct_with_symbol_values( + writer + .value_writer() + .with_delimited_containers() + .without_annotations(), + ); + writer.flush().unwrap(); + + black_box(buffer.as_bytes()); + }); + }); + if !buffer.is_empty() { + println!("\nencoded 1.1 size, delimited structs with symbol values: {}\n", buffer.len()); + buffer.clear() + } + + binary_1_1_group.bench_function("write structs with string values using macros", |b| { + b.iter(|| { + buffer.clear(); + let mut writer = LazyRawBinaryWriter_1_1::new(&mut buffer).unwrap(); + write_eexp_with_string_values(writer.value_writer().without_annotations()); + writer.flush().unwrap(); + black_box(buffer.as_bytes()); + }); + }); + if !buffer.is_empty() { + println!( + "\nencoded 1.1 size with string values using macros: {}\n", + buffer.len() + ); + buffer.clear() + } + + binary_1_1_group.bench_function("write structs with symbol values using macros", |b| { + b.iter(|| { + buffer.clear(); + let mut writer = LazyRawBinaryWriter_1_1::new(&mut buffer).unwrap(); + write_eexp_with_symbol_values(writer.value_writer().without_annotations()); + writer.flush().unwrap(); + black_box(buffer.as_bytes()); + }); + }); + if !buffer.is_empty() { + println!( + "\nencoded 1.1 size with symbol values using macros: {}\n", + buffer.len() + ); + buffer.clear() + } + + binary_1_1_group.finish(); +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git a/examples/write_log_events.rs b/examples/write_log_events.rs new file mode 100644 index 00000000..76a29c7d --- /dev/null +++ b/examples/write_log_events.rs @@ -0,0 +1,336 @@ +//! This program demonstrates implementing WriteAsIon using Ion 1.1's e-expressions for a more +//! compact encoding.
It uses raw-level writer APIs that end users are unlikely to leverage. +//! Ion 1.1 is not yet finalized; the encoding produced by this example and the APIs it uses +//! are very likely to change. + +use ion_rs::*; + +fn main() -> IonResult<()> { + #[cfg(not(feature = "experimental-lazy-reader"))] + panic!("This example requires the 'experimental-lazy-reader' feature to work."); + + #[cfg(feature = "experimental-lazy-reader")] + example::write_log_events() +} + +#[cfg(feature = "experimental-lazy-reader")] +mod example { + use chrono::{DateTime, FixedOffset, NaiveDateTime}; + use ion_rs::lazy::encoder::binary::v1_0::writer::LazyRawBinaryWriter_1_0; + use ion_rs::lazy::encoder::binary::v1_1::writer::LazyRawBinaryWriter_1_1; + use ion_rs::lazy::encoder::value_writer::{SequenceWriter, StructWriter, ValueWriter}; + use ion_rs::lazy::encoder::write_as_ion::WriteAsIonValue; + use ion_rs::*; + use std::env::args; + + use rand::rngs::StdRng; + use rand::seq::SliceRandom; + use rand::{Rng, SeedableRng}; + use std::io::BufWriter; + use std::ops::Range; + use tempfile::NamedTempFile; + + pub fn write_log_events() -> IonResult<()> { + // By default, this program deletes the encoded output before it ends. To keep the files + // for further review, you can pass a `--keep-files`/`-k` flag. + let args: Vec = args().collect(); + let keep_files_flag = match args.get(1).map(|a| a.as_str()) { + Some("--keep-files") | Some("-k") => true, + _ => false, + }; + + // Create a set of Log4J-style statements that might appear in a typical program. These statements + // have a fixed combination of (logger name, log level, format string) fields. + let log_statements = log_statements(); + + // Create a set of `NUM_EVENTS` log events. Each event comes from a log statement (described above) + // and provides additional one-off information like a timestamp, thread ID, thread name, and + // parameters to populate the format string. 
+ const NUM_EVENTS: usize = 1_000_000; + const RNG_SEED: u64 = 1024; + let events = generate_events(RNG_SEED, &log_statements, NUM_EVENTS); + + // Make some files in the OS' temp directory to hold our encoded output. + let ion_1_0_file = NamedTempFile::new().expect("failed to create a temp file"); + let ion_1_1_file = NamedTempFile::new().expect("failed to create a temp file"); + + println!( + "Output files:\nIon 1.0: {}\nIon 1.1: {}", + ion_1_0_file.path().to_string_lossy(), + ion_1_1_file.path().to_string_lossy(), + ); + + // Encode the log events as Ion 1.0 data + let buf_writer = BufWriter::new(ion_1_0_file.as_file()); + let mut ion_writer = LazyRawBinaryWriter_1_0::new(buf_writer)?; + for event in &events { + ion_writer.write(SerializeWithoutMacros(event))?; + } + ion_writer.flush()?; + drop(ion_writer); + + // Encode the log events as Ion 1.1 data + let buf_writer = BufWriter::new(ion_1_1_file.as_file()); + let mut ion_writer = LazyRawBinaryWriter_1_1::new(buf_writer)?; + for event in &events { + ion_writer.write(SerializeWithMacros(event))?; + } + ion_writer.flush()?; + drop(ion_writer); + + let size_in_1_0 = ion_1_0_file + .as_file() + .metadata() + .expect("failed to read Ion 1.0 file length") + .len(); + let size_in_1_1 = ion_1_1_file + .as_file() + .metadata() + .expect("failed to read Ion 1.1 file length") + .len(); + + let percentage_smaller = ((size_in_1_0 - size_in_1_1) as f64 / size_in_1_0 as f64) * 100.0; + println!("1.0 size: {size_in_1_0}"); + println!("1.1 size: {size_in_1_1} ({percentage_smaller:.2}% smaller)"); + + if keep_files_flag { + ion_1_0_file.keep().expect("failed to persist Ion 1.0 file"); + ion_1_1_file.keep().expect("failed to persist Ion 1.1 file"); + } + + Ok(()) + } + + // ===== Data types representing elements of a log file ===== + + // A log statement in the fictional codebase + #[derive(Debug)] + // This struct has several fields that get populated but which are not (yet) used: `logger_name` + // `log_level`, and `format`. 
Currently, the encoded output for Ion 1.0 writes these as symbol + // IDs and Ion 1.1 refers to them as part of a macro. In both cases, however, the encoding + // context is not written out in the resulting Ion stream. + // TODO: Include the symbol/macro table definitions in the resulting output stream. + #[allow(dead_code)] + struct LogStatement { + index: usize, + logger_name: String, + log_level: String, + format: String, + parameter_types: Vec, + } + + impl LogStatement { + pub fn new( + index: usize, + class_name: &str, + log_level: &str, + format: impl Into, + parameter_types: impl Into>, + ) -> Self { + Self { + index, + logger_name: format!("{PACKAGE_NAME}.{class_name}"), + log_level: log_level.to_string(), + format: format.into(), + parameter_types: parameter_types.into(), + } + } + } + + // Each log statement expects a series of parameters to populate the format string. While the + // log statement doesn't care about their type, we configure an expected type for each + // parameter to generate log event text that makes sense. + #[derive(PartialEq, Copy, Clone, Debug)] + enum ParameterType { + Int, + String, + } + + // A Log4J-ish log event + #[derive(Debug)] + struct LogEvent<'a> { + timestamp: Timestamp, + thread_id: usize, + thread_name: String, + statement: &'a LogStatement, + parameters: Vec, + } + + // Randomly selected int or string values that will be passed as parameters to our log events + #[derive(Clone, Debug)] + enum Parameter { + Int(i64), + String(String), + } + + // ===== Serialization logic for the above types ===== + + impl WriteAsIonValue for Parameter { + fn write_as_ion_value(&self, writer: V) -> IonResult<()> { + match self { + Parameter::Int(i) => i.write_as_ion_value(writer), + Parameter::String(s) => s.as_str().write_as_ion_value(writer), + } + } + } + + // Wrapper types to explicitly opt into or out of macro usage. 
These will not be necessary in + // the future, as types will be able to define both a macro-ized serialization and a no-macros + // serialization, allowing the writer to choose whichever is more appropriate. + struct SerializeWithoutMacros<'a, 'b>(&'a LogEvent<'b>); + struct SerializeWithMacros<'a, 'b>(&'a LogEvent<'b>); + + // When serializing without macros (usually in Ion 1.0), we write out a struct with each + // field name/value pair. In the case of recurring strings, we take the liberty of writing + // out symbol IDs instead of the full text; this silent type coercion from string to symbol + // is technically data loss, but results in a much more compact encoding. + impl<'a, 'b> WriteAsIonValue for SerializeWithoutMacros<'a, 'b> { + fn write_as_ion_value(&self, writer: V) -> IonResult<()> { + let event = self.0; + writer.write_struct(|fields| { + fields + // v--- Each field name is a symbol ID + .write(10, &event.timestamp)? + .write(11, event.thread_id)? + .write(12, &event.thread_name)? + // v--- The fixed strings from the log statement are also SIDs + .write(13, RawSymbolToken::SymbolId(17))? // logger name + .write(14, RawSymbolToken::SymbolId(18))? // log level + .write(15, RawSymbolToken::SymbolId(19))? // format + .write(16, &event.parameters)?; + Ok(()) + }) + } + } + + // When leveraging macros, the thread name's recurring prefix can be elided from the output. + // This wrapper type is used by the `SerializeWithMacros` type to change the serialization + // behavior for the thread name. + struct ThreadName<'a>(&'a str); + + impl<'a> WriteAsIonValue for ThreadName<'a> { + fn write_as_ion_value(&self, writer: V) -> IonResult<()> { + // ID 12 chosen arbitrarily, but aligns with Ion 1.0 encoding above + writer.write_eexp(12, |args| { + // Ignore the part of the thread name that starts with the recurring prefix.
+ args.write(&self.0[THREAD_NAME_PREFIX.len()..])?; + Ok(()) + }) + } + } + + impl<'a, 'b> WriteAsIonValue for SerializeWithMacros<'a, 'b> { + fn write_as_ion_value(&self, writer: V) -> IonResult<()> { + let event = self.0; + writer.write_eexp(event.statement.index, |args| { + args.write(&event.timestamp)? + .write(event.thread_id)? + // Wrap the thread name in the `ThreadName` wrapper to change its serialization. + .write(ThreadName(&event.thread_name))? + .write(&event.parameters)?; + Ok(()) + }) + } + } + + // ===== Random generation of sample data ===== + + const INT_PARAMETER_RANGE: Range = 0..5_000; + fn generate_int_parameter(rng: &mut StdRng) -> Parameter { + Parameter::Int(rng.gen_range(INT_PARAMETER_RANGE)) + } + + fn generate_string_parameter(rng: &mut StdRng) -> Parameter { + const WORDS: &[&str] = &["users", "transactions", "accounts", "customers", "waffles"]; + Parameter::String(WORDS.choose(rng).unwrap().to_string()) + } + + fn log_statements() -> Vec { + use ParameterType::*; + vec![ + LogStatement::new( + 0, + "Foo", + "DEBUG", + "Database heartbeat received after {} ms", + &[Int] + ), + LogStatement::new( + 1, + "Bar", + "INFO", + "Retrieved {} results from the '{}' table in {} ms", + &[Int, String, Int], + ), + LogStatement::new( + 2, + "Baz", + "WARN", + "Query to the '{}' table took {} ms to execute, which is higher than the configured threshold", + &[String, Int], + ), + LogStatement::new( + 3, + "Quux", + "ERROR", + "Connection to database lost", + &[] + ), + ] + } + + const INITIAL_EPOCH_MILLIS: i64 = 1708617898 * 1_000; // Feb 22, 2024 + const THREAD_NAME_PREFIX: &str = "worker-thread-"; + const PACKAGE_NAME: &str = "com.example.organization.product.component"; + + fn generate_events( + seed: u64, + log_statements: &[LogStatement], + num_events: usize, + ) -> Vec> { + let mut rng = StdRng::seed_from_u64(seed); + (0..num_events) + .map(|i| generate_event(&mut rng, log_statements, i)) + .collect() + } + + fn generate_event<'rng, 'statements>( 
+ rng: &'rng mut StdRng, + log_statements: &'statements [LogStatement], + event_index: usize, + ) -> LogEvent<'statements> { + // Each event is 1 second after the previous event + let event_epoch_millis = INITIAL_EPOCH_MILLIS + (event_index as i64 * 1000); + let naive_datetime = NaiveDateTime::from_timestamp_millis(event_epoch_millis) + .unwrap() + .into(); + + // Timestamps have an offset of UTC+5:00 + let timestamp: Timestamp = DateTime::::from_naive_utc_and_offset( + naive_datetime, + FixedOffset::east_opt(5 * 60 * 60).unwrap(), + ) + .into(); + + let thread_id = rng.gen_range(1..=128); + let thread_name = format!("{THREAD_NAME_PREFIX}{}", rng.gen_range(1..=8)); + let statement = log_statements.choose(rng).unwrap(); + + let parameters: Vec = statement + .parameter_types + .iter() + .map(|pt| match pt { + ParameterType::Int => generate_int_parameter(rng), + ParameterType::String => generate_string_parameter(rng), + }) + .collect(); + + LogEvent { + timestamp, + thread_id, + thread_name, + statement, + parameters, + } + } +} diff --git a/src/element/mod.rs b/src/element/mod.rs index cb7d0f41..c82da2c7 100644 --- a/src/element/mod.rs +++ b/src/element/mod.rs @@ -316,7 +316,7 @@ impl From for Value { /// // and then into an `Element`... /// let mut boolean_element: Element = boolean_value.into(); /// // and then adding annotations to the `Element`. -/// boolean_element = boolean_element.with_annotations(["foo", "bar"]); +/// let boolean_element = boolean_element.with_annotations(["foo", "bar"]); /// /// // Much more concise equivalent leveraging the `IntoAnnotatedElement` trait.
/// let boolean_element = true.with_annotations(["foo", "bar"]); diff --git a/src/lazy/encoder/binary/v1_0/container_writers.rs b/src/lazy/encoder/binary/v1_0/container_writers.rs index 8baacce7..eb6a59e4 100644 --- a/src/lazy/encoder/binary/v1_0/container_writers.rs +++ b/src/lazy/encoder/binary/v1_0/container_writers.rs @@ -57,7 +57,7 @@ impl<'value, 'top> BinaryContainerWriter_1_0<'value, 'top> { self.parent_buffer.push(self.type_code | 0xE); VarUInt::write_u64(&mut self.parent_buffer, body_length as u64)?; } - self.parent_buffer.extend_from_slice(body); + self.parent_buffer.extend_from_slice_copy(body); Ok(()) } } @@ -67,9 +67,14 @@ pub struct BinaryContainerValuesWriter_1_0<'value> { buffer: BumpVec<'value, u8>, } +// This value was chosen somewhat arbitrarily and can be modified as needed. Choosing a value that +// is too low will lead to performance degradation as the buffer will require multiple +// reallocations/copies. +const DEFAULT_CONTAINER_BUFFER_SIZE: usize = 512; + impl<'value> BinaryContainerValuesWriter_1_0<'value> { pub fn new(allocator: &'value BumpAllocator) -> Self { - let buffer = BumpVec::new_in(allocator); + let buffer = BumpVec::with_capacity_in(DEFAULT_CONTAINER_BUFFER_SIZE, allocator); Self { allocator, buffer } } @@ -98,7 +103,7 @@ impl<'value> BinaryListValuesWriter_1_0<'value> { impl<'value> MakeValueWriter for BinaryListValuesWriter_1_0<'value> { type ValueWriter<'a> = BinaryAnnotatableValueWriter_1_0<'a, 'value> where Self: 'a; - fn value_writer(&mut self) -> Self::ValueWriter<'_> { + fn make_value_writer(&mut self) -> Self::ValueWriter<'_> { BinaryAnnotatableValueWriter_1_0::new( self.values_writer.allocator, &mut self.values_writer.buffer, @@ -155,7 +160,7 @@ impl<'value> BinarySExpValuesWriter_1_0<'value> { impl<'value> MakeValueWriter for BinarySExpValuesWriter_1_0<'value> { type ValueWriter<'a> = BinaryAnnotatableValueWriter_1_0<'a, 'value> where Self: 'a; - fn value_writer(&mut self) -> Self::ValueWriter<'_> { + fn 
make_value_writer(&mut self) -> Self::ValueWriter<'_> { BinaryAnnotatableValueWriter_1_0::new( self.values_writer.allocator, &mut self.values_writer.buffer, diff --git a/src/lazy/encoder/binary/v1_0/value_writer.rs b/src/lazy/encoder/binary/v1_0/value_writer.rs index 02857eea..6800d7ab 100644 --- a/src/lazy/encoder/binary/v1_0/value_writer.rs +++ b/src/lazy/encoder/binary/v1_0/value_writer.rs @@ -19,7 +19,11 @@ use crate::lazy::encoder::binary::v1_0::container_writers::{ BinaryStructWriter_1_0, }; use crate::lazy::encoder::private::Sealed; -use crate::lazy::encoder::value_writer::{AnnotatableValueWriter, ValueWriter}; +use crate::lazy::encoder::value_writer::{ + delegate_value_writer_to, delegate_value_writer_to_self, AnnotatableValueWriter, ValueWriter, +}; +use crate::lazy::never::Never; +use crate::lazy::text::raw::v1_1::reader::MacroIdRef; use crate::raw_symbol_token_ref::AsRawSymbolTokenRef; use crate::result::{EncodingError, IonFailure}; use crate::types::integer::IntData; @@ -52,7 +56,7 @@ impl<'value, 'top> BinaryValueWriter_1_0<'value, 'top> { #[inline] fn push_bytes(&mut self, bytes: &[u8]) { - self.encoding_buffer.extend_from_slice(bytes) + self.encoding_buffer.extend_from_slice_copy(bytes) } pub(crate) fn buffer(&self) -> &[u8] { @@ -296,6 +300,19 @@ impl<'value, 'top> BinaryValueWriter_1_0<'value, 'top> { ) -> IonResult<()> { self.struct_writer().write_fields(struct_fn) } + fn write_eexp< + 'macro_id, + F: for<'a> FnOnce(&mut ::MacroArgsWriter<'a>) -> IonResult<()>, + >( + self, + macro_id: impl Into>, + _macro_fn: F, + ) -> IonResult<()> { + let id = macro_id.into(); + IonResult::encoding_error(format!( + "attempted to call macro {id:?}; macros are not supported in Ion 1.0" + )) + } } impl<'value, 'top> Sealed for BinaryValueWriter_1_0<'value, 'top> {} @@ -305,36 +322,9 @@ impl<'value, 'top> ValueWriter for BinaryValueWriter_1_0<'value, 'top> { type SExpWriter<'a> = BinarySExpValuesWriter_1_0<'a>; type StructWriter<'a> = 
BinaryStructFieldsWriter_1_0<'a>; - delegate! { - to self { - fn write_null(self, ion_type: IonType) -> IonResult<()>; - fn write_bool(self, value: bool) -> IonResult<()>; - fn write_i64(self, value: i64) -> IonResult<()>; - fn write_int(self, value: &Int) -> IonResult<()>; - fn write_f32(self, value: f32) -> IonResult<()>; - fn write_f64(self, value: f64) -> IonResult<()>; - fn write_decimal(self, value: &Decimal) -> IonResult<()>; - fn write_timestamp(self, value: &Timestamp) -> IonResult<()>; - fn write_string(self, value: impl AsRef) -> IonResult<()>; - fn write_symbol(self, value: impl AsRawSymbolTokenRef) -> IonResult<()>; - fn write_clob(self, value: impl AsRef<[u8]>) -> IonResult<()>; - fn write_blob(self, value: impl AsRef<[u8]>) -> IonResult<()>; - fn write_list FnOnce(&mut Self::ListWriter<'a>) -> IonResult<()>>( - self, - list_fn: F, - ) -> IonResult<()>; - fn write_sexp FnOnce(&mut Self::SExpWriter<'a>) -> IonResult<()>>( - self, - sexp_fn: F, - ) -> IonResult<()>; - fn write_struct< - F: for<'a> FnOnce(&mut Self::StructWriter<'a>) -> IonResult<()>, - >( - self, - struct_fn: F, - ) -> IonResult<()>; - } - } + type MacroArgsWriter<'a> = Never; + + delegate_value_writer_to_self!(); } pub struct BinaryAnnotatableValueWriter_1_0<'value, 'top> { @@ -472,6 +462,9 @@ impl<'value, 'top, SymbolType: AsRawSymbolTokenRef> ValueWriter type StructWriter<'a> = BinaryStructFieldsWriter_1_0<'a>; + // Ion 1.0 + type MacroArgsWriter<'a> = Never; + annotate_and_delegate!( IonType => write_null, bool => write_bool, @@ -505,6 +498,17 @@ impl<'value, 'top, SymbolType: AsRawSymbolTokenRef> ValueWriter ) -> IonResult<()> { self.encode_annotated(|value_writer| value_writer.write_struct(struct_fn)) } + + fn write_eexp<'macro_id, F: for<'a> FnOnce(&mut Self::MacroArgsWriter<'a>) -> IonResult<()>>( + self, + macro_id: impl Into>, + _macro_fn: F, + ) -> IonResult<()> { + let id = macro_id.into(); + IonResult::encoding_error(format!( + "attempted to call macro {id:?}; macros are 
not supported in Ion 1.0" + )) + } } pub struct BinaryAnnotatedValueWriter_1_0<'value, 'top> { @@ -518,7 +522,7 @@ impl<'value, 'top> BinaryAnnotatedValueWriter_1_0<'value, 'top> { pub fn new(allocator: &'top BumpAllocator, buffer: &'value mut BumpVec<'top, u8>) -> Self { Self { allocator, buffer } } - pub(crate) fn value_writer(&mut self) -> BinaryValueWriter_1_0<'_, 'top> { + pub(crate) fn value_writer(self) -> BinaryValueWriter_1_0<'value, 'top> { BinaryValueWriter_1_0::new(self.allocator, self.buffer) } @@ -533,36 +537,11 @@ impl<'value, 'top: 'value> ValueWriter for BinaryAnnotatedValueWriter_1_0<'value type ListWriter<'a> = BinaryListValuesWriter_1_0<'a>; type SExpWriter<'a> = BinarySExpValuesWriter_1_0<'a>; type StructWriter<'a> = BinaryStructFieldsWriter_1_0<'a>; - delegate! { - to self.value_writer() { - fn write_null(mut self, ion_type: IonType) -> IonResult<()>; - fn write_bool(mut self, value: bool) -> IonResult<()>; - fn write_i64(mut self, value: i64) -> IonResult<()>; - fn write_int(mut self, value: &Int) -> IonResult<()>; - fn write_f32(mut self, value: f32) -> IonResult<()>; - fn write_f64(mut self, value: f64) -> IonResult<()>; - fn write_decimal(mut self, value: &Decimal) -> IonResult<()>; - fn write_timestamp(mut self, value: &Timestamp) -> IonResult<()>; - fn write_string(mut self, value: impl AsRef) -> IonResult<()>; - fn write_symbol(mut self, value: impl AsRawSymbolTokenRef) -> IonResult<()>; - fn write_clob(mut self, value: impl AsRef<[u8]>) -> IonResult<()>; - fn write_blob(mut self, value: impl AsRef<[u8]>) -> IonResult<()>; - fn write_list FnOnce(&mut Self::ListWriter<'a>) -> IonResult<()>>( - mut self, - list_fn: F, - ) -> IonResult<()>; - fn write_sexp FnOnce(&mut Self::SExpWriter<'a>) -> IonResult<()>>( - mut self, - sexp_fn: F, - ) -> IonResult<()>; - fn write_struct< - F: for<'a> FnOnce(&mut Self::StructWriter<'a>) -> IonResult<()>, - >( - mut self, - struct_fn: F, - ) -> IonResult<()>; - } - } + + // Ion 1.0 does not support macros 
+ type MacroArgsWriter<'a> = Never; + + delegate_value_writer_to!(closure |self_: Self| self_.value_writer()); } #[cfg(test)] mod tests { diff --git a/src/lazy/encoder/binary/v1_0/writer.rs b/src/lazy/encoder/binary/v1_0/writer.rs index 61fc3104..9aaae3e8 100644 --- a/src/lazy/encoder/binary/v1_0/writer.rs +++ b/src/lazy/encoder/binary/v1_0/writer.rs @@ -34,6 +34,10 @@ pub struct LazyRawBinaryWriter_1_0 { encoding_buffer_ptr: Option<*mut ()>, } +/// The initial size of the backing array for the writer's bump allocator. +// This value was chosen somewhat arbitrarily and can be changed as needed. +const DEFAULT_BUMP_SIZE: usize = 16 * 1024; + impl LazyRawBinaryWriter_1_0 { /// Constructs a new binary writer and writes an Ion 1.0 Version Marker to output. pub fn new(mut output: W) -> IonResult { @@ -42,7 +46,7 @@ impl LazyRawBinaryWriter_1_0 { // Construct the writer Ok(Self { output, - allocator: BumpAllocator::new(), + allocator: BumpAllocator::with_capacity(DEFAULT_BUMP_SIZE), encoding_buffer_ptr: None, }) } @@ -128,10 +132,8 @@ impl LazyRawWriter for LazyRawBinaryWriter_1_0 { impl MakeValueWriter for LazyRawBinaryWriter_1_0 { type ValueWriter<'a> = BinaryAnnotatableValueWriter_1_0<'a, 'a> where Self: 'a; - delegate! 
{ - to self { - fn value_writer(&mut self) -> Self::ValueWriter<'_>; - } + fn make_value_writer(&mut self) -> Self::ValueWriter<'_> { + self.value_writer() } } diff --git a/src/lazy/encoder/binary/v1_1/container_writers.rs b/src/lazy/encoder/binary/v1_1/container_writers.rs index baae4e1f..29596ef6 100644 --- a/src/lazy/encoder/binary/v1_1/container_writers.rs +++ b/src/lazy/encoder/binary/v1_1/container_writers.rs @@ -2,12 +2,13 @@ use bumpalo::collections::Vec as BumpVec; use bumpalo::Bump as BumpAllocator; use delegate::delegate; +use crate::lazy::encoder::binary::v1_1::flex_sym::FlexSym; use crate::lazy::encoder::binary::v1_1::value_writer::BinaryAnnotatableValueWriter_1_1; use crate::lazy::encoder::value_writer::internal::MakeValueWriter; -use crate::lazy::encoder::value_writer::{SequenceWriter, StructWriter}; +use crate::lazy::encoder::value_writer::{MacroArgsWriter, SequenceWriter, StructWriter}; use crate::lazy::encoder::write_as_ion::WriteAsIon; use crate::raw_symbol_token_ref::AsRawSymbolTokenRef; -use crate::{FlexInt, FlexUInt, IonResult, RawSymbolTokenRef}; +use crate::{FlexUInt, IonResult, RawSymbolTokenRef, SymbolId}; /// A helper type that holds fields and logic that is common to [`BinaryListWriter_1_1`], /// [`BinarySExpWriter_1_1`], and [`BinaryStructWriter_1_1`]. @@ -41,6 +42,7 @@ impl<'value, 'top> BinaryContainerWriter_1_1<'value, 'top> { } /// Encodes the provided `value` to the [`BinaryContainerWriter_1_1`]'s buffer. 
+ #[inline] pub fn write(&mut self, value: V) -> IonResult<&mut Self> { let annotated_value_writer = self.value_writer(); value.write_as_ion(annotated_value_writer)?; @@ -67,7 +69,7 @@ impl<'value, 'top> BinaryListWriter_1_1<'value, 'top> { impl<'value, 'top> MakeValueWriter for BinaryListWriter_1_1<'value, 'top> { type ValueWriter<'a> = BinaryAnnotatableValueWriter_1_1<'a, 'top> where Self: 'a; - fn value_writer(&mut self) -> Self::ValueWriter<'_> { + fn make_value_writer(&mut self) -> Self::ValueWriter<'_> { self.container_writer.value_writer() } } @@ -94,7 +96,7 @@ impl<'value, 'top> BinarySExpWriter_1_1<'value, 'top> { impl<'value, 'top> MakeValueWriter for BinarySExpWriter_1_1<'value, 'top> { type ValueWriter<'a> = BinaryAnnotatableValueWriter_1_1<'a, 'top> where Self: 'a; - fn value_writer(&mut self) -> Self::ValueWriter<'_> { + fn make_value_writer(&mut self) -> Self::ValueWriter<'_> { BinaryAnnotatableValueWriter_1_1::new( self.container_writer.allocator(), self.container_writer.buffer(), @@ -120,20 +122,20 @@ pub struct BinaryStructWriter_1_1<'value, 'top> { } impl<'value, 'top> BinaryStructWriter_1_1<'value, 'top> { - pub(crate) fn new(container_writer: BinaryContainerWriter_1_1<'value, 'top>) -> Self { + pub(crate) fn new_length_prefixed( + container_writer: BinaryContainerWriter_1_1<'value, 'top>, + ) -> Self { Self { flex_uint_encoding: true, container_writer, } } - fn enable_flex_sym_encoding(&mut self) { - if self.flex_uint_encoding { - // Write a zero byte out to signal to future readers that we are switching from - // FlexUInt to FlexSym. - self.container_writer.buffer().push(0x00); - // Remember that we've already done this step. - self.flex_uint_encoding = false; + pub(crate) fn new_delimited(container_writer: BinaryContainerWriter_1_1<'value, 'top>) -> Self { + Self { + // Delimited structs always use FlexSym encoding. 
+ flex_uint_encoding: false, + container_writer, } } @@ -141,6 +143,7 @@ impl<'value, 'top> BinaryStructWriter_1_1<'value, 'top> { self.container_writer.buffer } + #[inline] pub fn write( &mut self, name: A, @@ -148,36 +151,53 @@ impl<'value, 'top> BinaryStructWriter_1_1<'value, 'top> { ) -> IonResult<&mut Self> { use RawSymbolTokenRef::*; - // Write the field name - match name.as_raw_symbol_token_ref() { - SymbolId(0) => { - self.enable_flex_sym_encoding(); - // Encoding `$0` requires a zero byte to indicate an opcode follows, - // and then opcode 0x70, which indicates symbol ID 0. - self.buffer().extend_from_slice(&[0, 0x70]); - } - SymbolId(sid) if self.flex_uint_encoding => { - FlexUInt::write_u64(self.buffer(), sid as u64)?; + match (self.flex_uint_encoding, name.as_raw_symbol_token_ref()) { + // We're already in FlexSym encoding mode + (false, _) => self.write_flex_sym_field(name, value), + // We're still in FlexUInt encoding mode, but this value requires FlexSym encoding + (_, Text(_)) | (_, SymbolId(0)) => + // This can only happen up to once inside a length-prefixed struct + { + self.enable_flex_sym_and_write_field(name, value) } - SymbolId(sid) => { - FlexInt::write_i64(self.buffer(), sid as i64)?; - } - Text(text_token) => { - self.enable_flex_sym_encoding(); - let text = text_token.as_ref(); - let num_bytes = text.len(); - if num_bytes == 0 { - // Encoding the empty string requires a zero byte to indicate an opcode follows, - // and then opcode 0x80, which indicates a string of length 0. 
- self.buffer().extend_from_slice(&[0, 0x80]) - } else { - let negated_num_bytes = -(text.len() as i64); - FlexInt::write_i64(self.buffer(), negated_num_bytes)?; - self.buffer().extend_from_slice(text.as_bytes()); - } - } - }; + // We're in FlexUInt encoding mode and can write this field without switching modes + (_, SymbolId(sid)) => self.write_flex_uint_field(sid, value), + } + } + #[inline] + fn write_flex_uint_field( + &mut self, + name: SymbolId, + value: V, + ) -> IonResult<&mut Self> { + FlexUInt::encode_u64(self.buffer(), name as u64); + self.container_writer.write(value)?; + Ok(self) + } + + #[inline(never)] + fn enable_flex_sym_and_write_field( + &mut self, + name: A, + value: V, + ) -> IonResult<&mut Self> { + // This is the first time we're writing a FlexSym field. Emit a FlexUInt 0 to tell + // readers that we're switching from FlexUInt to FlexSym. + self.buffer().push(0x01); + self.flex_uint_encoding = false; + self.write_flex_sym_field(name, value) + } + + pub fn write_flex_sym_field( + &mut self, + name: A, + value: V, + ) -> IonResult<&mut Self> { + // Write the field name + FlexSym::encode_symbol(self.buffer(), name); + + // Write the value self.container_writer.write(value)?; Ok(self) } @@ -194,3 +214,19 @@ impl<'value, 'top> StructWriter for BinaryStructWriter_1_1<'value, 'top> { } } } + +pub struct BinaryMacroArgsWriter_1_1<'value, 'top> { + pub(crate) container_writer: BinaryContainerWriter_1_1<'value, 'top>, +} + +impl<'value, 'top> MakeValueWriter for BinaryMacroArgsWriter_1_1<'value, 'top> { + type ValueWriter<'a> = BinaryAnnotatableValueWriter_1_1<'a, 'top> where Self: 'a; + + fn make_value_writer(&mut self) -> Self::ValueWriter<'_> { + self.container_writer.value_writer() + } +} + +impl<'value, 'top> SequenceWriter for BinaryMacroArgsWriter_1_1<'value, 'top> {} + +impl<'value, 'top> MacroArgsWriter for BinaryMacroArgsWriter_1_1<'value, 'top> {} diff --git a/src/lazy/encoder/binary/v1_1/flex_int.rs 
b/src/lazy/encoder/binary/v1_1/flex_int.rs index ebdc4568..fbf486ba 100644 --- a/src/lazy/encoder/binary/v1_1/flex_int.rs +++ b/src/lazy/encoder/binary/v1_1/flex_int.rs @@ -1,4 +1,6 @@ use crate::{FlexUInt, IonResult}; +use bumpalo::collections::Vec as BumpVec; +use ice_code::ice as cold_path; use std::io::Write; const BITS_PER_I64: usize = 64; @@ -66,6 +68,31 @@ impl FlexInt { Ok(FlexInt::new(flex_uint.size_in_bytes(), signed_value)) } + // This is equivalent to calling `write_i64(my_bump_vec).unwrap()`, but optimized for writing + // to a `BumpVec` instead of a `W: Write`. Writing to a BumpVec cannot fail (barring out-of- + // memory errors and the like), which eliminates some branching, a loop inside + // `io::Write::write_all`, and the construction of a return value. + #[inline] + pub fn encode_i64(output: &mut BumpVec, value: i64) { + let encoded_size_in_bytes = if value < 0 { + BYTES_NEEDED_CACHE[value.leading_ones() as usize] + } else { + BYTES_NEEDED_CACHE[value.leading_zeros() as usize] + } as usize; + if encoded_size_in_bytes <= 8 { + // The entire encoding (including continuation bits) will fit in a u64. + // `encoded_size_in_bytes` is also the number of continuation bits we need to include + let mut encoded = value << encoded_size_in_bytes; + // Set the `end` flag to 1 + encoded += 1 << (encoded_size_in_bytes - 1); + output.extend_from_slice_copy(&encoded.to_le_bytes()[..encoded_size_in_bytes]); + return; + } + cold_path! {{ + let _ = Self::write_large_i64(output, value, encoded_size_in_bytes); + }} + } + #[inline] pub fn write_i64(output: &mut W, value: i64) -> IonResult { let encoded_size_in_bytes = if value < 0 { @@ -82,7 +109,9 @@ impl FlexInt { output.write_all(&encoded.to_le_bytes()[..encoded_size_in_bytes])?; return Ok(encoded_size_in_bytes); } - Self::write_large_i64(output, value, encoded_size_in_bytes) + cold_path! 
{ + Self::write_large_i64(output, value, encoded_size_in_bytes) + } } /// Helper method that encodes a signed values that require 9 or 10 bytes to represent. diff --git a/src/lazy/encoder/binary/v1_1/flex_sym.rs b/src/lazy/encoder/binary/v1_1/flex_sym.rs new file mode 100644 index 00000000..c9d74c69 --- /dev/null +++ b/src/lazy/encoder/binary/v1_1/flex_sym.rs @@ -0,0 +1,46 @@ +use bumpalo::collections::Vec as BumpVec; +use ice_code::ice as cold_path; + +use crate::raw_symbol_token_ref::AsRawSymbolTokenRef; +use crate::RawSymbolTokenRef::{SymbolId, Text}; +use crate::{FlexInt, RawSymbolTokenRef}; + +/// An Ion 1.1 encoding primitive that can compactly represent a symbol ID or inline text. +#[derive(Debug)] +pub struct FlexSym { + // No fields yet; these may be added when we get to read support. +} + +impl FlexSym { + /// A FlexSym-encoded logical zero: the byte `0x01u8` + pub const ZERO: u8 = 0x01; + + /// Encode the provided `symbol` as a FlexSym and write it to the provided [`BumpVec`]. + pub fn encode_symbol(output: &mut BumpVec, symbol: impl AsRawSymbolTokenRef) { + let symbol_token = symbol.as_raw_symbol_token_ref(); + // Write the field name + match symbol_token { + SymbolId(sid) if sid != 0 => { + FlexInt::encode_i64(output, sid as i64); + } + Text(text) if !text.is_empty() => { + let negated_num_bytes = -(text.len() as i64); + FlexInt::encode_i64(output, negated_num_bytes); + output.extend_from_slice_copy(text.as_bytes()); + } + _ => cold_path! { + Self::encode_special_case(output, symbol_token) + }, + }; + } + + /// Encodes the empty string or symbol ID zero as a FlexSym. The caller is responsible for + /// confirming that `symbol` is one of those two cases before calling. 
+ fn encode_special_case(output: &mut BumpVec, symbol: RawSymbolTokenRef) { + let encoding: &[u8] = match symbol { + SymbolId(_) => &[FlexSym::ZERO, 0xE1, 0x00], + Text(_) => &[FlexSym::ZERO, 0x80], + }; + output.extend_from_slice_copy(encoding); + } +} diff --git a/src/lazy/encoder/binary/v1_1/flex_uint.rs b/src/lazy/encoder/binary/v1_1/flex_uint.rs index c2e3916f..78484ed9 100644 --- a/src/lazy/encoder/binary/v1_1/flex_uint.rs +++ b/src/lazy/encoder/binary/v1_1/flex_uint.rs @@ -1,6 +1,8 @@ use crate::result::IonFailure; use crate::types::integer::UIntData; use crate::{IonResult, UInt}; +use bumpalo::collections::Vec as BumpVec; +use ice_code::ice as cold_path; use num_bigint::BigUint; use num_traits::ToBytes; use std::io::Write; @@ -146,48 +148,50 @@ impl FlexUInt { return Ok(flex_uint); } - // If we reach this point, the first byte was a zero. The FlexUInt is at least 9 bytes in size. - // We need to inspect the second byte to see how many more prefix bits there are. - if bytes_available < 2 { - return incomplete(); - } - let second_byte = bytes[1]; - - if second_byte & 0b11 == 0b00 { - // The flag bits in the second byte indicate at least two more bytes, meaning the total - // length is more than 10 bytes. We're not equipped to handle this. - return IonResult::decoding_error( - "found a >10 byte Flex(U)Int too large to fit in 64 bits", - ); - } - - if second_byte & 0b11 == 0b10 { - // The lowest bit of the second byte is empty, the next lowest is not. The encoding - // is 10 bytes. - - if bytes_available < 10 { + cold_path! {{ + // If we reach this point, the first byte was a zero. The FlexUInt is at least 9 bytes in size. + // We need to inspect the second byte to see how many more prefix bits there are. 
+ if bytes_available < 2 { return incomplete(); } + let second_byte = bytes[1]; - let flex_uint = Self::read_10_byte_flex_primitive_as_uint( - support_sign_extension, - bytes, - second_byte, - )?; - return Ok(flex_uint); - } + if second_byte & 0b11 == 0b00 { + // The flag bits in the second byte indicate at least two more bytes, meaning the total + // length is more than 10 bytes. We're not equipped to handle this. + return IonResult::decoding_error( + "found a >10 byte Flex(U)Int too large to fit in 64 bits", + ); + } - // The lowest bit of the second byte is set. The encoding is 9 bytes. - if bytes_available < 9 { - return incomplete(); - } - // There are 57-63 bits of magnitude. We can decode the remaining bytes in a u64. - let remaining_data = &bytes[1..9]; - // We know that the slice is 8 bytes long, so we can unwrap() the conversion to [u8; 8] - // Lop off the lowest bit to discard the `end` flag. - let value = u64::from_le_bytes(remaining_data[..8].try_into().unwrap()) >> 1; - let flex_uint = FlexUInt::new(9, value); - Ok(flex_uint) + if second_byte & 0b11 == 0b10 { + // The lowest bit of the second byte is empty, the next lowest is not. The encoding + // is 10 bytes. + + if bytes_available < 10 { + return incomplete(); + } + + let flex_uint = Self::read_10_byte_flex_primitive_as_uint( + support_sign_extension, + bytes, + second_byte, + )?; + return Ok(flex_uint); + } + + // The lowest bit of the second byte is set. The encoding is 9 bytes. + if bytes_available < 9 { + return incomplete(); + } + // There are 57-63 bits of magnitude. We can decode the remaining bytes in a u64. + let remaining_data = &bytes[1..9]; + // We know that the slice is 8 bytes long, so we can unwrap() the conversion to [u8; 8] + // Lop off the lowest bit to discard the `end` flag. 
+ let value = u64::from_le_bytes(remaining_data[..8].try_into().unwrap()) >> 1; + let flex_uint = FlexUInt::new(9, value); + Ok(flex_uint) + }} } /// Helper method to handle flex primitives whose encoding requires 10 bytes. This case is @@ -258,13 +262,17 @@ impl FlexUInt { Ok(flex_uint) } + #[inline] pub fn write(output: &mut W, value: &UInt) -> IonResult { match &value.data { UIntData::U64(uint) => Self::write_u64(output, *uint), - UIntData::BigUInt(uint) => Self::write_big_uint(output, uint), + UIntData::BigUInt(uint) => cold_path! { + Self::write_big_uint(output, uint) + }, } } + #[cold] fn write_big_uint(output: &mut W, value: &BigUint) -> IonResult { // There's lots of room for optimization here, but this code path is rarely taken. let le_bytes = value.to_bytes_le(); @@ -277,7 +285,62 @@ impl FlexUInt { Ok(encoding.len()) } + /// This is equivalent to calling `write_u64(my_bump_vec).unwrap()`, but optimized for writing + /// to a `BumpVec` instead of a `W: Write`. Writing to a BumpVec cannot fail (barring out-of- + /// memory errors and the like), which eliminates some branching, a loop inside + /// `io::Write::write_all`, and the construction of a return value. + #[inline] + pub(crate) fn encode_u64(output: &mut BumpVec, value: u64) { + // This code will be inlined at the call site... + if value < 127 { + let flex_uint_byte = (value << 1) as u8 + 1; + return output.extend_from_slice_copy(&[flex_uint_byte]); + } + + //...while this code will not. + cold_path! { + general_case => Self::encode_u64_general_case(output, value) + } + } + + /// Can encode a u64 of any size to the provided [`BumpVec`]. Some other methods are optimized + /// for encoding small u64s and fall back to this method to encode values that are larger. 
+ fn encode_u64_general_case(output: &mut BumpVec, value: u64) { + let leading_zeros = value.leading_zeros(); + let num_encoded_bytes = BYTES_NEEDED_CACHE[leading_zeros as usize] as usize; + if num_encoded_bytes <= 8 { + let flag_bits = 1u64 << (num_encoded_bytes - 1); + // Left shift the value to accommodate the trailing flag bits and then OR them together + let encoded_value = (value << num_encoded_bytes) | flag_bits; + output.extend_from_slice_copy(&encoded_value.to_le_bytes()[..num_encoded_bytes]); + return; + } + cold_path! { + // NB: There is not a BumpVec-specialized `encode_*` version of `write_large_u64` as + // it is very rarely called. + encode_xl_u64 => { + let _ = Self::write_large_u64(output, value, num_encoded_bytes); + } + } + } + #[inline] + pub(crate) fn encode_opcode_and_length(output: &mut BumpVec, opcode: u8, length: u64) { + // In the common case, the length fits in a single FlexUInt byte. We can perform a single + // `reserve`/`memcopy` to get both the opcode and the length into the buffer. + if length < 127 { + let flex_uint_byte = (length << 1) as u8 + 1; + return output.extend_from_slice_copy(&[opcode, flex_uint_byte]); + } + + // If there's call for it, we could also do this for 2-byte FlexUInts. For now, we fall + // back to the general-purpose encoder. + cold_path! { encode_opcode_and_length_general_case => { + output.push(opcode); + FlexUInt::encode_u64_general_case(output, length) + }} + } + pub fn write_u64(output: &mut W, value: u64) -> IonResult { let leading_zeros = value.leading_zeros(); let num_encoded_bytes = BYTES_NEEDED_CACHE[leading_zeros as usize] as usize; @@ -288,7 +351,9 @@ impl FlexUInt { output.write_all(&encoded_value.to_le_bytes()[..num_encoded_bytes])?; return Ok(num_encoded_bytes); } - Self::write_large_u64(output, value, num_encoded_bytes) + cold_path! { + write_xl_u64 => Self::write_large_u64(output, value, num_encoded_bytes) + } } /// Helper method that encodes a signed values that require 9 or 10 bytes to represent. 
diff --git a/src/lazy/encoder/binary/v1_1/mod.rs b/src/lazy/encoder/binary/v1_1/mod.rs index d4b2f04e..8a13349b 100644 --- a/src/lazy/encoder/binary/v1_1/mod.rs +++ b/src/lazy/encoder/binary/v1_1/mod.rs @@ -7,6 +7,7 @@ pub mod container_writers; pub mod fixed_int; pub mod fixed_uint; pub mod flex_int; +pub mod flex_sym; pub mod flex_uint; pub mod value_writer; pub mod writer; diff --git a/src/lazy/encoder/binary/v1_1/value_writer.rs b/src/lazy/encoder/binary/v1_1/value_writer.rs index 410b8595..eff1f851 100644 --- a/src/lazy/encoder/binary/v1_1/value_writer.rs +++ b/src/lazy/encoder/binary/v1_1/value_writer.rs @@ -2,17 +2,23 @@ use arrayvec::ArrayVec; use bumpalo::collections::Vec as BumpVec; use bumpalo::Bump as BumpAllocator; use delegate::delegate; +use ice_code::ice as cold_path; use num_bigint::BigInt; use num_traits::ToPrimitive; use crate::lazy::encoder::binary::annotate_and_delegate; use crate::lazy::encoder::binary::v1_1::container_writers::{ - BinaryContainerWriter_1_1, BinaryListWriter_1_1, BinarySExpWriter_1_1, BinaryStructWriter_1_1, + BinaryContainerWriter_1_1, BinaryListWriter_1_1, BinaryMacroArgsWriter_1_1, + BinarySExpWriter_1_1, BinaryStructWriter_1_1, }; use crate::lazy::encoder::binary::v1_1::fixed_int::FixedInt; use crate::lazy::encoder::binary::v1_1::fixed_uint::FixedUInt; +use crate::lazy::encoder::binary::v1_1::flex_sym::FlexSym; use crate::lazy::encoder::private::Sealed; -use crate::lazy::encoder::value_writer::{AnnotatableValueWriter, ValueWriter}; +use crate::lazy::encoder::value_writer::{ + delegate_value_writer_to_self, AnnotatableValueWriter, ValueWriter, +}; +use crate::lazy::text::raw::v1_1::reader::MacroIdRef; use crate::raw_symbol_token_ref::AsRawSymbolTokenRef; use crate::result::IonFailure; use crate::types::integer::IntData; @@ -20,6 +26,12 @@ use crate::{ Decimal, FlexInt, FlexUInt, Int, IonResult, IonType, RawSymbolTokenRef, SymbolId, Timestamp, }; +/// The initial size of the bump-allocated buffer created to hold a 
container's child elements. +// This number was chosen somewhat arbitrarily and can be updated as needed. +// TODO: Writers could track the largest container size they've seen and use that as their initial +// size to minimize reallocations. +const DEFAULT_CONTAINER_BUFFER_SIZE: usize = 512; + pub struct BinaryValueWriter_1_1<'value, 'top> { allocator: &'top BumpAllocator, encoding_buffer: &'value mut BumpVec<'top, u8>, @@ -50,7 +62,7 @@ impl<'value, 'top> BinaryValueWriter_1_1<'value, 'top> { #[inline] fn push_bytes(&mut self, bytes: &[u8]) { - self.encoding_buffer.extend_from_slice(bytes) + self.encoding_buffer.extend_from_slice_copy(bytes) } pub(crate) fn buffer(&self) -> &[u8] { @@ -481,59 +493,73 @@ impl<'value, 'top> BinaryValueWriter_1_1<'value, 'top> { Ok(()) } + #[inline] pub fn write_string>(mut self, value: A) -> IonResult<()> { const STRING_OPCODE: u8 = 0x80; const STRING_FLEX_UINT_LEN_OPCODE: u8 = 0xF8; - self.write_text(STRING_OPCODE, STRING_FLEX_UINT_LEN_OPCODE, value.as_ref()) + self.write_text(STRING_OPCODE, STRING_FLEX_UINT_LEN_OPCODE, value.as_ref()); + Ok(()) } + #[inline] pub fn write_symbol(mut self, value: A) -> IonResult<()> { const SYMBOL_OPCODE: u8 = 0x90; const SYMBOL_FLEX_UINT_LEN_OPCODE: u8 = 0xF9; match value.as_raw_symbol_token_ref() { RawSymbolTokenRef::SymbolId(sid) => self.write_symbol_id(sid), RawSymbolTokenRef::Text(text) => { - self.write_text(SYMBOL_OPCODE, SYMBOL_FLEX_UINT_LEN_OPCODE, text.as_ref()) + self.write_text(SYMBOL_OPCODE, SYMBOL_FLEX_UINT_LEN_OPCODE, text.as_ref()); + Ok(()) } } } #[inline] fn write_symbol_id(&mut self, symbol_id: SymbolId) -> IonResult<()> { - match symbol_id { + let mut buffer = [0u8; 4]; + let encoded_bytes = match symbol_id { 0..=255 => { - self.push_bytes(&[0xE1, symbol_id as u8]); + buffer[0] = 0xE1; // Symbol ID value opcode; one-byte FixedUInt follows + buffer[1] = symbol_id as u8; + &buffer[0..2] } // The u16::MAX range, but biased by 256. 
256..=65_791 => { - self.push_byte(0xE2); // Two-byte biased FixedUInt follows - let encoded_length = ((symbol_id - 256) as u16).to_le_bytes(); - self.push_bytes(encoded_length.as_slice()); + let le_bytes = ((symbol_id - 256) as u16).to_le_bytes(); + buffer[0] = 0xE2; // Symbol ID value opcode; two-byte FixedUInt follows + buffer[1] = le_bytes[0]; + buffer[2] = le_bytes[1]; + &buffer[0..3] } // 65,792 and higher _ => { - self.push_byte(0xE3); // Biased FlexUInt follows - FlexUInt::write_u64(self.encoding_buffer, symbol_id as u64 - 65_792)?; + cold_path! { + xl_symbol_id_value => { + self.push_byte(0xE3); // Biased FlexUInt follows + FlexUInt::encode_u64(self.encoding_buffer, symbol_id as u64 - 65_792) + } + }; + return Ok(()); } - } + }; + self.push_bytes(encoded_bytes); Ok(()) } /// Helper method for writing strings and symbols with inline UTF8 bytes. #[inline] - fn write_text(&mut self, opcode: u8, var_len_opcode: u8, text: &str) -> IonResult<()> { - match text.len() { - num_utf8_bytes @ 0..=15 => { - // The length is small enough to safely cast it to u8 and include it in the opcode. - self.push_byte(opcode | num_utf8_bytes as u8); - } - num_utf8_bytes => { - self.push_byte(var_len_opcode); - FlexUInt::write_u64(self.encoding_buffer, num_utf8_bytes as u64)?; - } - }; + fn write_text(&mut self, opcode: u8, var_len_opcode: u8, text: &str) { + if text.len() < 16 { + self.encoding_buffer + .extend_from_slice_copy(&[opcode | text.len() as u8]); + } else { + FlexUInt::encode_opcode_and_length( + self.encoding_buffer, + var_len_opcode, + text.len() as u64, + ); + } self.push_bytes(text.as_bytes()); - Ok(()) } pub fn write_clob>(self, value: A) -> IonResult<()> { @@ -571,9 +597,9 @@ impl<'value, 'top> BinaryValueWriter_1_1<'value, 'top> { list_fn: F, ) -> IonResult<()> { // We're writing a length-prefixed list, so we need to set up a space to encode the list's children. 
- let child_encoding_buffer = self - .allocator - .alloc_with(|| BumpVec::new_in(self.allocator)); + let child_encoding_buffer = self.allocator.alloc_with(|| { + BumpVec::with_capacity_in(DEFAULT_CONTAINER_BUFFER_SIZE, self.allocator) + }); // Create a BinaryListWriter_1_1 to pass to the user's closure. let container_writer = BinaryContainerWriter_1_1::new(self.allocator, child_encoding_buffer); @@ -593,8 +619,7 @@ impl<'value, 'top> BinaryValueWriter_1_1<'value, 'top> { FlexUInt::write_u64(self.encoding_buffer, encoded_length as u64)?; } } - self.encoding_buffer - .extend_from_slice(list_writer.container_writer.buffer()); + self.push_bytes(list_writer.container_writer.buffer()); Ok(()) } @@ -633,9 +658,9 @@ impl<'value, 'top> BinaryValueWriter_1_1<'value, 'top> { sexp_fn: F, ) -> IonResult<()> { // We're writing a length-prefixed sexp, so we need to set up a space to encode the sexp's children. - let child_encoding_buffer = self - .allocator - .alloc_with(|| BumpVec::new_in(self.allocator)); + let child_encoding_buffer = self.allocator.alloc_with(|| { + BumpVec::with_capacity_in(DEFAULT_CONTAINER_BUFFER_SIZE, self.allocator) + }); // Create a BinarySExpWriter_1_1 to pass to the user's closure. 
let container_writer = BinaryContainerWriter_1_1::new(self.allocator, child_encoding_buffer); @@ -655,8 +680,7 @@ impl<'value, 'top> BinaryValueWriter_1_1<'value, 'top> { FlexUInt::write_u64(self.encoding_buffer, encoded_length as u64)?; } } - self.encoding_buffer - .extend_from_slice(sexp_writer.container_writer.buffer()); + self.push_bytes(sexp_writer.container_writer.buffer()); Ok(()) } @@ -698,7 +722,7 @@ impl<'value, 'top> BinaryValueWriter_1_1<'value, 'top> { let fields_encoding_buffer = self.encoding_buffer; let container_writer = BinaryContainerWriter_1_1::new(self.allocator, fields_encoding_buffer); - let struct_writer = &mut BinaryStructWriter_1_1::new(container_writer); + let struct_writer = &mut BinaryStructWriter_1_1::new_delimited(container_writer); struct_writer.buffer().push(0xF3); // Start delimited Struct struct_fn(struct_writer)?; struct_writer.buffer().push(0xF0); // End delimited container @@ -712,13 +736,13 @@ impl<'value, 'top> BinaryValueWriter_1_1<'value, 'top> { struct_fn: F, ) -> IonResult<()> { // We're writing a length-prefixed struct, so we need to set up a space to encode the struct's fields. - let field_encoding_buffer = self - .allocator - .alloc_with(|| BumpVec::new_in(self.allocator)); + let field_encoding_buffer = self.allocator.alloc_with(|| { + BumpVec::with_capacity_in(DEFAULT_CONTAINER_BUFFER_SIZE, self.allocator) + }); // Create a BinaryStructWriter_1_1 to pass to the user's closure. let container_writer = BinaryContainerWriter_1_1::new(self.allocator, field_encoding_buffer); - let mut struct_writer = BinaryStructWriter_1_1::new(container_writer); + let mut struct_writer = BinaryStructWriter_1_1::new_length_prefixed(container_writer); // Pass it to the closure, allowing the user to encode field names/values. 
struct_fn(&mut struct_writer)?; // Write the appropriate opcode for a struct of this length @@ -734,10 +758,35 @@ impl<'value, 'top> BinaryValueWriter_1_1<'value, 'top> { FlexUInt::write_u64(self.encoding_buffer, encoded_length as u64)?; } } - self.encoding_buffer - .extend_from_slice(struct_writer.buffer()); + self.push_bytes(struct_writer.buffer()); Ok(()) } + + fn write_eexp< + 'macro_id, + F: for<'a> FnOnce(&mut ::MacroArgsWriter<'a>) -> IonResult<()>, + >( + self, + macro_id: impl Into>, + macro_fn: F, + ) -> IonResult<()> { + match macro_id.into() { + MacroIdRef::LocalName(_name) => { + // This would be handled by the system writer + todo!("macro invocation by name"); + } + MacroIdRef::LocalAddress(address) if address < 64 => { + // Invoke this ID with a one-byte opcode + self.encoding_buffer.push(address as u8); + } + MacroIdRef::LocalAddress(_address) => { + todo!("macros with addresses higher than 64"); + } + } + let container_writer = BinaryContainerWriter_1_1::new(self.allocator, self.encoding_buffer); + let mut args_writer = BinaryMacroArgsWriter_1_1 { container_writer }; + macro_fn(&mut args_writer) + } } impl<'value, 'top> Sealed for BinaryValueWriter_1_1<'value, 'top> {} @@ -747,36 +796,9 @@ impl<'value, 'top> ValueWriter for BinaryValueWriter_1_1<'value, 'top> { type SExpWriter<'a> = BinarySExpWriter_1_1<'value, 'top>; type StructWriter<'a> = BinaryStructWriter_1_1<'value, 'top>; - delegate! 
{ - to self { - fn write_null(self, ion_type: IonType) -> IonResult<()>; - fn write_bool(self, value: bool) -> IonResult<()>; - fn write_i64(self, value: i64) -> IonResult<()>; - fn write_int(self, value: &Int) -> IonResult<()>; - fn write_f32(self, value: f32) -> IonResult<()>; - fn write_f64(self, value: f64) -> IonResult<()>; - fn write_decimal(self, value: &Decimal) -> IonResult<()>; - fn write_timestamp(self, value: &Timestamp) -> IonResult<()>; - fn write_string(self, value: impl AsRef) -> IonResult<()>; - fn write_symbol(self, value: impl AsRawSymbolTokenRef) -> IonResult<()>; - fn write_clob(self, value: impl AsRef<[u8]>) -> IonResult<()>; - fn write_blob(self, value: impl AsRef<[u8]>) -> IonResult<()>; - fn write_list FnOnce(&mut Self::ListWriter<'a>) -> IonResult<()>>( - self, - list_fn: F, - ) -> IonResult<()>; - fn write_sexp FnOnce(&mut Self::SExpWriter<'a>) -> IonResult<()>>( - self, - sexp_fn: F, - ) -> IonResult<()>; - fn write_struct< - F: for<'a> FnOnce(&mut Self::StructWriter<'a>) -> IonResult<()>, - >( - self, - struct_fn: F, - ) -> IonResult<()>; - } - } + type MacroArgsWriter<'a> = BinaryMacroArgsWriter_1_1<'value, 'top>; + + delegate_value_writer_to_self!(); } pub struct BinaryAnnotatableValueWriter_1_1<'value, 'top> { @@ -852,16 +874,16 @@ impl<'value, 'top, SymbolType: AsRawSymbolTokenRef> [a] => { // Opcode 0xE7: A single FlexSym annotation follows self.buffer.push(0xE7); - Self::write_flex_sym_annotation(self.buffer, a)?; + FlexSym::encode_symbol(self.buffer, a); } [a1, a2] => { // Opcode 0xE8: Two FlexSym annotations follow self.buffer.push(0xE8); - Self::write_flex_sym_annotation(self.buffer, a1)?; - Self::write_flex_sym_annotation(self.buffer, a2)?; + FlexSym::encode_symbol(self.buffer, a1); + FlexSym::encode_symbol(self.buffer, a2); } _ => { - self.write_length_prefixed_flex_sym_annotation_sequence()?; + self.write_length_prefixed_flex_sym_annotation_sequence(); } } // We've encoded the annotations, now create a no-annotations 
ValueWriter to encode the value itself. @@ -872,42 +894,22 @@ impl<'value, 'top, SymbolType: AsRawSymbolTokenRef> fn write_flex_sym_annotation( buffer: &mut BumpVec<'top, u8>, annotation: impl AsRawSymbolTokenRef, - ) -> IonResult<()> { - match annotation.as_raw_symbol_token_ref() { - RawSymbolTokenRef::SymbolId(0) => { - // FlexSym 0x00 indicates that an opcode follows - // Opcode 0x70 is a symbol of length 0, i.e. `$0`. - buffer.extend_from_slice(&[0x00, 0x70]); - } - RawSymbolTokenRef::SymbolId(sid) => { - FlexInt::write_i64(buffer, sid as i64)?; - } - RawSymbolTokenRef::Text(cow_text) => { - let text = cow_text.as_ref(); - if text.is_empty() { - buffer.extend_from_slice(&[0x00, 0x80]); - return Ok(()); - } - let utf8_byte_length = -(text.len() as i64); - FlexInt::write_i64(buffer, utf8_byte_length)?; - buffer.extend_from_slice(text.as_bytes()); - } - } - Ok(()) + ) { + FlexSym::encode_symbol(buffer, annotation); } #[cold] - fn write_length_prefixed_flex_sym_annotation_sequence(&mut self) -> IonResult<()> { + fn write_length_prefixed_flex_sym_annotation_sequence(&mut self) { // A FlexUInt follows with the byte length of the FlexSym sequence that follows let mut annotations_buffer = BumpVec::new_in(self.allocator); for annotation in self.annotations { - Self::write_flex_sym_annotation(&mut annotations_buffer, annotation)?; + FlexSym::encode_symbol(&mut annotations_buffer, annotation); } // A FlexUInt follows that represents the length of a sequence of FlexSym-encoded annotations self.buffer.push(0xE9); - FlexUInt::write_u64(self.buffer, annotations_buffer.len() as u64)?; - self.buffer.extend_from_slice(annotations_buffer.as_slice()); - Ok(()) + FlexUInt::encode_u64(self.buffer, annotations_buffer.len() as u64); + self.buffer + .extend_from_slice_copy(annotations_buffer.as_slice()); } } @@ -923,6 +925,7 @@ impl<'value, 'top, SymbolType: AsRawSymbolTokenRef> ValueWriter type ListWriter<'a> = BinaryListWriter_1_1<'value, 'top>; type SExpWriter<'a> = 
BinarySExpWriter_1_1<'value, 'top>; type StructWriter<'a> = BinaryStructWriter_1_1<'value, 'top>; + type MacroArgsWriter<'a> = BinaryMacroArgsWriter_1_1<'value, 'top>; annotate_and_delegate!( IonType => write_null, @@ -957,6 +960,13 @@ impl<'value, 'top, SymbolType: AsRawSymbolTokenRef> ValueWriter ) -> IonResult<()> { self.encode_annotated(|value_writer| value_writer.write_struct(struct_fn)) } + fn write_eexp<'macro_id, F: for<'a> FnOnce(&mut Self::MacroArgsWriter<'a>) -> IonResult<()>>( + self, + macro_id: impl Into>, + macro_fn: F, + ) -> IonResult<()> { + self.encode_annotated(|value_writer| value_writer.write_eexp(macro_id, macro_fn)) + } } impl<'value, 'top, SymbolType: AsRawSymbolTokenRef> @@ -2571,7 +2581,7 @@ mod tests { // 8-byte struct 0xC9, // Enable FlexSym field name encoding - 0x00, + 0x01, // Inline 3-byte field name // ↓ f o o 0xFB, 0x66, 0x6F, 0x6F, @@ -2589,7 +2599,7 @@ mod tests { // FlexUInt 18 0x25, // Enable FlexSym field name encoding - 0x00, + 0x01, // Inline 3-byte field name // ↓ f o o 0xFB, 0x66, 0x6F, 0x6F, @@ -2623,7 +2633,7 @@ mod tests { // ↓ b a r 0x93, 0x62, 0x61, 0x72, // Enable FlexSym field name encoding - 0x00, + 0x01, // Inline 4-byte field name // ↓ q u u x 0xF9, 0x71, 0x75, 0x75, 0x78, @@ -2651,7 +2661,7 @@ mod tests { let test_cases: &[(TestStruct, &[u8])] = &[ // Empty struct (&[], &[0xF3, 0xF0]), - // Struct with a single FlexUInt field name + // Struct with a single symbol ID field name ( &[field(4, "foo")], &[ @@ -2666,7 +2676,7 @@ mod tests { 0xF0, ], ), - // Struct with multiple FlexUInt field names + // Struct with multiple symbol ID field names ( &[field(4, "foo"), field(5, "bar"), field(6, "baz")], &[ @@ -2692,14 +2702,12 @@ mod tests { 0xF0, ], ), - // Struct with single FlexSym field name + // Struct with single inline-text field name ( &[field("foo", "bar")], &[ // Delimited struct 0xF3, - // Enable FlexSym field name encoding - 0x00, // Inline 3-byte field name // ↓ f o o 0xFB, 0x66, 0x6F, 0x6F, @@ -2710,14 
+2718,12 @@ mod tests { 0xF0, ], ), - // Struct with multiple FlexSym field names + // Struct with multiple inline-text field names ( &[field("foo", "bar"), field("baz", "quux")], &[ // Delimited struct 0xF3, - // Enable FlexSym field name encoding - 0x00, // Inline 3-byte field name // ↓ f o o 0xFB, 0x66, 0x6F, 0x6F, @@ -2734,7 +2740,7 @@ mod tests { 0xF0, ], ), - // Struct with multiple FlexUInt field names followed by a FlexSym field name + // Struct with multiple symbol ID field names followed by an inline text field name ( &[field(4, "foo"), field(5, "bar"), field("quux", "quuz")], &[ @@ -2750,8 +2756,6 @@ mod tests { // 3-byte symbol // ↓ b a r 0x93, 0x62, 0x61, 0x72, - // Enable FlexSym field name encoding - 0x00, // Inline 4-byte field name // ↓ q u u x 0xF9, 0x71, 0x75, 0x75, 0x78, @@ -2937,14 +2941,40 @@ mod tests { 0.annotated_with(&[RawSymbolToken::Text("".into()), RawSymbolToken::SymbolId(0)]), &[ 0xE8, // Two FlexSym annotations follow - 0x00, // Opcode follows + 0x01, // Opcode follows 0x80, // String of length 0 - 0x00, // Opcode follows - 0x70, // Symbol ID $0 + 0x01, // Opcode follows + 0xE1, // 1-byte FixedUInt symbol ID follows + 0x00, // Symbol ID 0 0x50, // Integer 0 ], )?; Ok(()) } + + #[test] + fn write_macro_invocations() -> IonResult<()> { + encoding_test( + |writer: &mut LazyRawBinaryWriter_1_1<&mut Vec>| { + writer + .value_writer() + .without_annotations() + .write_eexp(0, |args| { + args.write_symbol("foo")? + .write_symbol("bar")? 
+ .write_symbol("baz")?; + Ok(()) + })?; + Ok(()) + }, + &[ + 0x00, // Invoke macro address 0 + 0x93, 0x66, 0x6f, 0x6f, // foo + 0x93, 0x62, 0x61, 0x72, // bar + 0x93, 0x62, 0x61, 0x7a, // baz + ], + )?; + Ok(()) + } } diff --git a/src/lazy/encoder/binary/v1_1/writer.rs b/src/lazy/encoder/binary/v1_1/writer.rs index df8f637b..5df16a64 100644 --- a/src/lazy/encoder/binary/v1_1/writer.rs +++ b/src/lazy/encoder/binary/v1_1/writer.rs @@ -34,6 +34,10 @@ pub struct LazyRawBinaryWriter_1_1 { encoding_buffer_ptr: Option<*mut ()>, } +/// The initial size of the backing array for the writer's bump allocator. +// This value was chosen somewhat arbitrarily and can be changed as needed. +const DEFAULT_BUMP_SIZE: usize = 16 * 1024; + impl LazyRawBinaryWriter_1_1 { /// Constructs a new binary writer and writes an Ion 1.1 Version Marker to output. pub fn new(mut output: W) -> IonResult { @@ -42,7 +46,7 @@ impl LazyRawBinaryWriter_1_1 { // Construct the writer Ok(Self { output, - allocator: BumpAllocator::new(), + allocator: BumpAllocator::with_capacity(DEFAULT_BUMP_SIZE), encoding_buffer_ptr: None, }) } @@ -90,9 +94,12 @@ impl LazyRawBinaryWriter_1_1 { Some(ptr) => unsafe { ptr_to_mut_ref::<'_, BumpVec<'_, u8>>(ptr) }, // Otherwise, allocate a new encoding buffer and set the pointer to refer to it. None => { - let buffer = self - .allocator - .alloc_with(|| BumpVec::new_in(&self.allocator)); + let buffer = self.allocator.alloc_with(|| { + // Use half of the bump allocator's backing array as an encoding space for this + // top level value. The other half of the bump can be used for incidental + // bookkeeping. + BumpVec::with_capacity_in(DEFAULT_BUMP_SIZE / 2, &self.allocator) + }); self.encoding_buffer_ptr = Some(mut_ref_to_ptr(buffer)); buffer } @@ -132,10 +139,8 @@ impl LazyRawWriter for LazyRawBinaryWriter_1_1 { impl MakeValueWriter for LazyRawBinaryWriter_1_1 { type ValueWriter<'a> = BinaryAnnotatableValueWriter_1_1<'a, 'a> where Self: 'a; - delegate! 
{ - to self { - fn value_writer(&mut self) -> Self::ValueWriter<'_>; - } + fn make_value_writer(&mut self) -> Self::ValueWriter<'_> { + self.value_writer() } } diff --git a/src/lazy/encoder/mod.rs b/src/lazy/encoder/mod.rs index a5c436f0..33562e9c 100644 --- a/src/lazy/encoder/mod.rs +++ b/src/lazy/encoder/mod.rs @@ -143,7 +143,7 @@ mod tests { ] "#; let test = |writer: &mut LazyRawTextWriter_1_0<&mut Vec>| { - writer.value_writer().write_list(|list| { + writer.make_value_writer().write_list(|list| { list.write(1)? .write(false)? .write(3f32)? @@ -173,7 +173,7 @@ mod tests { ) "#; let test = |writer: &mut LazyRawTextWriter_1_0<&mut Vec>| { - writer.value_writer().write_sexp(|sexp| { + writer.make_value_writer().write_sexp(|sexp| { sexp.write(1)? .write(false)? .write(3f32)? @@ -202,7 +202,7 @@ mod tests { } "#; let test = |writer: &mut LazyRawTextWriter_1_0<&mut Vec>| { - writer.value_writer().write_struct(|struct_| { + writer.make_value_writer().write_struct(|struct_| { struct_ .write("a", 1)? .write("b", false)? 
diff --git a/src/lazy/encoder/text/mod.rs b/src/lazy/encoder/text/mod.rs index 561be09a..19473332 100644 --- a/src/lazy/encoder/text/mod.rs +++ b/src/lazy/encoder/text/mod.rs @@ -68,7 +68,7 @@ impl MakeValueWriter for LazyRawTextWriter_1_0 { where Self: 'a; - fn value_writer(&mut self) -> Self::ValueWriter<'_> { + fn make_value_writer(&mut self) -> Self::ValueWriter<'_> { let value_writer = TextValueWriter_1_0::new(self, 0); TextAnnotatableValueWriter_1_0::new(value_writer) } diff --git a/src/lazy/encoder/text/value_writer.rs b/src/lazy/encoder/text/value_writer.rs index 3d07be59..0011b770 100644 --- a/src/lazy/encoder/text/value_writer.rs +++ b/src/lazy/encoder/text/value_writer.rs @@ -2,11 +2,13 @@ use crate::lazy::encoder::private::Sealed; use crate::lazy::encoder::text::LazyRawTextWriter_1_0; use crate::lazy::encoder::value_writer::internal::MakeValueWriter; use crate::lazy::encoder::value_writer::{ - AnnotatableValueWriter, SequenceWriter, StructWriter, ValueWriter, + delegate_value_writer_to, AnnotatableValueWriter, SequenceWriter, StructWriter, ValueWriter, }; use crate::lazy::encoder::write_as_ion::WriteAsIon; +use crate::lazy::never::Never; +use crate::lazy::text::raw::v1_1::reader::MacroIdRef; use crate::raw_symbol_token_ref::{AsRawSymbolTokenRef, RawSymbolTokenRef}; -use crate::result::IonResult; +use crate::result::{IonFailure, IonResult}; use crate::text::raw_text_writer::{RawTextWriter, WhitespaceConfig}; use crate::text::text_formatter::IonValueFormatter; use crate::types::IonType; @@ -237,7 +239,7 @@ impl<'top, W: Write> TextListWriter_1_0<'top, W> { impl<'top, W: Write> MakeValueWriter for TextListWriter_1_0<'top, W> { type ValueWriter<'a> = TextAnnotatableValueWriter_1_0<'a, W> where Self: 'a; - fn value_writer(&mut self) -> Self::ValueWriter<'_> { + fn make_value_writer(&mut self) -> Self::ValueWriter<'_> { self.container_writer.annotatable_value_writer() } } @@ -275,7 +277,7 @@ impl<'a, W: Write> TextSExpWriter_1_0<'a, W> { impl<'value, W: 
Write> MakeValueWriter for TextSExpWriter_1_0<'value, W> { type ValueWriter<'a> = TextAnnotatableValueWriter_1_0<'a, W> where Self: 'a; - fn value_writer(&mut self) -> Self::ValueWriter<'_> { + fn make_value_writer(&mut self) -> Self::ValueWriter<'_> { self.container_writer.annotatable_value_writer() } } @@ -333,42 +335,19 @@ impl<'value, W: Write + 'value, SymbolType: AsRawSymbolTokenRef> ValueWriter type SExpWriter<'a> = TextSExpWriter_1_0<'value, W>; type StructWriter<'a> = TextStructWriter_1_0<'value, W>; - delegate! { - to self.encode_annotations()? { - fn write_null(self, ion_type: IonType) -> IonResult<()>; - fn write_bool(self, value: bool) -> IonResult<()>; - fn write_i64(self, value: i64) -> IonResult<()>; - fn write_int(self, value: &Int) -> IonResult<()>; - fn write_f32(self, value: f32) -> IonResult<()>; - fn write_f64(self, value: f64) -> IonResult<()>; - fn write_decimal(self, value: &Decimal) -> IonResult<()>; - fn write_timestamp(self, value: &Timestamp) -> IonResult<()>; - fn write_string(self, value: impl AsRef) -> IonResult<()>; - fn write_symbol(self, value: impl AsRawSymbolTokenRef) -> IonResult<()>; - fn write_clob(self, value: impl AsRef<[u8]>) -> IonResult<()>; - fn write_blob(self, value: impl AsRef<[u8]>) -> IonResult<()>; - fn write_list FnOnce(&mut Self::ListWriter<'a>) -> IonResult<()>>( - self, - list_fn: F, - ) -> IonResult<()>; - fn write_sexp FnOnce(&mut Self::SExpWriter<'a>) -> IonResult<()>>( - self, - sexp_fn: F, - ) -> IonResult<()>; - fn write_struct< - F: for<'a> FnOnce(&mut Self::StructWriter<'a>) -> IonResult<()>, - >( - self, - struct_fn: F, - ) -> IonResult<()>; - } - } + // Ion 1.0 does not support macros + type MacroArgsWriter<'a> = Never; + + delegate_value_writer_to!(fallible closure |self_: Self| self_.encode_annotations()); } impl<'value, W: Write> ValueWriter for TextValueWriter_1_0<'value, W> { type ListWriter<'a> = TextListWriter_1_0<'value, W>; type SExpWriter<'a> = TextSExpWriter_1_0<'value, W>; type 
StructWriter<'a> = TextStructWriter_1_0<'value, W>; + + // Ion 1.0 does not support macros + type MacroArgsWriter<'a> = Never; fn write_null(mut self, ion_type: IonType) -> IonResult<()> { use crate::IonType::*; let null_text = match ion_type { @@ -512,4 +491,15 @@ impl<'value, W: Write> ValueWriter for TextValueWriter_1_0<'value, W> { struct_fn(&mut struct_writer)?; struct_writer.end() } + + fn write_eexp<'macro_id, F: for<'a> FnOnce(&mut Self::MacroArgsWriter<'a>) -> IonResult<()>>( + self, + macro_id: impl Into>, + _macro_fn: F, + ) -> IonResult<()> { + let id = macro_id.into(); + IonResult::encoding_error(format!( + "attempted to call macro {id:?}; macros are not supported in Ion 1.0" + )) + } } diff --git a/src/lazy/encoder/value_writer.rs b/src/lazy/encoder/value_writer.rs index 4d280444..eb9a94c2 100644 --- a/src/lazy/encoder/value_writer.rs +++ b/src/lazy/encoder/value_writer.rs @@ -1,10 +1,11 @@ use crate::lazy::encoder::value_writer::internal::MakeValueWriter; use crate::lazy::encoder::write_as_ion::{WriteAsIon, WriteAsIonValue}; +use crate::lazy::text::raw::v1_1::reader::MacroIdRef; use crate::raw_symbol_token_ref::AsRawSymbolTokenRef; use crate::{Decimal, Int, IonResult, IonType, Timestamp}; use delegate::delegate; -pub(crate) mod internal { +pub mod internal { use crate::lazy::encoder::value_writer::AnnotatableValueWriter; pub trait MakeValueWriter { @@ -12,7 +13,7 @@ pub(crate) mod internal { where Self: 'a; - fn value_writer(&mut self) -> Self::ValueWriter<'_>; + fn make_value_writer(&mut self) -> Self::ValueWriter<'_>; } } @@ -40,6 +41,7 @@ pub trait AnnotatableValueWriter: Sized { // Users can call `ValueWriter` methods on the `AnnotatedValueWriter` directly. Doing so // will implicitly call `without_annotations`. + delegate! 
{ to self.without_annotations() { fn write_null(self, ion_type: IonType) -> IonResult<()>; @@ -69,14 +71,24 @@ pub trait AnnotatableValueWriter: Sized { struct_fn: F, ) -> IonResult<()>; fn write(self, value: impl WriteAsIonValue) -> IonResult<()>; + fn write_eexp<'macro_id, F: for<'a> FnOnce(&mut ::MacroArgsWriter<'a>) -> IonResult<()>>( + self, + _macro_id: impl Into>, + _macro_fn: F, + ) -> IonResult<()>; } } } +pub trait MacroArgsWriter: SequenceWriter { + // TODO: methods for writing tagless encodings +} + pub trait ValueWriter: Sized { type ListWriter<'a>: SequenceWriter; type SExpWriter<'a>: SequenceWriter; type StructWriter<'a>: StructWriter; + type MacroArgsWriter<'a>: MacroArgsWriter; fn write_null(self, ion_type: IonType) -> IonResult<()>; fn write_bool(self, value: bool) -> IonResult<()>; @@ -106,11 +118,127 @@ pub trait ValueWriter: Sized { struct_fn: F, ) -> IonResult<()>; + fn write_eexp<'macro_id, F: for<'a> FnOnce(&mut Self::MacroArgsWriter<'a>) -> IonResult<()>>( + self, + _macro_id: impl Into>, + _macro_fn: F, + ) -> IonResult<()>; + fn write(self, value: impl WriteAsIonValue) -> IonResult<()> { value.write_as_ion_value(self) } } +/// There are several implementations of `ValueWriter` that simply delegate calls to an expression. +/// This macro takes an expression and calls the `delegate!` proc macro on it for all of the methods +/// in the ValueWriter trait. For example: +/// ```text +/// delegate_value_writer_to!() => delegate! { to self { ...signatures ... } } +/// delegate_value_writer_to!(foo) => delegate! { to self.foo { ...signatures ... } } +/// delegate_value_writer_to!(0) => delegate! { to self.0 { ...signatures ... } } +/// delegate_value_writer_to!( +/// closure +/// |self_: Self| { +/// self_.value_writer() +/// } +/// ) => delegate! { to self.value_writer() { ...signatures ... } } +/// delegate_value_writer_to!( +/// fallible closure +/// |self_: Self| { +/// self_.returns_result() +/// } +/// ) => delegate! 
{ to self.returns_result()? { ...signatures ... } } +/// ``` +/// +/// Notice that if no parameter expression is passed, it results in delegation to `self`, which is helpful if +/// the trait is implemented by calling methods on the type's inherent impls. +/// +/// Using this macro for such use cases centralizes the method signatures of ValueWriter, simplifying refactoring. +macro_rules! delegate_value_writer_to { + // Declarative Rust macros (those defined with `macro_rules!`) cannot work with a `self` instance + // from the enclosing context. Callers can pass `self` as an argument, but the macro's parameter + // cannot be named `self`. The `delegate!` macro circumvents this by being a proc macro, which + // does not have to adhere to the same macro hygiene rules as declarative macros. + // + // All of the patterns that this macro accepts are transformed into invocations of the final + // `fallible closure` pattern, allowing us to only write out all of the trait method signatures + // once. + // + // If no arguments are passed, trait method calls are delegated to inherent impl methods on `self`. + () => { + $crate::lazy::encoder::value_writer::delegate_value_writer_to!(closure |self_: Self| self_); + }; + // If an identifier is passed, it is treated as the name of a subfield of `self`. + ($name:ident) => { + $crate::lazy::encoder::value_writer::delegate_value_writer_to!(closure |self_: Self| self_.$name); + }; + // If a closure is provided, trait method calls are delegated to the closure's return value. + (closure $f:expr) => { + // In order to forward this call to the `fallible closure` pattern, the provided closure is + // wrapped in another closure that wraps the closure's output in IonResult::Ok(_). The + // compiler can eliminate the redundant closure call. 
+ $crate::lazy::encoder::value_writer::delegate_value_writer_to!(fallible closure |self_: Self| { + let infallible_closure = $f; + $crate::IonResult::Ok(infallible_closure(self_)) + }); + }; + // If a fallible closure is provided, it will be called. If it returns an `Err`, the method + // will return. Otherwise, trait method calls are delegated to the `Ok(_)` value. + (fallible closure $f:expr) => { + // The `self` keyword can only be used within the `delegate!` proc macro. + delegate! { + to {let f = $f; f(self)?} { + fn write_null(self, ion_type: IonType) -> IonResult<()>; + fn write_bool(self, value: bool) -> IonResult<()>; + fn write_i64(self, value: i64) -> IonResult<()>; + fn write_int(self, value: &Int) -> IonResult<()>; + fn write_f32(self, value: f32) -> IonResult<()>; + fn write_f64(self, value: f64) -> IonResult<()>; + fn write_decimal(self, value: &Decimal) -> IonResult<()>; + fn write_timestamp(self, value: &Timestamp) -> IonResult<()>; + fn write_string(self, value: impl AsRef) -> IonResult<()>; + fn write_symbol(self, value: impl AsRawSymbolTokenRef) -> IonResult<()>; + fn write_clob(self, value: impl AsRef<[u8]>) -> IonResult<()>; + fn write_blob(self, value: impl AsRef<[u8]>) -> IonResult<()>; + fn write_list FnOnce(&mut Self::ListWriter<'a>) -> IonResult<()>>( + self, + list_fn: F, + ) -> IonResult<()>; + fn write_sexp FnOnce(&mut Self::SExpWriter<'a>) -> IonResult<()>>( + self, + sexp_fn: F, + ) -> IonResult<()>; + fn write_struct< + F: for<'a> FnOnce(&mut Self::StructWriter<'a>) -> IonResult<()>, + >( + self, + struct_fn: F, + ) -> IonResult<()>; + fn write_eexp< + 'macro_id, + F: for<'a> FnOnce(&mut Self::MacroArgsWriter<'a>) -> IonResult<()> + >( + self, + macro_id: impl Into>, + macro_fn: F + ) -> IonResult<()>; + } + } + }; +} + +/// [`delegate_value_writer_to`] allows you to omit arguments altogether, but that makes its effect +/// a bit unclear. 
This macro calls [`delegate_value_writer_to`] with no parameters but has a more +/// informative name. +macro_rules! delegate_value_writer_to_self { + () => { + $crate::lazy::encoder::value_writer::delegate_value_writer_to!(); + }; +} + +pub(crate) use delegate_value_writer_to; +pub(crate) use delegate_value_writer_to_self; + pub trait StructWriter { /// Writes a struct field using the provided name/value pair. fn write( @@ -136,6 +264,10 @@ macro_rules! delegate_and_return_self { } pub trait SequenceWriter: MakeValueWriter { + fn value_writer(&mut self) -> Self::ValueWriter<'_> { + ::make_value_writer(self) + } + fn annotate<'a, A: AsRawSymbolTokenRef>( &'a mut self, annotations: &'a [A], @@ -147,7 +279,7 @@ pub trait SequenceWriter: MakeValueWriter { /// Writes a value in the current context (list, s-expression, or stream) and upon success /// returns another reference to `self` to enable method chaining. fn write(&mut self, value: V) -> IonResult<&mut Self> { - value.write_as_ion(self.value_writer())?; + value.write_as_ion(self.make_value_writer())?; Ok(self) } @@ -168,10 +300,15 @@ pub trait SequenceWriter: MakeValueWriter { impl AsRef<[u8]> => write_blob, ); - // XXX: For now, it's not possible to offer versions of `write_list`, `write_sexp`, or - // `write_struct`. This is due to a point-in-time limitation in the borrow checker[1]. + // XXX: For now, it's not possible to offer versions of `write_list`, `write_sexp`, + // `write_struct` or `write_eexp`. This is due to a point-in-time limitation in the borrow checker[1]. // It is still possible to call (e.g.) // self.value_writer().list_writer(...) // as a workaround. // [1]: https://blog.rust-lang.org/2022/10/28/gats-stabilization.html#implied-static-requirement-from-higher-ranked-trait-bounds + // + // The ValueWriter implementation of these methods moves `self`. In contrast, all of the methods + // in the SequenceWriter interface take `&mut self`, which adds another lifetime to the mix. 
The + // borrow checker is not currently able to tell that `&mut self`'s lifetime will outlive the + // closure argument's. } diff --git a/src/lazy/encoder/write_as_ion.rs b/src/lazy/encoder/write_as_ion.rs index f8b35f7c..7df19176 100644 --- a/src/lazy/encoder/write_as_ion.rs +++ b/src/lazy/encoder/write_as_ion.rs @@ -69,6 +69,7 @@ macro_rules! impl_write_as_ion_value { // The caller defined an expression to write other than `self` (e.g. `*self`, `*self.0`, etc) ($target_type:ty => $method:ident with $self:ident as $value:expr, $($rest:tt)*) => { impl WriteAsIonValue for $target_type { + #[inline] fn write_as_ion_value(&$self, writer: V) -> IonResult<()> { writer.$method($value) } @@ -78,6 +79,7 @@ macro_rules! impl_write_as_ion_value { // We're writing the expression `self` ($target_type:ty => $method:ident, $($rest:tt)*) => { impl WriteAsIonValue for $target_type { + #[inline] fn write_as_ion_value(&self, writer: V) -> IonResult<()> { writer.$method(self) } @@ -106,12 +108,14 @@ impl_write_as_ion_value!( ); impl<'b> WriteAsIonValue for RawSymbolTokenRef<'b> { + #[inline] fn write_as_ion_value(&self, writer: V) -> IonResult<()> { writer.write_symbol(self) } } impl<'b> WriteAsIonValue for SymbolRef<'b> { + #[inline] fn write_as_ion_value(&self, writer: V) -> IonResult<()> { writer.write_symbol(self) } @@ -124,6 +128,7 @@ impl WriteAsIonValue for [u8; N] { } impl WriteAsIonValue for &T { + #[inline] fn write_as_ion_value(&self, writer: V) -> IonResult<()> { (*self).write_as_ion_value(writer) } diff --git a/src/lazy/never.rs b/src/lazy/never.rs index 9281046d..fd80001c 100644 --- a/src/lazy/never.rs +++ b/src/lazy/never.rs @@ -1,9 +1,15 @@ use std::fmt::Debug; use crate::lazy::decoder::{LazyDecoder, LazyRawValueExpr}; +use crate::lazy::encoder::value_writer::internal::MakeValueWriter; +use crate::lazy::encoder::value_writer::{ + AnnotatableValueWriter, MacroArgsWriter, SequenceWriter, StructWriter, ValueWriter, +}; +use 
crate::lazy::encoder::write_as_ion::WriteAsIon; use crate::lazy::expanded::macro_evaluator::{MacroExpr, RawEExpression}; use crate::lazy::text::raw::v1_1::reader::MacroIdRef; -use crate::IonResult; +use crate::raw_symbol_token_ref::AsRawSymbolTokenRef; +use crate::{Decimal, Int, IonResult, IonType, Timestamp}; /// An uninhabited type that signals to the compiler that related code paths are not reachable. #[derive(Debug, Copy, Clone)] @@ -31,3 +37,128 @@ impl<'top, D: LazyDecoder> From for MacroExpr<'top, D> { unreachable!("macro in Ion 1.0 (method: into)") } } + +impl AnnotatableValueWriter for Never { + type ValueWriter = Never; + type AnnotatedValueWriter<'a, SymbolType: AsRawSymbolTokenRef + 'a> = Never where Self: 'a; + + fn with_annotations<'a, SymbolType: AsRawSymbolTokenRef>( + self, + _annotations: &'a [SymbolType], + ) -> Self::AnnotatedValueWriter<'a, SymbolType> + where + Self: 'a, + { + unreachable!("AnnotatableValueWriter::with_annotations in Never") + } + + fn without_annotations(self) -> Self::ValueWriter { + unreachable!("AnnotatableValueWriter::without_annotations in Never") + } +} + +impl SequenceWriter for Never {} + +impl StructWriter for Never { + fn write( + &mut self, + _name: A, + _value: V, + ) -> IonResult<&mut Self> { + unreachable!("StructWriter::write in Never") + } +} + +impl MakeValueWriter for Never { + type ValueWriter<'a> = Never where Self: 'a; + + fn make_value_writer(&mut self) -> Self::ValueWriter<'_> { + unreachable!("MakeValueWriter::value_writer in Never") + } +} + +impl MacroArgsWriter for Never {} + +impl ValueWriter for Never { + type ListWriter<'a> = Never; + type SExpWriter<'a> = Never; + type StructWriter<'a> = Never; + type MacroArgsWriter<'a> = Never; + + fn write_null(self, _ion_type: IonType) -> IonResult<()> { + unreachable!("ValueWriter::write_null in Never") + } + + fn write_bool(self, _value: bool) -> IonResult<()> { + unreachable!("ValueWriter::write_bool in Never") + } + + fn write_i64(self, _value: i64) -> 
IonResult<()> { + unreachable!("ValueWriter::write_i64 in Never") + } + + fn write_int(self, _value: &Int) -> IonResult<()> { + unreachable!("ValueWriter::write_int in Never") + } + + fn write_f32(self, _value: f32) -> IonResult<()> { + unreachable!("ValueWriter::write_f32 in Never") + } + + fn write_f64(self, _value: f64) -> IonResult<()> { + unreachable!("ValueWriter::write_f64 in Never") + } + + fn write_decimal(self, _value: &Decimal) -> IonResult<()> { + unreachable!("ValueWriter::write_decimal in Never") + } + + fn write_timestamp(self, _value: &Timestamp) -> IonResult<()> { + unreachable!("ValueWriter::write_timestamp in Never") + } + + fn write_string(self, _value: impl AsRef) -> IonResult<()> { + unreachable!("ValueWriter::write_string in Never") + } + + fn write_symbol(self, _value: impl AsRawSymbolTokenRef) -> IonResult<()> { + unreachable!("ValueWriter::write_symbol in Never") + } + + fn write_clob(self, _value: impl AsRef<[u8]>) -> IonResult<()> { + unreachable!("ValueWriter::write_clob in Never") + } + + fn write_blob(self, _value: impl AsRef<[u8]>) -> IonResult<()> { + unreachable!("ValueWriter::write_blob in Never") + } + + fn write_list FnOnce(&mut Self::ListWriter<'a>) -> IonResult<()>>( + self, + _list_fn: F, + ) -> IonResult<()> { + unreachable!("ValueWriter::write_list in Never") + } + + fn write_sexp FnOnce(&mut Self::SExpWriter<'a>) -> IonResult<()>>( + self, + _sexp_fn: F, + ) -> IonResult<()> { + unreachable!("ValueWriter::write_sexp in Never") + } + + fn write_struct FnOnce(&mut Self::StructWriter<'a>) -> IonResult<()>>( + self, + _struct_fn: F, + ) -> IonResult<()> { + unreachable!("ValueWriter::write_struct in Never") + } + + fn write_eexp<'macro_id, F: for<'a> FnOnce(&mut Self::MacroArgsWriter<'a>) -> IonResult<()>>( + self, + _macro_id: impl Into>, + _macro_fn: F, + ) -> IonResult<()> { + unreachable!("ValueWriter::write_eexp in Never") + } +} diff --git a/src/lazy/text/raw/v1_1/reader.rs b/src/lazy/text/raw/v1_1/reader.rs index 
cd0baec4..37f70ff9 100644 --- a/src/lazy/text/raw/v1_1/reader.rs +++ b/src/lazy/text/raw/v1_1/reader.rs @@ -34,6 +34,7 @@ pub type MacroAddress = usize; /// The index at which a value expression can be found within a template's body. pub type TemplateBodyExprAddress = usize; + #[derive(Copy, Clone, Debug, PartialEq)] pub enum MacroIdRef<'data> { LocalName(&'data str), @@ -41,6 +42,18 @@ pub enum MacroIdRef<'data> { // TODO: Addresses and qualified names } +impl<'data> From for MacroIdRef<'data> { + fn from(address: usize) -> Self { + MacroIdRef::LocalAddress(address) + } +} + +impl<'data> From<&'data str> for MacroIdRef<'data> { + fn from(name: &'data str) -> Self { + MacroIdRef::LocalName(name) + } +} + #[derive(Copy, Clone)] pub struct RawTextEExpression_1_1<'top> { pub(crate) encoded_expr: EncodedTextMacroInvocation, diff --git a/src/reader.rs b/src/reader.rs index 0486fdf6..7bf38544 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -22,8 +22,6 @@ use std::fmt::{Display, Formatter}; use crate::types::Str; /// Configures and constructs new instances of [Reader]. pub struct ReaderBuilder { - // This will be set to to `Some` catalog when the reader builder is created with catalog information. - // Default value for this field will be set to `None`. catalog: Box, } @@ -31,6 +29,8 @@ impl ReaderBuilder { /// Constructs a [ReaderBuilder] pre-populated with common default settings. pub fn new() -> ReaderBuilder { ReaderBuilder { + // `EmptyCatalog` is a zero-sized type; creating a Box does not actually + // cause a heap allocation. 
catalog: Box::::default(), } } diff --git a/src/result/encoding_error.rs b/src/result/encoding_error.rs index ea75dc55..eecf786f 100644 --- a/src/result/encoding_error.rs +++ b/src/result/encoding_error.rs @@ -1,3 +1,4 @@ +use ice_code::ice as cold_path; use std::borrow::Cow; use thiserror::Error; @@ -11,7 +12,7 @@ pub struct EncodingError { impl EncodingError { pub(crate) fn new(description: impl Into>) -> Self { EncodingError { - description: description.into(), + description: cold_path! { encoding_error => description.into()}, } } } diff --git a/src/result/io_error.rs b/src/result/io_error.rs index e344f4be..266bd658 100644 --- a/src/result/io_error.rs +++ b/src/result/io_error.rs @@ -1,14 +1,22 @@ use std::io; +use std::io::Error; use thiserror::Error; /// Indicates that a read or write operation failed due to an I/O error. #[derive(Debug, Error)] #[error("{source:?}")] pub struct IoError { - #[from] source: io::Error, } +impl From for IoError { + #[cold] + #[inline(never)] + fn from(value: Error) -> Self { + IoError { source: value } + } +} + impl IoError { pub fn source(&self) -> &io::Error { &self.source