Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implements writing e-expressions in binary 1.1 #722

Merged
merged 5 commits into from
Mar 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,9 @@ num-integer = "0.1.44"
num-traits = "0.2"
arrayvec = "0.7"
smallvec = {version ="1.9.0", features = ["const_generics"]}
bumpalo = {version = "3.14.0", features = ["collections", "std"]}
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

bumpalo = {version = "3.15.3", features = ["collections", "std"]}
digest = { version = "0.9", optional = true }
ice_code = "0.1.4"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you sure we can trust the author of this dependency? 😉

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems pretty shady to me!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe that's why it's so cold.

sha2 = { version = "0.9", optional = true }
serde = { version = "1.0", features = ["derive"], optional = true }
serde_with = { version = "2.0", optional = true }
Expand All @@ -86,11 +87,16 @@ test-generator = "0.3"
memmap = "0.7.0"
criterion = "0.5.1"
rand = "0.8.5"
tempfile = "3.10.0"

[[bench]]
name = "read_many_structs"
harness = false

[[bench]]
name = "write_many_structs"
harness = false

[[bench]]
name = "encoding_primitives"
harness = false
Expand Down
4 changes: 0 additions & 4 deletions benches/encoding_primitives.rs
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,6 @@ pub fn criterion_benchmark(c: &mut Criterion) {
}

fn roundtrip_var_uint_test(unsigned_values: &[u64]) -> IonResult<Vec<u8>> {
println!("Roundtripping unsigned values as VarUInts to check for correctness.");
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🗺️ These println! statements were being run even when the associated benchmark was not, leading to weird output.

let mut encoded_values_buffer = Vec::new();
for value in unsigned_values {
VarUInt::write_u64(&mut encoded_values_buffer, *value)?;
Expand All @@ -159,7 +158,6 @@ fn roundtrip_var_uint_test(unsigned_values: &[u64]) -> IonResult<Vec<u8>> {
}

fn roundtrip_var_int_test(signed_values: &[i64]) -> IonResult<Vec<u8>> {
println!("Roundtripping signed values as VarInts to check for correctness.");
let mut encoded_values_buffer = Vec::new();
for value in signed_values {
VarInt::write_i64(&mut encoded_values_buffer, *value)?;
Expand All @@ -176,7 +174,6 @@ fn roundtrip_var_int_test(signed_values: &[i64]) -> IonResult<Vec<u8>> {
}

fn roundtrip_flex_uint_test(unsigned_values: &[u64]) -> IonResult<Vec<u8>> {
println!("Roundtripping unsigned values as FlexUInts to check for correctness.");
let mut encoded_values_buffer = Vec::new();
for value in unsigned_values {
FlexUInt::write_u64(&mut encoded_values_buffer, *value)?;
Expand All @@ -193,7 +190,6 @@ fn roundtrip_flex_uint_test(unsigned_values: &[u64]) -> IonResult<Vec<u8>> {
}

fn roundtrip_flex_int_test(signed_values: &[i64]) -> IonResult<Vec<u8>> {
println!("Roundtripping signed values as FlexInts to check for correctness.");
let mut encoded_values_buffer = Vec::new();
for value in signed_values {
FlexInt::write_i64(&mut encoded_values_buffer, *value)?;
Expand Down
273 changes: 273 additions & 0 deletions benches/write_many_structs.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,273 @@
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use ion_rs::lazy::encoder::binary::v1_0::writer::LazyRawBinaryWriter_1_0;
use nom::AsBytes;

use ion_rs::lazy::encoder::binary::v1_1::writer::LazyRawBinaryWriter_1_1;
use ion_rs::lazy::encoder::value_writer::{AnnotatableValueWriter, SequenceWriter};
use ion_rs::lazy::encoder::value_writer::{StructWriter, ValueWriter};
use ion_rs::RawSymbolTokenRef;

fn write_struct_with_string_values(value_writer: impl ValueWriter) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🗺️ Each of these methods takes an impl ValueWriter, allowing us to pass in any writer in the set of {text, binary} x {v1.0, v1.1} and ensuring that the same logic is used for each.

value_writer
.write_struct(|fields| {
fields
// $10 = timestamp
.write(10, black_box(1670446800245i64))?
// $11 = threadId
.write(11, black_box(418))?
// $12 = threadName
.write(12, black_box("scheduler-thread-6"))?
// $13 = loggerName
.write(13, black_box("com.example.organization.product.component.ClassName"))?
// $14 = logLevel
.write(14, black_box("INFO"))?
// $15 = format
.write(15, black_box("Request status: {} Client ID: {} Client Host: {} Client Region: {} Timestamp: {}"))?
// $16 = parameters
.write(16, &[
black_box("SUCCESS"),
black_box("example-client-1"),
black_box("aws-us-east-5f-18b4fa"),
black_box("region 4"),
black_box("2022-12-07T20:59:59.744000Z"),
])?;
Ok(())
}).unwrap();
}

/// Writes a single log-event struct in which every text value is a symbol ID.
///
/// Field names are the symbol IDs `$10` through `$16`; the text values are the symbol IDs
/// `$17` through `$25`. Panics if the writer returns an error.
fn write_struct_with_symbol_values(value_writer: impl ValueWriter) {
    // Values for the `parameters` field; each symbol ID stands in for the text noted beside it.
    let parameters = [
        symbol_id(black_box(21)), // $21 = SUCCESS
        symbol_id(black_box(22)), // $22 = example-client-1
        symbol_id(black_box(23)), // $23 = aws-us-east-5f-18b4fa
        symbol_id(black_box(24)), // $24 = region 4
        symbol_id(black_box(25)), // $25 = 2022-12-07T20:59:59.744000Z (string, not timestamp)
    ];

    let outcome = value_writer.write_struct(|struct_writer| {
        struct_writer
            .write(10, black_box(1670446800245i64))? // $10 = timestamp
            .write(11, black_box(418))? // $11 = threadId
            .write(12, symbol_id(black_box(17)))? // $12 = threadName, $17 = scheduler-thread-6
            // $13 = loggerName, $18 = com.example.organization.product.component.ClassName
            .write(13, symbol_id(black_box(18)))?
            .write(14, symbol_id(black_box(19)))? // $14 = logLevel, $19 = INFO
            // $15 = format, $20 = Request status: {} Client ID: {} Client Host: {} Client Region: {} Timestamp: {}
            .write(15, symbol_id(black_box(20)))?
            .write(16, &parameters)?; // $16 = parameters
        Ok(())
    });
    outcome.unwrap();
}

/// Writes the log event as an e-expression (`write_eexp` called with `0`), passing the longer
/// text arguments as symbol IDs.
///
/// The last argument is a nested e-expression (`write_eexp` called with `1`). An error from
/// either invocation is propagated out of the closures with `?` and surfaces at the single
/// `unwrap` at the end, matching `write_eexp_with_string_values`. Panics if the writer returns
/// an error.
fn write_eexp_with_symbol_values(value_writer: impl ValueWriter) {
    value_writer
        .write_eexp(0, |args| {
            args.write(black_box(1670446800245i64))? // timestamp
                .write(black_box(418))? // thread_id
                // These are still strings because they're so short that using symbols to represent
                // them wouldn't be beneficial.
                .write(black_box("6"))? // thread_name
                .write(black_box("1"))? // client_num
                .write(symbol_id(black_box(10)))? // host_id: "18b4fa" ($10)
                .value_writer()
                .without_annotations()
                .write_eexp(1, |args| {
                    args
                        // $11 = region 4
                        .write(symbol_id(black_box(11)))?
                        // $12 = "2022-12-07T20:59:59.744000Z" (string, not timestamp)
                        .write(symbol_id(black_box(12)))?;
                    Ok(())
                })?; // propagate rather than panic inside a closure that returns a Result
            Ok(())
        })
        .unwrap();
}

/// Writes the log event as an e-expression (`write_eexp` called with `0`) whose text
/// arguments are all plain strings.
///
/// The last argument is a nested e-expression (`write_eexp` called with `1`) carrying the
/// region and the timestamp string. Panics if the writer returns an error.
fn write_eexp_with_string_values(value_writer: impl ValueWriter) {
    let outcome = value_writer.write_eexp(0, |event_args| {
        // Write the leading arguments, then obtain a writer for the nested e-expression.
        let nested_writer = event_args
            .write(black_box(1670446800245i64))? // timestamp
            .write(black_box(418))? // thread_id
            .write(black_box("6"))? // thread_name
            .write(black_box("1"))? // client_num
            .write(black_box("18b4fa"))? // host_id
            .value_writer()
            .without_annotations();

        nested_writer.write_eexp(1, |region_args| {
            region_args
                .write(black_box("region 4"))?
                .write(black_box("2022-12-07T20:59:59.744000Z"))?;
            Ok(())
        })?;
        Ok(())
    });
    outcome.unwrap();
}

/// Wraps `sid` in `RawSymbolTokenRef::SymbolId` so the benchmark bodies can pass a symbol ID
/// wherever a value is written.
fn symbol_id(sid: usize) -> RawSymbolTokenRef<'static> {
RawSymbolTokenRef::SymbolId(sid)
}

/// Registers the struct-writing benchmarks in two groups, "binary 1.0" and "binary 1.1".
///
/// Every benchmark clears one shared buffer, writes a single value with a fresh raw binary
/// writer, flushes it, and passes the encoded bytes to `black_box`. After each benchmark, the
/// size of the last encoding is printed if (and only if) that benchmark actually ran.
pub fn criterion_benchmark(c: &mut Criterion) {
    let mut buffer = Vec::with_capacity(1024 * 1024);

    let mut binary_1_0_group = c.benchmark_group("binary 1.0");
    binary_1_0_group.bench_function("write structs with string values", |b| {
        b.iter(|| {
            buffer.clear();
            let mut writer = LazyRawBinaryWriter_1_0::new(&mut buffer).unwrap();
            write_struct_with_string_values(writer.value_writer().without_annotations());
            writer.flush().unwrap();
            black_box(buffer.as_bytes());
        });
    });
    // The runner allows the user to specify which benchmarks to run. If the benchmark above ran,
    // then the buffer will not be empty.
    // This print statement cannot live within the benchmark itself, as both `bench_function` and
    // `iter` are called several times.
    if !buffer.is_empty() {
        println!("\nencoded 1.0 size with string values: {}\n", buffer.len());
        buffer.clear();
    }

    binary_1_0_group.bench_function("write structs with symbol values", |b| {
        b.iter(|| {
            buffer.clear();
            let mut writer = LazyRawBinaryWriter_1_0::new(&mut buffer).unwrap();
            write_struct_with_symbol_values(writer.value_writer().without_annotations());
            writer.flush().unwrap();

            black_box(buffer.as_bytes());
        });
    });
    if !buffer.is_empty() {
        println!("\nencoded 1.0 size with symbol values: {}\n", buffer.len());
        buffer.clear();
    }
    binary_1_0_group.finish();

    let mut binary_1_1_group = c.benchmark_group("binary 1.1");
    binary_1_1_group.bench_function("write structs with string values", |b| {
        b.iter(|| {
            buffer.clear();
            let mut writer = LazyRawBinaryWriter_1_1::new(&mut buffer).unwrap();
            write_struct_with_string_values(writer.value_writer().without_annotations());
            writer.flush().unwrap();
            black_box(buffer.as_bytes());
        });
    });
    if !buffer.is_empty() {
        println!("\nencoded 1.1 size with string values: {}\n", buffer.len());
        buffer.clear();
    }

    binary_1_1_group.bench_function("write structs with symbol values", |b| {
        b.iter(|| {
            buffer.clear();
            let mut writer = LazyRawBinaryWriter_1_1::new(&mut buffer).unwrap();
            write_struct_with_symbol_values(writer.value_writer().without_annotations());
            writer.flush().unwrap();

            black_box(buffer.as_bytes());
        });
    });
    if !buffer.is_empty() {
        println!("\nencoded 1.1 size with symbol values: {}\n", buffer.len());
        buffer.clear();
    }

    binary_1_1_group.bench_function("write delimited structs with string values", |b| {
        b.iter(|| {
            buffer.clear();
            let mut writer = LazyRawBinaryWriter_1_1::new(&mut buffer).unwrap();
            write_struct_with_string_values(
                writer
                    .value_writer()
                    .with_delimited_containers()
                    .without_annotations(),
            );
            writer.flush().unwrap();
            black_box(buffer.as_bytes());
        });
    });
    if !buffer.is_empty() {
        println!(
            "\nencoded 1.1 size, delimited structs with string values: {}\n",
            buffer.len()
        );
        buffer.clear();
    }

    binary_1_1_group.bench_function("write delimited structs with symbol values", |b| {
        b.iter(|| {
            buffer.clear();
            let mut writer = LazyRawBinaryWriter_1_1::new(&mut buffer).unwrap();
            write_struct_with_symbol_values(
                writer
                    .value_writer()
                    .with_delimited_containers()
                    .without_annotations(),
            );
            writer.flush().unwrap();

            black_box(buffer.as_bytes());
        });
    });
    if !buffer.is_empty() {
        // Label this report as the delimited variant so it cannot be mistaken for the
        // length-prefixed "symbol values" report printed earlier in this group.
        println!(
            "\nencoded 1.1 size, delimited structs with symbol values: {}\n",
            buffer.len()
        );
        buffer.clear();
    }

    binary_1_1_group.bench_function("write structs with string values using macros", |b| {
        b.iter(|| {
            buffer.clear();
            let mut writer = LazyRawBinaryWriter_1_1::new(&mut buffer).unwrap();
            write_eexp_with_string_values(writer.value_writer().without_annotations());
            writer.flush().unwrap();
            black_box(buffer.as_bytes());
        });
    });
    if !buffer.is_empty() {
        println!(
            "\nencoded 1.1 size with string values using macros: {}\n",
            buffer.len()
        );
        buffer.clear();
    }

    binary_1_1_group.bench_function("write structs with symbol values using macros", |b| {
        b.iter(|| {
            buffer.clear();
            let mut writer = LazyRawBinaryWriter_1_1::new(&mut buffer).unwrap();
            write_eexp_with_symbol_values(writer.value_writer().without_annotations());
            writer.flush().unwrap();
            black_box(buffer.as_bytes());
        });
    });
    if !buffer.is_empty() {
        println!(
            "\nencoded 1.1 size with symbol values using macros: {}\n",
            buffer.len()
        );
        buffer.clear();
    }

    binary_1_1_group.finish();
}

criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);
Loading
Loading