-
Notifications
You must be signed in to change notification settings - Fork 36
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Implements writing e-expressions in binary 1.1 #722
Changes from all commits
7788449
859105f
1ecbc0b
5b9b936
d7fb96a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -71,8 +71,9 @@ num-integer = "0.1.44" | |
num-traits = "0.2" | ||
arrayvec = "0.7" | ||
smallvec = {version ="1.9.0", features = ["const_generics"]} | ||
bumpalo = {version = "3.14.0", features = ["collections", "std"]} | ||
bumpalo = {version = "3.15.3", features = ["collections", "std"]} | ||
digest = { version = "0.9", optional = true } | ||
ice_code = "0.1.4" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are you sure we can trust the author of this dependency? 😉 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Seems pretty shady to me! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe that's why it's so cold. |
||
sha2 = { version = "0.9", optional = true } | ||
serde = { version = "1.0", features = ["derive"], optional = true } | ||
serde_with = { version = "2.0", optional = true } | ||
|
@@ -86,11 +87,16 @@ test-generator = "0.3" | |
memmap = "0.7.0" | ||
criterion = "0.5.1" | ||
rand = "0.8.5" | ||
tempfile = "3.10.0" | ||
|
||
[[bench]] | ||
name = "read_many_structs" | ||
harness = false | ||
|
||
[[bench]] | ||
name = "write_many_structs" | ||
harness = false | ||
|
||
[[bench]] | ||
name = "encoding_primitives" | ||
harness = false | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -142,7 +142,6 @@ pub fn criterion_benchmark(c: &mut Criterion) { | |
} | ||
|
||
fn roundtrip_var_uint_test(unsigned_values: &[u64]) -> IonResult<Vec<u8>> { | ||
println!("Roundtripping unsigned values as VarUInts to check for correctness."); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🗺️ These |
||
let mut encoded_values_buffer = Vec::new(); | ||
for value in unsigned_values { | ||
VarUInt::write_u64(&mut encoded_values_buffer, *value)?; | ||
|
@@ -159,7 +158,6 @@ fn roundtrip_var_uint_test(unsigned_values: &[u64]) -> IonResult<Vec<u8>> { | |
} | ||
|
||
fn roundtrip_var_int_test(signed_values: &[i64]) -> IonResult<Vec<u8>> { | ||
println!("Roundtripping signed values as VarInts to check for correctness."); | ||
let mut encoded_values_buffer = Vec::new(); | ||
for value in signed_values { | ||
VarInt::write_i64(&mut encoded_values_buffer, *value)?; | ||
|
@@ -176,7 +174,6 @@ fn roundtrip_var_int_test(signed_values: &[i64]) -> IonResult<Vec<u8>> { | |
} | ||
|
||
fn roundtrip_flex_uint_test(unsigned_values: &[u64]) -> IonResult<Vec<u8>> { | ||
println!("Roundtripping unsigned values as FlexUInts to check for correctness."); | ||
let mut encoded_values_buffer = Vec::new(); | ||
for value in unsigned_values { | ||
FlexUInt::write_u64(&mut encoded_values_buffer, *value)?; | ||
|
@@ -193,7 +190,6 @@ fn roundtrip_flex_uint_test(unsigned_values: &[u64]) -> IonResult<Vec<u8>> { | |
} | ||
|
||
fn roundtrip_flex_int_test(signed_values: &[i64]) -> IonResult<Vec<u8>> { | ||
println!("Roundtripping signed values as FlexInts to check for correctness."); | ||
let mut encoded_values_buffer = Vec::new(); | ||
for value in signed_values { | ||
FlexInt::write_i64(&mut encoded_values_buffer, *value)?; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,273 @@ | ||
use criterion::{black_box, criterion_group, criterion_main, Criterion}; | ||
use ion_rs::lazy::encoder::binary::v1_0::writer::LazyRawBinaryWriter_1_0; | ||
use nom::AsBytes; | ||
|
||
use ion_rs::lazy::encoder::binary::v1_1::writer::LazyRawBinaryWriter_1_1; | ||
use ion_rs::lazy::encoder::value_writer::{AnnotatableValueWriter, SequenceWriter}; | ||
use ion_rs::lazy::encoder::value_writer::{StructWriter, ValueWriter}; | ||
use ion_rs::RawSymbolTokenRef; | ||
|
||
fn write_struct_with_string_values(value_writer: impl ValueWriter) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🗺️ Each of these methods takes an |
||
value_writer | ||
.write_struct(|fields| { | ||
fields | ||
// $10 = timestamp | ||
.write(10, black_box(1670446800245i64))? | ||
// $11 = threadId | ||
.write(11, black_box(418))? | ||
// $12 = threadName | ||
.write(12, black_box("scheduler-thread-6"))? | ||
// $13 = loggerName | ||
.write(13, black_box("com.example.organization.product.component.ClassName"))? | ||
// $14 = logLevel | ||
.write(14, black_box("INFO"))? | ||
// $15 = format | ||
.write(15, black_box("Request status: {} Client ID: {} Client Host: {} Client Region: {} Timestamp: {}"))? | ||
// $16 = parameters | ||
.write(16, &[ | ||
black_box("SUCCESS"), | ||
black_box("example-client-1"), | ||
black_box("aws-us-east-5f-18b4fa"), | ||
black_box("region 4"), | ||
black_box("2022-12-07T20:59:59.744000Z"), | ||
])?; | ||
Ok(()) | ||
}).unwrap(); | ||
} | ||
|
||
fn write_struct_with_symbol_values(value_writer: impl ValueWriter) { | ||
value_writer | ||
.write_struct(|fields| { | ||
fields | ||
// $10 = timestamp | ||
.write(10, black_box(1670446800245i64))? | ||
// $11 = threadId | ||
.write(11, black_box(418))? | ||
// $12 = threadName, $17 = scheduler-thread-6 | ||
.write(12, symbol_id(black_box(17)))? | ||
// $13 = loggerName, $18 = com.example.organization.product.component.ClassName | ||
.write(13, symbol_id(black_box(18)))? | ||
// $14 = logLevel, $19 = INFO | ||
.write(14, symbol_id(black_box(19)))? | ||
// $15 = format, $20 = Request status: {} Client ID: {} Client Host: {} Client Region: {} Timestamp: {} | ||
.write(15, symbol_id(black_box(20)))? | ||
// $16 = parameters | ||
.write( | ||
16, | ||
&[ | ||
// $21 = SUCCESS | ||
symbol_id(black_box(21)), | ||
// $22 = example-client-1 | ||
symbol_id(black_box(22)), | ||
// $23 = aws-us-east-5f-18b4fa | ||
symbol_id(black_box(23)), | ||
// $24 = region 4 | ||
symbol_id(black_box(24)), | ||
// $25 = 2022-12-07T20:59:59.744000Z (string, not timestamp) | ||
symbol_id(black_box(25)), | ||
], | ||
)?; | ||
Ok(()) | ||
}) | ||
.unwrap(); | ||
} | ||
|
||
fn write_eexp_with_symbol_values(value_writer: impl ValueWriter) { | ||
value_writer | ||
.write_eexp(0, |args| { | ||
args.write(black_box(1670446800245i64))? // timestamp | ||
.write(black_box(418))? // thread_id | ||
// These are still strings because they're so short that using symbols to represent | ||
// them wouldn't be beneficial. | ||
.write(black_box("6"))? // thread_name | ||
.write(black_box("1"))? // client_num | ||
.write(symbol_id(black_box(10)))? // host_id: "18b4fa" ($10) | ||
.value_writer() | ||
.without_annotations() | ||
.write_eexp(1, |args| { | ||
args | ||
// $11 = region 4 | ||
.write(symbol_id(black_box(11)))? | ||
// $12 = "2022-12-07T20:59:59.744000Z" (string, not timestamp) | ||
.write(symbol_id(black_box(12)))?; | ||
Ok(()) | ||
}) | ||
.unwrap(); | ||
Ok(()) | ||
}) | ||
.unwrap(); | ||
} | ||
|
||
fn write_eexp_with_string_values(value_writer: impl ValueWriter) { | ||
value_writer | ||
.write_eexp(0, |args| { | ||
args.write(black_box(1670446800245i64))? // timestamp | ||
.write(black_box(418))? // thread_id | ||
.write(black_box("6"))? // thread_name | ||
.write(black_box("1"))? // client_num | ||
.write(black_box("18b4fa"))? // host_id | ||
.value_writer() | ||
.without_annotations() | ||
.write_eexp(1, |args| { | ||
args.write(black_box("region 4"))? | ||
.write(black_box("2022-12-07T20:59:59.744000Z"))?; | ||
Ok(()) | ||
})?; | ||
Ok(()) | ||
}) | ||
.unwrap(); | ||
} | ||
|
||
fn symbol_id(sid: usize) -> RawSymbolTokenRef<'static> { | ||
RawSymbolTokenRef::SymbolId(sid) | ||
} | ||
|
||
pub fn criterion_benchmark(c: &mut Criterion) { | ||
let mut buffer = Vec::with_capacity(1024 * 1024); | ||
|
||
let mut binary_1_0_group = c.benchmark_group("binary 1.0"); | ||
binary_1_0_group.bench_function("write structs with string values", |b| { | ||
b.iter(|| { | ||
buffer.clear(); | ||
let mut writer = LazyRawBinaryWriter_1_0::new(&mut buffer).unwrap(); | ||
write_struct_with_string_values(writer.value_writer().without_annotations()); | ||
writer.flush().unwrap(); | ||
black_box(buffer.as_bytes()); | ||
}); | ||
}); | ||
// The runner allows the user to specify which benchmarks to run. If the benchmark above ran, | ||
// then the buffer will not be empty. | ||
// This print statement cannot live within the benchmark itself, as both `bench_function` and | ||
// `iter` are called several times. | ||
if !buffer.is_empty() { | ||
println!("\nencoded 1.0 size with string values: {}\n", buffer.len()); | ||
buffer.clear(); | ||
} | ||
|
||
binary_1_0_group.bench_function("write structs with symbol values", |b| { | ||
b.iter(|| { | ||
buffer.clear(); | ||
let mut writer = LazyRawBinaryWriter_1_0::new(&mut buffer).unwrap(); | ||
write_struct_with_symbol_values(writer.value_writer().without_annotations()); | ||
writer.flush().unwrap(); | ||
|
||
black_box(buffer.as_bytes()); | ||
}); | ||
}); | ||
if !buffer.is_empty() { | ||
println!("\nencoded 1.0 size with symbol values: {}\n", buffer.len()); | ||
buffer.clear() | ||
} | ||
binary_1_0_group.finish(); | ||
|
||
let mut binary_1_1_group = c.benchmark_group("binary 1.1"); | ||
binary_1_1_group.bench_function("write structs with string values", |b| { | ||
b.iter(|| { | ||
buffer.clear(); | ||
let mut writer = LazyRawBinaryWriter_1_1::new(&mut buffer).unwrap(); | ||
write_struct_with_string_values(writer.value_writer().without_annotations()); | ||
writer.flush().unwrap(); | ||
black_box(buffer.as_bytes()); | ||
}); | ||
}); | ||
if !buffer.is_empty() { | ||
println!("\nencoded 1.1 size with string values: {}\n", buffer.len()); | ||
buffer.clear() | ||
} | ||
|
||
binary_1_1_group.bench_function("write structs with symbol values", |b| { | ||
b.iter(|| { | ||
buffer.clear(); | ||
let mut writer = LazyRawBinaryWriter_1_1::new(&mut buffer).unwrap(); | ||
write_struct_with_symbol_values(writer.value_writer().without_annotations()); | ||
writer.flush().unwrap(); | ||
|
||
black_box(buffer.as_bytes()); | ||
}); | ||
}); | ||
if !buffer.is_empty() { | ||
println!("\nencoded 1.1 size with symbol values: {}\n", buffer.len()); | ||
buffer.clear() | ||
} | ||
|
||
binary_1_1_group.bench_function("write delimited structs with string values", |b| { | ||
b.iter(|| { | ||
buffer.clear(); | ||
let mut writer = LazyRawBinaryWriter_1_1::new(&mut buffer).unwrap(); | ||
write_struct_with_string_values( | ||
writer | ||
.value_writer() | ||
.with_delimited_containers() | ||
.without_annotations(), | ||
); | ||
writer.flush().unwrap(); | ||
black_box(buffer.as_bytes()); | ||
}); | ||
}); | ||
if !buffer.is_empty() { | ||
println!( | ||
"\nencoded 1.1 size, delimited structs with string values: {}\n", | ||
buffer.len() | ||
); | ||
buffer.clear() | ||
} | ||
|
||
binary_1_1_group.bench_function("write delimited structs with symbol values", |b| { | ||
b.iter(|| { | ||
buffer.clear(); | ||
let mut writer = LazyRawBinaryWriter_1_1::new(&mut buffer).unwrap(); | ||
write_struct_with_symbol_values( | ||
writer | ||
.value_writer() | ||
.with_delimited_containers() | ||
.without_annotations(), | ||
); | ||
writer.flush().unwrap(); | ||
|
||
black_box(buffer.as_bytes()); | ||
}); | ||
}); | ||
if !buffer.is_empty() { | ||
println!("\nencoded 1.1 size with symbol values: {}\n", buffer.len()); | ||
buffer.clear() | ||
} | ||
|
||
binary_1_1_group.bench_function("write structs with string values using macros", |b| { | ||
b.iter(|| { | ||
buffer.clear(); | ||
let mut writer = LazyRawBinaryWriter_1_1::new(&mut buffer).unwrap(); | ||
write_eexp_with_string_values(writer.value_writer().without_annotations()); | ||
writer.flush().unwrap(); | ||
black_box(buffer.as_bytes()); | ||
}); | ||
}); | ||
if !buffer.is_empty() { | ||
println!( | ||
"\nencoded 1.1 size with string values using macros: {}\n", | ||
buffer.len() | ||
); | ||
buffer.clear() | ||
} | ||
|
||
binary_1_1_group.bench_function("write structs with symbol values using macros", |b| { | ||
b.iter(|| { | ||
buffer.clear(); | ||
let mut writer = LazyRawBinaryWriter_1_1::new(&mut buffer).unwrap(); | ||
write_eexp_with_symbol_values(writer.value_writer().without_annotations()); | ||
writer.flush().unwrap(); | ||
black_box(buffer.as_bytes()); | ||
}); | ||
}); | ||
if !buffer.is_empty() { | ||
println!( | ||
"\nencoded 1.1 size with symbol values using macros: {}\n", | ||
buffer.len() | ||
); | ||
buffer.clear() | ||
} | ||
|
||
binary_1_1_group.finish(); | ||
} | ||
|
||
criterion_group!(benches, criterion_benchmark); | ||
criterion_main!(benches); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🗺️ Version `3.15.3` includes `Vec` optimizations that led to substantial speed-ups:
- `Vec::extend_from_slice` optimized for `T: Copy` (fitzgen/bumpalo#236)
- `reserve` fn structure to improve inlining (fitzgen/bumpalo#239)