From 702393c0a2319843227adcc8fabc6e61129af6a4 Mon Sep 17 00:00:00 2001 From: Parker Timmerman Date: Mon, 3 Jan 2022 14:02:41 -0500 Subject: [PATCH 01/18] branch start, some scafolding --- compact_str/Cargo.toml | 1 + compact_str/src/features/mod.rs | 6 ++++++ compact_str/src/features/pb_jelly.rs | 18 ++++++++++++++++++ compact_str/src/{ => features}/serde.rs | 2 +- compact_str/src/lib.rs | 5 ++--- 5 files changed, 28 insertions(+), 4 deletions(-) create mode 100644 compact_str/src/features/mod.rs create mode 100644 compact_str/src/features/pb_jelly.rs rename compact_str/src/{ => features}/serde.rs (98%) diff --git a/compact_str/Cargo.toml b/compact_str/Cargo.toml index 0ca96f03..20f13268 100644 --- a/compact_str/Cargo.toml +++ b/compact_str/Cargo.toml @@ -14,6 +14,7 @@ categories = ["encoding", "parsing", "memory-management", "text-processing"] # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +pb-jelly = { version = "0.0.11" } serde = { version = "1", optional = true } static_assertions = "1" diff --git a/compact_str/src/features/mod.rs b/compact_str/src/features/mod.rs new file mode 100644 index 00000000..ecda7a54 --- /dev/null +++ b/compact_str/src/features/mod.rs @@ -0,0 +1,6 @@ +//! A module that contains the implementations for optional features. 
For example `serde` support + +// #[cfg(feature = "pb_jelly")] +mod pb_jelly; +#[cfg(feature = "serde")] +mod serde; diff --git a/compact_str/src/features/pb_jelly.rs b/compact_str/src/features/pb_jelly.rs new file mode 100644 index 00000000..39fa33c8 --- /dev/null +++ b/compact_str/src/features/pb_jelly.rs @@ -0,0 +1,18 @@ +use crate::CompactStr; +use pb_jelly::Message; + +impl Message for CompactStr { + fn compute_size(&self) -> usize { + self.len() + } + + fn serialize(&self, w: &mut W) -> std::io::Result<()> { + w.write_all(self.as_bytes())?; + Ok(()) + } + + fn deserialize(&mut self, r: &mut B) -> std::io::Result<()> { + + todo!() + } +} diff --git a/compact_str/src/serde.rs b/compact_str/src/features/serde.rs similarity index 98% rename from compact_str/src/serde.rs rename to compact_str/src/features/serde.rs index 5c4572ed..bbcaec7e 100644 --- a/compact_str/src/serde.rs +++ b/compact_str/src/features/serde.rs @@ -7,7 +7,7 @@ use serde::de::{ Visitor, }; -use super::CompactStr; +use crate::CompactStr; fn compact_str<'de: 'a, 'a, D: Deserializer<'de>>(deserializer: D) -> Result { struct CompactStrVisitor; diff --git a/compact_str/src/lib.rs b/compact_str/src/lib.rs index 82596313..01364325 100644 --- a/compact_str/src/lib.rs +++ b/compact_str/src/lib.rs @@ -22,12 +22,11 @@ use core::iter::FromIterator; use core::ops::Deref; use core::str::FromStr; +mod features; + mod repr; use repr::Repr; -#[cfg(feature = "serde")] -mod serde; - #[cfg(test)] mod tests; From 6240dfb8943a80b34adab4b0b2e987e5d8be4a9a Mon Sep 17 00:00:00 2001 From: Parker Timmerman Date: Mon, 3 Jan 2022 15:21:23 -0500 Subject: [PATCH 02/18] MVP, adding bytes feature, using this to make pb-jelly impl --- compact_str/Cargo.toml | 1 + compact_str/src/features/bytes.rs | 71 +++++++++++++++ compact_str/src/features/mod.rs | 1 + compact_str/src/features/pb_jelly.rs | 12 ++- compact_str/src/repr/bytes.rs | 126 +++++++++++++++++++++++++++ compact_str/src/repr/mod.rs | 1 + 6 files changed, 209 
insertions(+), 3 deletions(-) create mode 100644 compact_str/src/features/bytes.rs create mode 100644 compact_str/src/repr/bytes.rs diff --git a/compact_str/Cargo.toml b/compact_str/Cargo.toml index 20f13268..af624400 100644 --- a/compact_str/Cargo.toml +++ b/compact_str/Cargo.toml @@ -14,6 +14,7 @@ categories = ["encoding", "parsing", "memory-management", "text-processing"] # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +bytes = { version = "1" } pb-jelly = { version = "0.0.11" } serde = { version = "1", optional = true } static_assertions = "1" diff --git a/compact_str/src/features/bytes.rs b/compact_str/src/features/bytes.rs new file mode 100644 index 00000000..ee89fa50 --- /dev/null +++ b/compact_str/src/features/bytes.rs @@ -0,0 +1,71 @@ +use core::str::Utf8Error; + +use bytes::Buf; + +use crate::{ + CompactStr, + Repr, +}; + +impl CompactStr { + /// Converts a buffer of bytes to a `CompactStr` + pub fn from_utf8_buf(buf: &mut B) -> Result { + Repr::from_utf8_buf(buf).map(|repr| CompactStr { repr }) + } +} + +#[cfg(test)] +mod test { + use crate::CompactStr; + use proptest::prelude::*; + use proptest::strategy::Strategy; + use std::io::Cursor; + + const MAX_INLINED_SIZE: usize = core::mem::size_of::(); + + // generates random unicode strings, upto 80 chars long + fn rand_unicode() -> impl Strategy { + proptest::collection::vec(proptest::char::any(), 0..80).prop_map(|v| v.into_iter().collect()) + } + + proptest! 
{ + #[test] + #[cfg_attr(miri, ignore)] + fn test_buffers_roundtrip(word in rand_unicode()) { + let mut buf = Cursor::new(word.as_bytes()); + let compact = CompactStr::from_utf8_buf(&mut buf).unwrap(); + + prop_assert_eq!(&word, &compact); + } + + #[test] + #[cfg_attr(miri, ignore)] + fn test_allocated_properly(word in rand_unicode()) { + let mut buf = Cursor::new(word.as_bytes()); + let compact = CompactStr::from_utf8_buf(&mut buf).unwrap(); + + if word.len() < MAX_INLINED_SIZE { + prop_assert!(!compact.is_heap_allocated()) + } else if word.len() == MAX_INLINED_SIZE && word.as_bytes()[0] <= 127 { + prop_assert!(!compact.is_heap_allocated()) + } else { + prop_assert!(compact.is_heap_allocated()) + } + } + + #[test] + #[cfg_attr(miri, ignore)] + fn test_only_accept_valid_utf8(bytes in proptest::collection::vec(any::(), 0..80)) { + let mut buf = Cursor::new(bytes.as_slice()); + + let compact_result = CompactStr::from_utf8_buf(&mut buf); + let str_result = core::str::from_utf8(bytes.as_slice()); + + match (compact_result, str_result) { + (Ok(c), Ok(s)) => prop_assert_eq!(c, s), + (Err(c_err), Err(s_err)) => prop_assert_eq!(c_err, s_err), + _ => panic!("CompactStr and core::str read UTF-8 differently?"), + } + } + } +} diff --git a/compact_str/src/features/mod.rs b/compact_str/src/features/mod.rs index ecda7a54..d564021f 100644 --- a/compact_str/src/features/mod.rs +++ b/compact_str/src/features/mod.rs @@ -1,5 +1,6 @@ //! A module that contains the implementations for optional features. 
For example `serde` support +mod bytes; // #[cfg(feature = "pb_jelly")] mod pb_jelly; #[cfg(feature = "serde")] diff --git a/compact_str/src/features/pb_jelly.rs b/compact_str/src/features/pb_jelly.rs index 39fa33c8..51ecfbdc 100644 --- a/compact_str/src/features/pb_jelly.rs +++ b/compact_str/src/features/pb_jelly.rs @@ -1,6 +1,7 @@ -use crate::CompactStr; use pb_jelly::Message; +use crate::CompactStr; + impl Message for CompactStr { fn compute_size(&self) -> usize { self.len() @@ -12,7 +13,12 @@ impl Message for CompactStr { } fn deserialize(&mut self, r: &mut B) -> std::io::Result<()> { - - todo!() + match CompactStr::from_utf8_buf(r) { + Ok(compact) => { + *self = compact; + Ok(()) + } + Err(_) => Err(std::io::ErrorKind::InvalidData.into()), + } } } diff --git a/compact_str/src/repr/bytes.rs b/compact_str/src/repr/bytes.rs new file mode 100644 index 00000000..4ccf9ce7 --- /dev/null +++ b/compact_str/src/repr/bytes.rs @@ -0,0 +1,126 @@ +use core::str::Utf8Error; + +use bytes::Buf; + +use super::{ + Repr, + MAX_SIZE, +}; + +#[cfg(target_pointer_width = "32")] +const DEFAULT_TEXT: str = "000000000000"; +#[cfg(target_pointer_width = "64")] +const DEFAULT_TEXT: &str = "000000000000000000000000"; + +const DEFAULT_PACKED: Repr = Repr::new_const(DEFAULT_TEXT); + +impl Repr { + /// Converts a buffer of bytes to a `Repr` + pub fn from_utf8_buf(buf: &mut B) -> Result { + let size = buf.remaining(); + let chunk = buf.chunk(); + + // Check to make sure we're not empty, so accessing the first byte below doesn't panic + if chunk.is_empty() { + // If the chunk is empty, then we should have 0 remaining bytes + debug_assert_eq!(size, 0); + return Ok(super::EMPTY); + } + let first_byte = buf.chunk()[0]; + + // Get an "empty" Repr we can write into + // + // HACK: There currently isn't a way to provide an "empty" Packed repr, so we do this check + // and return a "default" Packed repr if the buffer can fit + let mut repr = if size == MAX_SIZE && first_byte <= 127 { + // Note: No 
need to reserve additional bytes here, because we know we can fit all + // remaining bytes of `buf` into a Packed repr + DEFAULT_PACKED + } else { + let mut default = super::EMPTY; + debug_assert_eq!(default.len(), 0); + + // Reserve enough bytes, possibly allocating on the heap, to store the text + default.reserve(size); + + default + }; + + // SAFETY: Before returning this Repr we check to make sure the provided bytes are valid + // UTF-8 + let slice = unsafe { repr.as_mut_slice() }; + // Copy the bytes from the buffer into our Repr! + buf.copy_to_slice(&mut slice[..size]); + + // Set the length of the Repr + // SAFETY: We just wrote `size` bytes into the Repr + unsafe { repr.set_len(size) }; + + // Check to make sure the provided bytes are valid UTF-8, return the Repr if they are! + // + // TODO: Add an `as_slice()` method to Repr and refactor this call + match core::str::from_utf8(repr.as_str().as_bytes()) { + Ok(_) => Ok(repr), + Err(e) => Err(e), + } + } +} + +#[cfg(test)] +mod test { + use std::io::Cursor; + + use super::Repr; + + #[test] + fn test_smoke() { + let word = "hello world"; + let mut buf = Cursor::new(word.as_bytes()); + + let repr = Repr::from_utf8_buf(&mut buf).unwrap(); + assert_eq!(repr.as_str(), word); + } + + #[test] + fn test_heap_allocated() { + let word = "hello, this is a long string which should be heap allocated"; + let mut buf = Cursor::new(word.as_bytes()); + + let repr = Repr::from_utf8_buf(&mut buf).unwrap(); + assert_eq!(repr.as_str(), word); + } + + #[test] + fn test_empty() { + let mut buf: Cursor<&[u8]> = Cursor::new(&[]); + + let repr = Repr::from_utf8_buf(&mut buf).unwrap(); + assert_eq!(repr.len(), 0); + assert_eq!(repr.as_str(), ""); + } + + #[test] + fn test_packed() { + #[cfg(target_pointer_width = "64")] + let packed = "this string is 24 chars!"; + #[cfg(target_pointer_width = "32")] + let packed = "i am 12 char"; + + let mut buf = Cursor::new(packed.as_bytes()); + + let repr = Repr::from_utf8_buf(&mut buf).unwrap(); + 
assert_eq!(repr.as_str(), packed); + + // This repr should __not__ be heap allocated + assert!(!repr.is_heap_allocated()); + } + + #[test] + #[should_panic(expected = "Utf8Error")] + fn test_invalid_utf8() { + let invalid = &[0, 159]; + let mut buf: Cursor<&[u8]> = Cursor::new(invalid); + + Repr::from_utf8_buf(&mut buf).unwrap(); + } +} diff --git a/compact_str/src/repr/mod.rs b/compact_str/src/repr/mod.rs index b9533b0f..dcf517c4 100644 --- a/compact_str/src/repr/mod.rs +++ b/compact_str/src/repr/mod.rs @@ -5,6 +5,7 @@ use static_assertions::{ const_assert_eq, }; +mod bytes; mod iter; mod discriminant; From b0576c1307d2a8bfbea5857b064944fd3f9e9ac5 Mon Sep 17 00:00:00 2001 From: Parker Timmerman Date: Mon, 3 Jan 2022 16:21:13 -0500 Subject: [PATCH 03/18] MVP --- .gitignore | 4 +- Cargo.toml | 2 +- compact_str/Cargo.toml | 7 +- compact_str/src/features/mod.rs | 3 +- compact_str/src/features/pb_jelly.rs | 7 +- compact_str/src/repr/heap/arc.rs | 2 + compact_str/src/repr/heap/mod.rs | 2 + compact_str/src/repr/mod.rs | 2 + compact_str/src/tests.rs | 6 + examples/pb-jelly/Cargo.toml | 11 + examples/pb-jelly/gen/Cargo.toml | 9 + examples/pb-jelly/gen/src/main.rs | 11 + .../pb-jelly/protos/gen/proto_user/Cargo.toml | 10 + .../protos/gen/proto_user/src/basic.rs | 327 ++++++++++++++++++ .../pb-jelly/protos/gen/proto_user/src/lib.rs | 27 ++ examples/pb-jelly/protos/user/basic.proto | 16 + examples/pb-jelly/src/main.rs | 22 ++ 17 files changed, 461 insertions(+), 7 deletions(-) create mode 100644 examples/pb-jelly/Cargo.toml create mode 100644 examples/pb-jelly/gen/Cargo.toml create mode 100644 examples/pb-jelly/gen/src/main.rs create mode 100644 examples/pb-jelly/protos/gen/proto_user/Cargo.toml create mode 100644 examples/pb-jelly/protos/gen/proto_user/src/basic.rs create mode 100644 examples/pb-jelly/protos/gen/proto_user/src/lib.rs create mode 100644 examples/pb-jelly/protos/user/basic.proto create mode 100644 examples/pb-jelly/src/main.rs diff --git a/.gitignore 
b/.gitignore index 96ef6c0b..972b0c47 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,2 @@ -/target -Cargo.lock +**/target +**/Cargo.lock diff --git a/Cargo.toml b/Cargo.toml index d934e31f..81baf451 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,2 +1,2 @@ [workspace] -members = ["examples/serde", "compact_str", "tracing_alloc"] +members = ["examples/pb-jelly","examples/pb-jelly/gen","examples/pb-jelly/protos/gen/proto_user", "examples/serde", "compact_str", "tracing_alloc"] diff --git a/compact_str/Cargo.toml b/compact_str/Cargo.toml index af624400..f3632d3f 100644 --- a/compact_str/Cargo.toml +++ b/compact_str/Cargo.toml @@ -14,11 +14,14 @@ categories = ["encoding", "parsing", "memory-management", "text-processing"] # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -bytes = { version = "1" } -pb-jelly = { version = "0.0.11" } +bytes = { version = "1", optional = true } +pb-jelly = { version = "0.0.11", optional = true } serde = { version = "1", optional = true } static_assertions = "1" +[features] +proto = ["bytes", "pb-jelly"] + [dev-dependencies] criterion = { version = "0.3", features = ["html_reports"] } proptest = "1" diff --git a/compact_str/src/features/mod.rs b/compact_str/src/features/mod.rs index d564021f..cd2875ee 100644 --- a/compact_str/src/features/mod.rs +++ b/compact_str/src/features/mod.rs @@ -1,7 +1,8 @@ //! A module that contains the implementations for optional features. 
For example `serde` support +#[cfg(feature = "bytes")] mod bytes; -// #[cfg(feature = "pb_jelly")] +#[cfg(feature = "proto")] mod pb_jelly; #[cfg(feature = "serde")] mod serde; diff --git a/compact_str/src/features/pb_jelly.rs b/compact_str/src/features/pb_jelly.rs index 51ecfbdc..b6715913 100644 --- a/compact_str/src/features/pb_jelly.rs +++ b/compact_str/src/features/pb_jelly.rs @@ -1,7 +1,12 @@ -use pb_jelly::Message; +use pb_jelly::{ + Message, + Reflection, +}; use crate::CompactStr; +impl Reflection for CompactStr {} + impl Message for CompactStr { fn compute_size(&self) -> usize { self.len() diff --git a/compact_str/src/repr/heap/arc.rs b/compact_str/src/repr/heap/arc.rs index 3b1c8ef8..6e3ed03c 100644 --- a/compact_str/src/repr/heap/arc.rs +++ b/compact_str/src/repr/heap/arc.rs @@ -22,6 +22,8 @@ pub struct ArcString { len: usize, ptr: ptr::NonNull, } +unsafe impl Sync for ArcString {} +unsafe impl Send for ArcString {} impl ArcString { #[inline] diff --git a/compact_str/src/repr/heap/mod.rs b/compact_str/src/repr/heap/mod.rs index f0244b9d..6bf484a9 100644 --- a/compact_str/src/repr/heap/mod.rs +++ b/compact_str/src/repr/heap/mod.rs @@ -1,5 +1,7 @@ use std::mem; +use std::sync::Arc; + use super::{ HEAP_MASK, MAX_SIZE, diff --git a/compact_str/src/repr/mod.rs b/compact_str/src/repr/mod.rs index dcf517c4..e17c71a1 100644 --- a/compact_str/src/repr/mod.rs +++ b/compact_str/src/repr/mod.rs @@ -5,7 +5,9 @@ use static_assertions::{ const_assert_eq, }; +#[cfg(feature = "bytes")] mod bytes; + mod iter; mod discriminant; diff --git a/compact_str/src/tests.rs b/compact_str/src/tests.rs index 19529423..780a67e3 100644 --- a/compact_str/src/tests.rs +++ b/compact_str/src/tests.rs @@ -220,3 +220,9 @@ fn test_from_char_iter() { assert!(compact.is_heap_allocated()); assert_eq!(s, compact); } + +#[test] +fn test_compact_str_is_send_and_sync() { + fn is_send_and_sync() {} + is_send_and_sync::(); +} diff --git a/examples/pb-jelly/Cargo.toml b/examples/pb-jelly/Cargo.toml new 
file mode 100644 index 00000000..8111c377 --- /dev/null +++ b/examples/pb-jelly/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "pb-jelly" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +compact_str = { path = "../../compact_str", features = ["proto"] } +pb-jelly = "0.0.11" +proto_user = { path = "protos/gen/proto_user" } diff --git a/examples/pb-jelly/gen/Cargo.toml b/examples/pb-jelly/gen/Cargo.toml new file mode 100644 index 00000000..44b544b5 --- /dev/null +++ b/examples/pb-jelly/gen/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "gen" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +pb-jelly-gen = "0.0.11" diff --git a/examples/pb-jelly/gen/src/main.rs b/examples/pb-jelly/gen/src/main.rs new file mode 100644 index 00000000..25070332 --- /dev/null +++ b/examples/pb-jelly/gen/src/main.rs @@ -0,0 +1,11 @@ +use pb_jelly_gen::GenProtos; + +fn main() -> std::io::Result<()> { + GenProtos::builder() + .out_path("../protos/gen") + .src_path("../protos") + .cleanup_out_path(true) + .gen_protos(); + + Ok(()) +} diff --git a/examples/pb-jelly/protos/gen/proto_user/Cargo.toml b/examples/pb-jelly/protos/gen/proto_user/Cargo.toml new file mode 100644 index 00000000..3de65d6c --- /dev/null +++ b/examples/pb-jelly/protos/gen/proto_user/Cargo.toml @@ -0,0 +1,10 @@ +# @generated, do not edit +[package] +name = "proto_user" +version = "0.0.1" +edition = "2018" + +[dependencies] +compact_str = { path = "../../../../../compact_str", features = ["proto"] } +lazy_static = { version = "1.4.0" } +pb-jelly = { version = "0.0.11" } diff --git a/examples/pb-jelly/protos/gen/proto_user/src/basic.rs b/examples/pb-jelly/protos/gen/proto_user/src/basic.rs new file mode 100644 index 00000000..e17be3ff --- /dev/null +++ b/examples/pb-jelly/protos/gen/proto_user/src/basic.rs @@ -0,0 
+1,327 @@ +// @generated, do not edit +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct Address { + pub street: ::compact_str::CompactStr, + pub city: ::compact_str::CompactStr, +} +impl ::std::default::Default for Address { + fn default() -> Self { + Address { + street: ::std::default::Default::default(), + city: ::std::default::Default::default(), + } + } +} +lazy_static! { + pub static ref Address_default: Address = Address::default(); +} +impl ::pb_jelly::Message for Address { + fn descriptor(&self) -> ::std::option::Option<::pb_jelly::MessageDescriptor> { + Some(::pb_jelly::MessageDescriptor { + name: "Address", + full_name: "basic.Address", + fields: &[ + ::pb_jelly::FieldDescriptor { + name: "street", + full_name: "basic.Address.street", + index: 0, + number: 1, + typ: ::pb_jelly::wire_format::Type::LengthDelimited, + label: ::pb_jelly::Label::Optional, + oneof_index: None, + }, + ::pb_jelly::FieldDescriptor { + name: "city", + full_name: "basic.Address.city", + index: 1, + number: 2, + typ: ::pb_jelly::wire_format::Type::LengthDelimited, + label: ::pb_jelly::Label::Optional, + oneof_index: None, + }, + ], + oneofs: &[ + ], + }) + } + fn compute_size(&self) -> usize { + let mut size = 0; + let mut street_size = 0; + if self.street != <::compact_str::CompactStr as ::std::default::Default>::default() { + let val = &self.street; + let l = ::pb_jelly::Message::compute_size(val); + street_size += ::pb_jelly::wire_format::serialized_length(1); + street_size += ::pb_jelly::varint::serialized_length(l as u64); + street_size += l; + } + size += street_size; + let mut city_size = 0; + if self.city != <::compact_str::CompactStr as ::std::default::Default>::default() { + let val = &self.city; + let l = ::pb_jelly::Message::compute_size(val); + city_size += ::pb_jelly::wire_format::serialized_length(2); + city_size += ::pb_jelly::varint::serialized_length(l as u64); + city_size += l; + } + size += city_size; + size + } + fn 
compute_grpc_slices_size(&self) -> usize { + let mut size = 0; + if self.street != <::compact_str::CompactStr as ::std::default::Default>::default() { + let val = &self.street; + size += ::pb_jelly::Message::compute_grpc_slices_size(val); + } + if self.city != <::compact_str::CompactStr as ::std::default::Default>::default() { + let val = &self.city; + size += ::pb_jelly::Message::compute_grpc_slices_size(val); + } + size + } + fn serialize(&self, w: &mut W) -> ::std::io::Result<()> { + if self.street != <::compact_str::CompactStr as ::std::default::Default>::default() { + let val = &self.street; + ::pb_jelly::wire_format::write(1, ::pb_jelly::wire_format::Type::LengthDelimited, w)?; + let l = ::pb_jelly::Message::compute_size(val); + ::pb_jelly::varint::write(l as u64, w)?; + ::pb_jelly::Message::serialize(val, w)?; + } + if self.city != <::compact_str::CompactStr as ::std::default::Default>::default() { + let val = &self.city; + ::pb_jelly::wire_format::write(2, ::pb_jelly::wire_format::Type::LengthDelimited, w)?; + let l = ::pb_jelly::Message::compute_size(val); + ::pb_jelly::varint::write(l as u64, w)?; + ::pb_jelly::Message::serialize(val, w)?; + } + Ok(()) + } + fn deserialize(&mut self, mut buf: &mut B) -> ::std::io::Result<()> { + while let Some((field_number, typ)) = ::pb_jelly::wire_format::read(&mut buf)? 
{ + match field_number { + 1 => { + ::pb_jelly::ensure_wire_format(typ, ::pb_jelly::wire_format::Type::LengthDelimited, "Address", 1)?; + let len = ::pb_jelly::varint::ensure_read(&mut buf)?; + let mut next = ::pb_jelly::ensure_split(buf, len as usize)?; + let mut val: ::compact_str::CompactStr = ::std::default::Default::default(); + ::pb_jelly::Message::deserialize(&mut val, &mut next)?; + self.street = val; + } + 2 => { + ::pb_jelly::ensure_wire_format(typ, ::pb_jelly::wire_format::Type::LengthDelimited, "Address", 2)?; + let len = ::pb_jelly::varint::ensure_read(&mut buf)?; + let mut next = ::pb_jelly::ensure_split(buf, len as usize)?; + let mut val: ::compact_str::CompactStr = ::std::default::Default::default(); + ::pb_jelly::Message::deserialize(&mut val, &mut next)?; + self.city = val; + } + _ => { + ::pb_jelly::skip(typ, &mut buf)?; + } + } + } + Ok(()) + } +} +impl ::pb_jelly::Reflection for Address { + fn which_one_of(&self, oneof_name: &str) -> ::std::option::Option<&'static str> { + match oneof_name { + _ => { + panic!("unknown oneof name given"); + } + } + } + fn get_field_mut(&mut self, field_name: &str) -> ::pb_jelly::reflection::FieldMut<'_> { + match field_name { + "street" => { + ::pb_jelly::reflection::FieldMut::Value(&mut self.street) + } + "city" => { + ::pb_jelly::reflection::FieldMut::Value(&mut self.city) + } + _ => { + panic!("unknown field name given") + } + } + } +} + +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct User { + pub name: ::compact_str::CompactStr, + pub age: u32, + pub address: ::std::option::Option
, +} +impl ::std::default::Default for User { + fn default() -> Self { + User { + name: ::std::default::Default::default(), + age: ::std::default::Default::default(), + address: ::std::default::Default::default(), + } + } +} +lazy_static! { + pub static ref User_default: User = User::default(); +} +impl ::pb_jelly::Message for User { + fn descriptor(&self) -> ::std::option::Option<::pb_jelly::MessageDescriptor> { + Some(::pb_jelly::MessageDescriptor { + name: "User", + full_name: "basic.User", + fields: &[ + ::pb_jelly::FieldDescriptor { + name: "name", + full_name: "basic.User.name", + index: 0, + number: 1, + typ: ::pb_jelly::wire_format::Type::LengthDelimited, + label: ::pb_jelly::Label::Optional, + oneof_index: None, + }, + ::pb_jelly::FieldDescriptor { + name: "age", + full_name: "basic.User.age", + index: 1, + number: 2, + typ: ::pb_jelly::wire_format::Type::Varint, + label: ::pb_jelly::Label::Optional, + oneof_index: None, + }, + ::pb_jelly::FieldDescriptor { + name: "address", + full_name: "basic.User.address", + index: 2, + number: 3, + typ: ::pb_jelly::wire_format::Type::LengthDelimited, + label: ::pb_jelly::Label::Optional, + oneof_index: None, + }, + ], + oneofs: &[ + ], + }) + } + fn compute_size(&self) -> usize { + let mut size = 0; + let mut name_size = 0; + if self.name != <::compact_str::CompactStr as ::std::default::Default>::default() { + let val = &self.name; + let l = ::pb_jelly::Message::compute_size(val); + name_size += ::pb_jelly::wire_format::serialized_length(1); + name_size += ::pb_jelly::varint::serialized_length(l as u64); + name_size += l; + } + size += name_size; + let mut age_size = 0; + if self.age != ::default() { + let val = &self.age; + let l = ::pb_jelly::Message::compute_size(val); + age_size += ::pb_jelly::wire_format::serialized_length(2); + age_size += l; + } + size += age_size; + let mut address_size = 0; + for val in &self.address { + let l = ::pb_jelly::Message::compute_size(val); + address_size += 
::pb_jelly::wire_format::serialized_length(3); + address_size += ::pb_jelly::varint::serialized_length(l as u64); + address_size += l; + } + size += address_size; + size + } + fn compute_grpc_slices_size(&self) -> usize { + let mut size = 0; + if self.name != <::compact_str::CompactStr as ::std::default::Default>::default() { + let val = &self.name; + size += ::pb_jelly::Message::compute_grpc_slices_size(val); + } + if self.age != ::default() { + let val = &self.age; + size += ::pb_jelly::Message::compute_grpc_slices_size(val); + } + for val in &self.address { + size += ::pb_jelly::Message::compute_grpc_slices_size(val); + } + size + } + fn serialize(&self, w: &mut W) -> ::std::io::Result<()> { + if self.name != <::compact_str::CompactStr as ::std::default::Default>::default() { + let val = &self.name; + ::pb_jelly::wire_format::write(1, ::pb_jelly::wire_format::Type::LengthDelimited, w)?; + let l = ::pb_jelly::Message::compute_size(val); + ::pb_jelly::varint::write(l as u64, w)?; + ::pb_jelly::Message::serialize(val, w)?; + } + if self.age != ::default() { + let val = &self.age; + ::pb_jelly::wire_format::write(2, ::pb_jelly::wire_format::Type::Varint, w)?; + ::pb_jelly::Message::serialize(val, w)?; + } + for val in &self.address { + ::pb_jelly::wire_format::write(3, ::pb_jelly::wire_format::Type::LengthDelimited, w)?; + let l = ::pb_jelly::Message::compute_size(val); + ::pb_jelly::varint::write(l as u64, w)?; + ::pb_jelly::Message::serialize(val, w)?; + } + Ok(()) + } + fn deserialize(&mut self, mut buf: &mut B) -> ::std::io::Result<()> { + while let Some((field_number, typ)) = ::pb_jelly::wire_format::read(&mut buf)? 
{ + match field_number { + 1 => { + ::pb_jelly::ensure_wire_format(typ, ::pb_jelly::wire_format::Type::LengthDelimited, "User", 1)?; + let len = ::pb_jelly::varint::ensure_read(&mut buf)?; + let mut next = ::pb_jelly::ensure_split(buf, len as usize)?; + let mut val: ::compact_str::CompactStr = ::std::default::Default::default(); + ::pb_jelly::Message::deserialize(&mut val, &mut next)?; + self.name = val; + } + 2 => { + ::pb_jelly::ensure_wire_format(typ, ::pb_jelly::wire_format::Type::Varint, "User", 2)?; + let mut val: u32 = ::std::default::Default::default(); + ::pb_jelly::Message::deserialize(&mut val, buf)?; + self.age = val; + } + 3 => { + ::pb_jelly::ensure_wire_format(typ, ::pb_jelly::wire_format::Type::LengthDelimited, "User", 3)?; + let len = ::pb_jelly::varint::ensure_read(&mut buf)?; + let mut next = ::pb_jelly::ensure_split(buf, len as usize)?; + let mut val: Address = ::std::default::Default::default(); + ::pb_jelly::Message::deserialize(&mut val, &mut next)?; + self.address = Some(val); + } + _ => { + ::pb_jelly::skip(typ, &mut buf)?; + } + } + } + Ok(()) + } +} +impl ::pb_jelly::Reflection for User { + fn which_one_of(&self, oneof_name: &str) -> ::std::option::Option<&'static str> { + match oneof_name { + _ => { + panic!("unknown oneof name given"); + } + } + } + fn get_field_mut(&mut self, field_name: &str) -> ::pb_jelly::reflection::FieldMut<'_> { + match field_name { + "name" => { + ::pb_jelly::reflection::FieldMut::Value(&mut self.name) + } + "age" => { + ::pb_jelly::reflection::FieldMut::Value(&mut self.age) + } + "address" => { + ::pb_jelly::reflection::FieldMut::Value(self.address.get_or_insert_with(::std::default::Default::default)) + } + _ => { + panic!("unknown field name given") + } + } + } +} + diff --git a/examples/pb-jelly/protos/gen/proto_user/src/lib.rs b/examples/pb-jelly/protos/gen/proto_user/src/lib.rs new file mode 100644 index 00000000..513495f0 --- /dev/null +++ b/examples/pb-jelly/protos/gen/proto_user/src/lib.rs @@ -0,0 +1,27 
@@ +// @generated, do not edit + +#![warn(rust_2018_idioms)] +#![allow(irrefutable_let_patterns)] +#![allow(non_camel_case_types)] +#![allow(non_snake_case)] +#![allow(non_upper_case_globals)] +#![allow(unused_imports)] +#![allow(unused_variables)] +#![allow(irrefutable_let_patterns)] +#![allow(broken_intra_doc_links)] + +// Modules are generated based on the naming conventions of protobuf, which might cause "module inception" +#![allow(clippy::module_inception)] +// This is all generated code, so "manually" implementing derivable impls is okay +#![allow(clippy::derivable_impls)] +// For enums with many variants, the matches!(...) macro isn't obviously better +#![allow(clippy::match_like_matches_macro)] +// TODO: Ideally we don't allow this +#![allow(clippy::option_as_ref_deref)] +// TODO: Ideally we don't allow this +#![allow(clippy::match_single_binding)] + +#[macro_use] +extern crate lazy_static; + +pub mod basic; diff --git a/examples/pb-jelly/protos/user/basic.proto b/examples/pb-jelly/protos/user/basic.proto new file mode 100644 index 00000000..57a45c8f --- /dev/null +++ b/examples/pb-jelly/protos/user/basic.proto @@ -0,0 +1,16 @@ +syntax = "proto3"; +package basic; + +// rust/extensions.proto is included by default +import "rust/extensions.proto"; + +message Address { + string street = 1 [(rust.type)="::compact_str::CompactStr"]; + string city = 2 [(rust.type)="::compact_str::CompactStr"]; +} + +message User { + string name = 1 [(rust.type)="::compact_str::CompactStr"]; + uint32 age = 2; + Address address = 3; +} diff --git a/examples/pb-jelly/src/main.rs b/examples/pb-jelly/src/main.rs new file mode 100644 index 00000000..126ab408 --- /dev/null +++ b/examples/pb-jelly/src/main.rs @@ -0,0 +1,22 @@ +use compact_str::CompactStr; +use pb_jelly::Message; +use proto_user::basic::{ + Address, + User, +}; + +fn main() { + let user = User { + name: CompactStr::new_inline("John"), + age: 42, + address: Some(Address { + street: "432 Park Ave".into(), + city: "New York 
City".into(), + }) + }; + let bytes = user.serialize_to_vec(); + + let roundtrip_user = User::deserialize_from_slice(&bytes).unwrap(); + println!("{:#?}", roundtrip_user); + assert_eq!(user, roundtrip_user); +} From 537422e8e3a45ee2dd48a7e22ad71854dcf53c02 Mon Sep 17 00:00:00 2001 From: Parker Timmerman Date: Mon, 3 Jan 2022 17:50:51 -0500 Subject: [PATCH 04/18] clippy and fmt fixes --- compact_str/src/features/bytes.rs | 9 ++++++--- compact_str/src/repr/heap/mod.rs | 2 -- examples/pb-jelly/src/main.rs | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/compact_str/src/features/bytes.rs b/compact_str/src/features/bytes.rs index ee89fa50..d7a3c70a 100644 --- a/compact_str/src/features/bytes.rs +++ b/compact_str/src/features/bytes.rs @@ -16,16 +16,19 @@ impl CompactStr { #[cfg(test)] mod test { - use crate::CompactStr; + use std::io::Cursor; + use proptest::prelude::*; use proptest::strategy::Strategy; - use std::io::Cursor; + + use crate::CompactStr; const MAX_INLINED_SIZE: usize = core::mem::size_of::(); // generates random unicode strings, upto 80 chars long fn rand_unicode() -> impl Strategy { - proptest::collection::vec(proptest::char::any(), 0..80).prop_map(|v| v.into_iter().collect()) + proptest::collection::vec(proptest::char::any(), 0..80) + .prop_map(|v| v.into_iter().collect()) } proptest! 
{ diff --git a/compact_str/src/repr/heap/mod.rs b/compact_str/src/repr/heap/mod.rs index 6bf484a9..f0244b9d 100644 --- a/compact_str/src/repr/heap/mod.rs +++ b/compact_str/src/repr/heap/mod.rs @@ -1,7 +1,5 @@ use std::mem; -use std::sync::Arc; - use super::{ HEAP_MASK, MAX_SIZE, diff --git a/examples/pb-jelly/src/main.rs b/examples/pb-jelly/src/main.rs index 126ab408..203fa72d 100644 --- a/examples/pb-jelly/src/main.rs +++ b/examples/pb-jelly/src/main.rs @@ -12,7 +12,7 @@ fn main() { address: Some(Address { street: "432 Park Ave".into(), city: "New York City".into(), - }) + }), }; let bytes = user.serialize_to_vec(); From d3fb90dc114a63ff22775fd52091ebcf47e08f1f Mon Sep 17 00:00:00 2001 From: Parker Timmerman Date: Mon, 3 Jan 2022 17:56:11 -0500 Subject: [PATCH 05/18] some clippy fixes --- Cargo.toml | 2 +- examples/pb-jelly/gen/Cargo.toml | 2 ++ examples/pb-jelly/protos/gen/proto_user/src/lib.rs | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 81baf451..62233f6d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,2 +1,2 @@ [workspace] -members = ["examples/pb-jelly","examples/pb-jelly/gen","examples/pb-jelly/protos/gen/proto_user", "examples/serde", "compact_str", "tracing_alloc"] +members = ["examples/pb-jelly", "examples/serde", "compact_str", "tracing_alloc"] diff --git a/examples/pb-jelly/gen/Cargo.toml b/examples/pb-jelly/gen/Cargo.toml index 44b544b5..382c0209 100644 --- a/examples/pb-jelly/gen/Cargo.toml +++ b/examples/pb-jelly/gen/Cargo.toml @@ -3,6 +3,8 @@ name = "gen" version = "0.1.0" edition = "2021" +[workspace] + # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] diff --git a/examples/pb-jelly/protos/gen/proto_user/src/lib.rs b/examples/pb-jelly/protos/gen/proto_user/src/lib.rs index 513495f0..707043d8 100644 --- a/examples/pb-jelly/protos/gen/proto_user/src/lib.rs +++ b/examples/pb-jelly/protos/gen/proto_user/src/lib.rs @@ -8,7 +8,7 @@ 
#![allow(unused_imports)] #![allow(unused_variables)] #![allow(irrefutable_let_patterns)] -#![allow(broken_intra_doc_links)] +#![allow(rustdoc::broken_intra_doc_links)] // Modules are generated based on the naming conventions of protobuf, which might cause "module inception" #![allow(clippy::module_inception)] From a8c9384f9646e1d12f318eae71c4df8d1fc9b9c0 Mon Sep 17 00:00:00 2001 From: Parker Timmerman Date: Mon, 3 Jan 2022 18:03:04 -0500 Subject: [PATCH 06/18] fix issue with edition 2021, and type issue with 32-bit archs --- compact_str/src/repr/bytes.rs | 2 +- examples/pb-jelly/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/compact_str/src/repr/bytes.rs b/compact_str/src/repr/bytes.rs index 4ccf9ce7..4dc0c546 100644 --- a/compact_str/src/repr/bytes.rs +++ b/compact_str/src/repr/bytes.rs @@ -8,7 +8,7 @@ use super::{ }; #[cfg(target_pointer_width = "32")] -const DEFAULT_TEXT: str = "000000000000"; +const DEFAULT_TEXT: &str = "000000000000"; #[cfg(target_pointer_width = "64")] const DEFAULT_TEXT: &str = "000000000000000000000000"; diff --git a/examples/pb-jelly/Cargo.toml b/examples/pb-jelly/Cargo.toml index 8111c377..02ec4d80 100644 --- a/examples/pb-jelly/Cargo.toml +++ b/examples/pb-jelly/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "pb-jelly" version = "0.1.0" -edition = "2021" +edition = "2018" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html From 3d7eeb9b1d7c5a3c4e4b8d4ffdd9e303f17e9f65 Mon Sep 17 00:00:00 2001 From: Parker Timmerman Date: Sat, 8 Jan 2022 13:52:23 -0500 Subject: [PATCH 07/18] get rid of pb-jelly, only impl bytes --- Cargo.toml | 2 +- compact_str/Cargo.toml | 4 - compact_str/src/features/mod.rs | 2 - compact_str/src/features/pb_jelly.rs | 29 -- examples/pb-jelly/Cargo.toml | 11 - examples/pb-jelly/gen/Cargo.toml | 11 - examples/pb-jelly/gen/src/main.rs | 11 - .../pb-jelly/protos/gen/proto_user/Cargo.toml | 10 - .../protos/gen/proto_user/src/basic.rs | 327 
------------------ .../pb-jelly/protos/gen/proto_user/src/lib.rs | 27 -- examples/pb-jelly/protos/user/basic.proto | 16 - examples/pb-jelly/src/main.rs | 22 -- 12 files changed, 1 insertion(+), 471 deletions(-) delete mode 100644 compact_str/src/features/pb_jelly.rs delete mode 100644 examples/pb-jelly/Cargo.toml delete mode 100644 examples/pb-jelly/gen/Cargo.toml delete mode 100644 examples/pb-jelly/gen/src/main.rs delete mode 100644 examples/pb-jelly/protos/gen/proto_user/Cargo.toml delete mode 100644 examples/pb-jelly/protos/gen/proto_user/src/basic.rs delete mode 100644 examples/pb-jelly/protos/gen/proto_user/src/lib.rs delete mode 100644 examples/pb-jelly/protos/user/basic.proto delete mode 100644 examples/pb-jelly/src/main.rs diff --git a/Cargo.toml b/Cargo.toml index 62233f6d..d934e31f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,2 +1,2 @@ [workspace] -members = ["examples/pb-jelly", "examples/serde", "compact_str", "tracing_alloc"] +members = ["examples/serde", "compact_str", "tracing_alloc"] diff --git a/compact_str/Cargo.toml b/compact_str/Cargo.toml index f3632d3f..b442c993 100644 --- a/compact_str/Cargo.toml +++ b/compact_str/Cargo.toml @@ -15,13 +15,9 @@ categories = ["encoding", "parsing", "memory-management", "text-processing"] [dependencies] bytes = { version = "1", optional = true } -pb-jelly = { version = "0.0.11", optional = true } serde = { version = "1", optional = true } static_assertions = "1" -[features] -proto = ["bytes", "pb-jelly"] - [dev-dependencies] criterion = { version = "0.3", features = ["html_reports"] } proptest = "1" diff --git a/compact_str/src/features/mod.rs b/compact_str/src/features/mod.rs index cd2875ee..6474acba 100644 --- a/compact_str/src/features/mod.rs +++ b/compact_str/src/features/mod.rs @@ -2,7 +2,5 @@ #[cfg(feature = "bytes")] mod bytes; -#[cfg(feature = "proto")] -mod pb_jelly; #[cfg(feature = "serde")] mod serde; diff --git a/compact_str/src/features/pb_jelly.rs b/compact_str/src/features/pb_jelly.rs deleted 
file mode 100644 index b6715913..00000000 --- a/compact_str/src/features/pb_jelly.rs +++ /dev/null @@ -1,29 +0,0 @@ -use pb_jelly::{ - Message, - Reflection, -}; - -use crate::CompactStr; - -impl Reflection for CompactStr {} - -impl Message for CompactStr { - fn compute_size(&self) -> usize { - self.len() - } - - fn serialize(&self, w: &mut W) -> std::io::Result<()> { - w.write_all(self.as_bytes())?; - Ok(()) - } - - fn deserialize(&mut self, r: &mut B) -> std::io::Result<()> { - match CompactStr::from_utf8_buf(r) { - Ok(compact) => { - *self = compact; - Ok(()) - } - Err(_) => Err(std::io::ErrorKind::InvalidData.into()), - } - } -} diff --git a/examples/pb-jelly/Cargo.toml b/examples/pb-jelly/Cargo.toml deleted file mode 100644 index 02ec4d80..00000000 --- a/examples/pb-jelly/Cargo.toml +++ /dev/null @@ -1,11 +0,0 @@ -[package] -name = "pb-jelly" -version = "0.1.0" -edition = "2018" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] -compact_str = { path = "../../compact_str", features = ["proto"] } -pb-jelly = "0.0.11" -proto_user = { path = "protos/gen/proto_user" } diff --git a/examples/pb-jelly/gen/Cargo.toml b/examples/pb-jelly/gen/Cargo.toml deleted file mode 100644 index 382c0209..00000000 --- a/examples/pb-jelly/gen/Cargo.toml +++ /dev/null @@ -1,11 +0,0 @@ -[package] -name = "gen" -version = "0.1.0" -edition = "2021" - -[workspace] - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] -pb-jelly-gen = "0.0.11" diff --git a/examples/pb-jelly/gen/src/main.rs b/examples/pb-jelly/gen/src/main.rs deleted file mode 100644 index 25070332..00000000 --- a/examples/pb-jelly/gen/src/main.rs +++ /dev/null @@ -1,11 +0,0 @@ -use pb_jelly_gen::GenProtos; - -fn main() -> std::io::Result<()> { - GenProtos::builder() - .out_path("../protos/gen") - .src_path("../protos") - .cleanup_out_path(true) - .gen_protos(); - - Ok(()) -} diff --git 
a/examples/pb-jelly/protos/gen/proto_user/Cargo.toml b/examples/pb-jelly/protos/gen/proto_user/Cargo.toml deleted file mode 100644 index 3de65d6c..00000000 --- a/examples/pb-jelly/protos/gen/proto_user/Cargo.toml +++ /dev/null @@ -1,10 +0,0 @@ -# @generated, do not edit -[package] -name = "proto_user" -version = "0.0.1" -edition = "2018" - -[dependencies] -compact_str = { path = "../../../../../compact_str", features = ["proto"] } -lazy_static = { version = "1.4.0" } -pb-jelly = { version = "0.0.11" } diff --git a/examples/pb-jelly/protos/gen/proto_user/src/basic.rs b/examples/pb-jelly/protos/gen/proto_user/src/basic.rs deleted file mode 100644 index e17be3ff..00000000 --- a/examples/pb-jelly/protos/gen/proto_user/src/basic.rs +++ /dev/null @@ -1,327 +0,0 @@ -// @generated, do not edit -#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] -pub struct Address { - pub street: ::compact_str::CompactStr, - pub city: ::compact_str::CompactStr, -} -impl ::std::default::Default for Address { - fn default() -> Self { - Address { - street: ::std::default::Default::default(), - city: ::std::default::Default::default(), - } - } -} -lazy_static! 
{ - pub static ref Address_default: Address = Address::default(); -} -impl ::pb_jelly::Message for Address { - fn descriptor(&self) -> ::std::option::Option<::pb_jelly::MessageDescriptor> { - Some(::pb_jelly::MessageDescriptor { - name: "Address", - full_name: "basic.Address", - fields: &[ - ::pb_jelly::FieldDescriptor { - name: "street", - full_name: "basic.Address.street", - index: 0, - number: 1, - typ: ::pb_jelly::wire_format::Type::LengthDelimited, - label: ::pb_jelly::Label::Optional, - oneof_index: None, - }, - ::pb_jelly::FieldDescriptor { - name: "city", - full_name: "basic.Address.city", - index: 1, - number: 2, - typ: ::pb_jelly::wire_format::Type::LengthDelimited, - label: ::pb_jelly::Label::Optional, - oneof_index: None, - }, - ], - oneofs: &[ - ], - }) - } - fn compute_size(&self) -> usize { - let mut size = 0; - let mut street_size = 0; - if self.street != <::compact_str::CompactStr as ::std::default::Default>::default() { - let val = &self.street; - let l = ::pb_jelly::Message::compute_size(val); - street_size += ::pb_jelly::wire_format::serialized_length(1); - street_size += ::pb_jelly::varint::serialized_length(l as u64); - street_size += l; - } - size += street_size; - let mut city_size = 0; - if self.city != <::compact_str::CompactStr as ::std::default::Default>::default() { - let val = &self.city; - let l = ::pb_jelly::Message::compute_size(val); - city_size += ::pb_jelly::wire_format::serialized_length(2); - city_size += ::pb_jelly::varint::serialized_length(l as u64); - city_size += l; - } - size += city_size; - size - } - fn compute_grpc_slices_size(&self) -> usize { - let mut size = 0; - if self.street != <::compact_str::CompactStr as ::std::default::Default>::default() { - let val = &self.street; - size += ::pb_jelly::Message::compute_grpc_slices_size(val); - } - if self.city != <::compact_str::CompactStr as ::std::default::Default>::default() { - let val = &self.city; - size += ::pb_jelly::Message::compute_grpc_slices_size(val); - } - 
size - } - fn serialize(&self, w: &mut W) -> ::std::io::Result<()> { - if self.street != <::compact_str::CompactStr as ::std::default::Default>::default() { - let val = &self.street; - ::pb_jelly::wire_format::write(1, ::pb_jelly::wire_format::Type::LengthDelimited, w)?; - let l = ::pb_jelly::Message::compute_size(val); - ::pb_jelly::varint::write(l as u64, w)?; - ::pb_jelly::Message::serialize(val, w)?; - } - if self.city != <::compact_str::CompactStr as ::std::default::Default>::default() { - let val = &self.city; - ::pb_jelly::wire_format::write(2, ::pb_jelly::wire_format::Type::LengthDelimited, w)?; - let l = ::pb_jelly::Message::compute_size(val); - ::pb_jelly::varint::write(l as u64, w)?; - ::pb_jelly::Message::serialize(val, w)?; - } - Ok(()) - } - fn deserialize(&mut self, mut buf: &mut B) -> ::std::io::Result<()> { - while let Some((field_number, typ)) = ::pb_jelly::wire_format::read(&mut buf)? { - match field_number { - 1 => { - ::pb_jelly::ensure_wire_format(typ, ::pb_jelly::wire_format::Type::LengthDelimited, "Address", 1)?; - let len = ::pb_jelly::varint::ensure_read(&mut buf)?; - let mut next = ::pb_jelly::ensure_split(buf, len as usize)?; - let mut val: ::compact_str::CompactStr = ::std::default::Default::default(); - ::pb_jelly::Message::deserialize(&mut val, &mut next)?; - self.street = val; - } - 2 => { - ::pb_jelly::ensure_wire_format(typ, ::pb_jelly::wire_format::Type::LengthDelimited, "Address", 2)?; - let len = ::pb_jelly::varint::ensure_read(&mut buf)?; - let mut next = ::pb_jelly::ensure_split(buf, len as usize)?; - let mut val: ::compact_str::CompactStr = ::std::default::Default::default(); - ::pb_jelly::Message::deserialize(&mut val, &mut next)?; - self.city = val; - } - _ => { - ::pb_jelly::skip(typ, &mut buf)?; - } - } - } - Ok(()) - } -} -impl ::pb_jelly::Reflection for Address { - fn which_one_of(&self, oneof_name: &str) -> ::std::option::Option<&'static str> { - match oneof_name { - _ => { - panic!("unknown oneof name given"); - } - } 
- } - fn get_field_mut(&mut self, field_name: &str) -> ::pb_jelly::reflection::FieldMut<'_> { - match field_name { - "street" => { - ::pb_jelly::reflection::FieldMut::Value(&mut self.street) - } - "city" => { - ::pb_jelly::reflection::FieldMut::Value(&mut self.city) - } - _ => { - panic!("unknown field name given") - } - } - } -} - -#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] -pub struct User { - pub name: ::compact_str::CompactStr, - pub age: u32, - pub address: ::std::option::Option
, -} -impl ::std::default::Default for User { - fn default() -> Self { - User { - name: ::std::default::Default::default(), - age: ::std::default::Default::default(), - address: ::std::default::Default::default(), - } - } -} -lazy_static! { - pub static ref User_default: User = User::default(); -} -impl ::pb_jelly::Message for User { - fn descriptor(&self) -> ::std::option::Option<::pb_jelly::MessageDescriptor> { - Some(::pb_jelly::MessageDescriptor { - name: "User", - full_name: "basic.User", - fields: &[ - ::pb_jelly::FieldDescriptor { - name: "name", - full_name: "basic.User.name", - index: 0, - number: 1, - typ: ::pb_jelly::wire_format::Type::LengthDelimited, - label: ::pb_jelly::Label::Optional, - oneof_index: None, - }, - ::pb_jelly::FieldDescriptor { - name: "age", - full_name: "basic.User.age", - index: 1, - number: 2, - typ: ::pb_jelly::wire_format::Type::Varint, - label: ::pb_jelly::Label::Optional, - oneof_index: None, - }, - ::pb_jelly::FieldDescriptor { - name: "address", - full_name: "basic.User.address", - index: 2, - number: 3, - typ: ::pb_jelly::wire_format::Type::LengthDelimited, - label: ::pb_jelly::Label::Optional, - oneof_index: None, - }, - ], - oneofs: &[ - ], - }) - } - fn compute_size(&self) -> usize { - let mut size = 0; - let mut name_size = 0; - if self.name != <::compact_str::CompactStr as ::std::default::Default>::default() { - let val = &self.name; - let l = ::pb_jelly::Message::compute_size(val); - name_size += ::pb_jelly::wire_format::serialized_length(1); - name_size += ::pb_jelly::varint::serialized_length(l as u64); - name_size += l; - } - size += name_size; - let mut age_size = 0; - if self.age != ::default() { - let val = &self.age; - let l = ::pb_jelly::Message::compute_size(val); - age_size += ::pb_jelly::wire_format::serialized_length(2); - age_size += l; - } - size += age_size; - let mut address_size = 0; - for val in &self.address { - let l = ::pb_jelly::Message::compute_size(val); - address_size += 
::pb_jelly::wire_format::serialized_length(3); - address_size += ::pb_jelly::varint::serialized_length(l as u64); - address_size += l; - } - size += address_size; - size - } - fn compute_grpc_slices_size(&self) -> usize { - let mut size = 0; - if self.name != <::compact_str::CompactStr as ::std::default::Default>::default() { - let val = &self.name; - size += ::pb_jelly::Message::compute_grpc_slices_size(val); - } - if self.age != ::default() { - let val = &self.age; - size += ::pb_jelly::Message::compute_grpc_slices_size(val); - } - for val in &self.address { - size += ::pb_jelly::Message::compute_grpc_slices_size(val); - } - size - } - fn serialize(&self, w: &mut W) -> ::std::io::Result<()> { - if self.name != <::compact_str::CompactStr as ::std::default::Default>::default() { - let val = &self.name; - ::pb_jelly::wire_format::write(1, ::pb_jelly::wire_format::Type::LengthDelimited, w)?; - let l = ::pb_jelly::Message::compute_size(val); - ::pb_jelly::varint::write(l as u64, w)?; - ::pb_jelly::Message::serialize(val, w)?; - } - if self.age != ::default() { - let val = &self.age; - ::pb_jelly::wire_format::write(2, ::pb_jelly::wire_format::Type::Varint, w)?; - ::pb_jelly::Message::serialize(val, w)?; - } - for val in &self.address { - ::pb_jelly::wire_format::write(3, ::pb_jelly::wire_format::Type::LengthDelimited, w)?; - let l = ::pb_jelly::Message::compute_size(val); - ::pb_jelly::varint::write(l as u64, w)?; - ::pb_jelly::Message::serialize(val, w)?; - } - Ok(()) - } - fn deserialize(&mut self, mut buf: &mut B) -> ::std::io::Result<()> { - while let Some((field_number, typ)) = ::pb_jelly::wire_format::read(&mut buf)? 
{ - match field_number { - 1 => { - ::pb_jelly::ensure_wire_format(typ, ::pb_jelly::wire_format::Type::LengthDelimited, "User", 1)?; - let len = ::pb_jelly::varint::ensure_read(&mut buf)?; - let mut next = ::pb_jelly::ensure_split(buf, len as usize)?; - let mut val: ::compact_str::CompactStr = ::std::default::Default::default(); - ::pb_jelly::Message::deserialize(&mut val, &mut next)?; - self.name = val; - } - 2 => { - ::pb_jelly::ensure_wire_format(typ, ::pb_jelly::wire_format::Type::Varint, "User", 2)?; - let mut val: u32 = ::std::default::Default::default(); - ::pb_jelly::Message::deserialize(&mut val, buf)?; - self.age = val; - } - 3 => { - ::pb_jelly::ensure_wire_format(typ, ::pb_jelly::wire_format::Type::LengthDelimited, "User", 3)?; - let len = ::pb_jelly::varint::ensure_read(&mut buf)?; - let mut next = ::pb_jelly::ensure_split(buf, len as usize)?; - let mut val: Address = ::std::default::Default::default(); - ::pb_jelly::Message::deserialize(&mut val, &mut next)?; - self.address = Some(val); - } - _ => { - ::pb_jelly::skip(typ, &mut buf)?; - } - } - } - Ok(()) - } -} -impl ::pb_jelly::Reflection for User { - fn which_one_of(&self, oneof_name: &str) -> ::std::option::Option<&'static str> { - match oneof_name { - _ => { - panic!("unknown oneof name given"); - } - } - } - fn get_field_mut(&mut self, field_name: &str) -> ::pb_jelly::reflection::FieldMut<'_> { - match field_name { - "name" => { - ::pb_jelly::reflection::FieldMut::Value(&mut self.name) - } - "age" => { - ::pb_jelly::reflection::FieldMut::Value(&mut self.age) - } - "address" => { - ::pb_jelly::reflection::FieldMut::Value(self.address.get_or_insert_with(::std::default::Default::default)) - } - _ => { - panic!("unknown field name given") - } - } - } -} - diff --git a/examples/pb-jelly/protos/gen/proto_user/src/lib.rs b/examples/pb-jelly/protos/gen/proto_user/src/lib.rs deleted file mode 100644 index 707043d8..00000000 --- a/examples/pb-jelly/protos/gen/proto_user/src/lib.rs +++ /dev/null @@ -1,27 
+0,0 @@ -// @generated, do not edit - -#![warn(rust_2018_idioms)] -#![allow(irrefutable_let_patterns)] -#![allow(non_camel_case_types)] -#![allow(non_snake_case)] -#![allow(non_upper_case_globals)] -#![allow(unused_imports)] -#![allow(unused_variables)] -#![allow(irrefutable_let_patterns)] -#![allow(rustdoc::broken_intra_doc_links)] - -// Modules are generated based on the naming conventions of protobuf, which might cause "module inception" -#![allow(clippy::module_inception)] -// This is all generated code, so "manually" implementing derivable impls is okay -#![allow(clippy::derivable_impls)] -// For enums with many variants, the matches!(...) macro isn't obviously better -#![allow(clippy::match_like_matches_macro)] -// TODO: Ideally we don't allow this -#![allow(clippy::option_as_ref_deref)] -// TODO: Ideally we don't allow this -#![allow(clippy::match_single_binding)] - -#[macro_use] -extern crate lazy_static; - -pub mod basic; diff --git a/examples/pb-jelly/protos/user/basic.proto b/examples/pb-jelly/protos/user/basic.proto deleted file mode 100644 index 57a45c8f..00000000 --- a/examples/pb-jelly/protos/user/basic.proto +++ /dev/null @@ -1,16 +0,0 @@ -syntax = "proto3"; -package basic; - -// rust/extensions.proto is included by default -import "rust/extensions.proto"; - -message Address { - string street = 1 [(rust.type)="::compact_str::CompactStr"]; - string city = 2 [(rust.type)="::compact_str::CompactStr"]; -} - -message User { - string name = 1 [(rust.type)="::compact_str::CompactStr"]; - uint32 age = 2; - Address address = 3; -} diff --git a/examples/pb-jelly/src/main.rs b/examples/pb-jelly/src/main.rs deleted file mode 100644 index 203fa72d..00000000 --- a/examples/pb-jelly/src/main.rs +++ /dev/null @@ -1,22 +0,0 @@ -use compact_str::CompactStr; -use pb_jelly::Message; -use proto_user::basic::{ - Address, - User, -}; - -fn main() { - let user = User { - name: CompactStr::new_inline("John"), - age: 42, - address: Some(Address { - street: "432 Park 
Ave".into(), - city: "New York City".into(), - }), - }; - let bytes = user.serialize_to_vec(); - - let roundtrip_user = User::deserialize_from_slice(&bytes).unwrap(); - println!("{:#?}", roundtrip_user); - assert_eq!(user, roundtrip_user); -} From 6c42dc617e05c0ad47ce7ea119a1574952a6ba54 Mon Sep 17 00:00:00 2001 From: Parker Timmerman Date: Sat, 8 Jan 2022 13:58:03 -0500 Subject: [PATCH 08/18] add bytes example --- Cargo.toml | 2 +- examples/bytes/Cargo.toml | 10 ++++++++++ examples/bytes/src/main.rs | 15 +++++++++++++++ 3 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 examples/bytes/Cargo.toml create mode 100644 examples/bytes/src/main.rs diff --git a/Cargo.toml b/Cargo.toml index d934e31f..c2da6b3c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,2 +1,2 @@ [workspace] -members = ["examples/serde", "compact_str", "tracing_alloc"] +members = ["examples/bytes", "examples/serde", "compact_str", "tracing_alloc"] diff --git a/examples/bytes/Cargo.toml b/examples/bytes/Cargo.toml new file mode 100644 index 00000000..91f5fd11 --- /dev/null +++ b/examples/bytes/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "bytes" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +bytes = "1" +compact_str = { path = "../../compact_str", features = ["bytes"] } diff --git a/examples/bytes/src/main.rs b/examples/bytes/src/main.rs new file mode 100644 index 00000000..8f26f1b8 --- /dev/null +++ b/examples/bytes/src/main.rs @@ -0,0 +1,15 @@ +use compact_str::CompactStr; +use std::io::Cursor; + +fn main() { + let word = "hello world!"; + + // Cursor<&[u8]> is `bytes::Buf` + let mut buf = Cursor::new(word.as_bytes()); + // `from_utf8_buf(...)` can fail, if the provided buffer is not valid UTF-8 + let compact_str = CompactStr::from_utf8_buf(&mut buf).expect("valid utf-8"); + + assert_eq!(compact_str, word); + + println!("{}", compact_str); +} From 
939017977ad48767e935d64a867e50705f6f7fd0 Mon Sep 17 00:00:00 2001 From: Parker Timmerman Date: Sat, 8 Jan 2022 13:58:50 -0500 Subject: [PATCH 09/18] add bytes example to CI workflow --- .github/workflows/ci.yml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cbe8fc3f..7af55c4e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -78,6 +78,21 @@ jobs: run: | cargo miri test + example-bytes: + name: example - bytes + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: nightly + override: true + - uses: actions-rs/cargo@v1 + with: + command: run + args: --manifest-path examples/bytes/Cargo.toml + example-serde: name: example - serde runs-on: ubuntu-latest From 87a611e7793d96c1a6ababb6804c3e4fb001d122 Mon Sep 17 00:00:00 2001 From: Parker Timmerman Date: Sat, 8 Jan 2022 14:06:29 -0500 Subject: [PATCH 10/18] provide from_utf8_buf_unchecked API --- compact_str/src/features/bytes.rs | 7 +++++++ compact_str/src/repr/bytes.rs | 34 +++++++++++++++++++------------ 2 files changed, 28 insertions(+), 13 deletions(-) diff --git a/compact_str/src/features/bytes.rs b/compact_str/src/features/bytes.rs index d7a3c70a..25cff988 100644 --- a/compact_str/src/features/bytes.rs +++ b/compact_str/src/features/bytes.rs @@ -12,6 +12,13 @@ impl CompactStr { pub fn from_utf8_buf(buf: &mut B) -> Result { Repr::from_utf8_buf(buf).map(|repr| CompactStr { repr }) } + + /// Converts a buffer of bytes to a `CompactStr`, without checking that the provided buffer is + /// valid UTF-8. 
+ pub unsafe fn from_utf8_buf_unchecked(buf: &mut B) -> Self { + let repr = Repr::from_utf8_buf_unchecked(buf); + CompactStr { repr } + } } #[cfg(test)] diff --git a/compact_str/src/repr/bytes.rs b/compact_str/src/repr/bytes.rs index 4dc0c546..675b94fa 100644 --- a/compact_str/src/repr/bytes.rs +++ b/compact_str/src/repr/bytes.rs @@ -15,8 +15,22 @@ const DEFAULT_TEXT: &str = "000000000000000000000000"; const DEFAULT_PACKED: Repr = Repr::new_const(DEFAULT_TEXT); impl Repr { - /// Converts a buffer of bytes to a `Repr` + /// Converts a buffer of bytes to a `Repr`, pub fn from_utf8_buf(buf: &mut B) -> Result { + // SAFETY: We check below to make sure the provided buffer is valid UTF-8 + let repr = unsafe { Self::from_utf8_buf_unchecked(buf) }; + + // Check to make sure the provided bytes are valid UTF-8, return the Repr if they are! + // + // TODO: Add an `as_slice()` method to Repr and refactor this call + match core::str::from_utf8(repr.as_str().as_bytes()) { + Ok(_) => Ok(repr), + Err(e) => Err(e), + } + } + + /// Converts a buffer of bytes to a `Repr`, without checking for valid UTF-8 + pub unsafe fn from_utf8_buf_unchecked(buf: &mut B) -> Self { let size = buf.remaining(); let chunk = buf.chunk(); @@ -24,7 +38,7 @@ impl Repr { if chunk.is_empty() { // If the chunk is empty, then we should have 0 remaining bytes debug_assert_eq!(size, 0); - return Ok(super::EMPTY); + return super::EMPTY; } let first_byte = buf.chunk()[0]; @@ -46,23 +60,17 @@ impl Repr { default }; - // SAFETY: Before returning this Repr we check to make sure the provided bytes are valid - // UTF-8 - let slice = unsafe { repr.as_mut_slice() }; + // SAFETY: The caller is responsible for making sure the provided buffer is UTF-8. This + // invariant is documented in the public API + let slice = repr.as_mut_slice(); // Copy the bytes from the buffer into our Repr! 
buf.copy_to_slice(&mut slice[..size]); // Set the length of the Repr // SAFETY: We just wrote `size` bytes into the Repr - unsafe { repr.set_len(size) }; + repr.set_len(size); - // Check to make sure the provided bytes are valid UTF-8, return the Repr if they are! - // - // TODO: Add an `as_slice()` method to Repr and refactor this call - match core::str::from_utf8(repr.as_str().as_bytes()) { - Ok(_) => Ok(repr), - Err(e) => Err(e), - } + repr } } From 891ec917d2d2a13d91513cd2d810d54c6c971975 Mon Sep 17 00:00:00 2001 From: Parker Timmerman Date: Sat, 8 Jan 2022 14:20:57 -0500 Subject: [PATCH 11/18] add doc tests for the bytes APIs --- compact_str/src/features/bytes.rs | 45 +++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/compact_str/src/features/bytes.rs b/compact_str/src/features/bytes.rs index 25cff988..c102ae11 100644 --- a/compact_str/src/features/bytes.rs +++ b/compact_str/src/features/bytes.rs @@ -9,12 +9,57 @@ use crate::{ impl CompactStr { /// Converts a buffer of bytes to a `CompactStr` + /// + /// # Examples + /// ### Basic usage + /// ``` + /// # use compact_str::CompactStr; + /// # use std::collections::VecDeque; + /// + /// // `bytes::Buf` is implemented for `VecDeque` + /// let mut sparkle_heart = VecDeque::from([240, 159, 146, 150]); + /// // We know these bytes are valid, so we can `.unwrap()` or `.expect(...)` here + /// let compact_str = CompactStr::from_utf8_buf(&mut sparkle_heart).expect("valid utf-8"); + /// + /// assert_eq!(compact_str, "💖"); + /// ``` + /// + /// ### With invalid/non-UTF8 bytes + /// ``` + /// # use compact_str::CompactStr; + /// # use std::io; + /// + /// // `bytes::Buf` is implemented for `std::io::Cursor<&[u8]>` + /// let mut invalid = io::Cursor::new(&[0, 159]); + /// + /// // The provided buffer is invalid, so trying to create a `CompactStr` will fail + /// assert!(CompactStr::from_utf8_buf(&mut invalid).is_err()); + /// ``` pub fn from_utf8_buf(buf: &mut B) -> Result {
Repr::from_utf8_buf(buf).map(|repr| CompactStr { repr }) } /// Converts a buffer of bytes to a `CompactStr`, without checking that the provided buffer is /// valid UTF-8. + /// + /// # Saftey + /// This function is unsafe because it does not check that the provided bytes are valid UTF-8. If + /// this constraint is violated, it may cause memory unsafety issues with futures uses of the + /// `ComapctStr`, as the rest of the library assumes that `CompactStr`s are valid UTF-8 + /// + /// # Examples + /// ``` + /// # use compact_str::CompactStr; + /// use std::io::Cursor; + /// + /// let word = "hello world"; + /// // `bytes::Buf` is implemented for `Cursor<&[u8]>` + /// let mut buffer = Cursor::new(word.as_bytes()); + /// + /// let compact_str = unsafe { CompactStr::from_utf8_buf_unchecked(&mut buffer) }; + /// + /// assert_eq!(compact_str, word); + /// ``` pub unsafe fn from_utf8_buf_unchecked(buf: &mut B) -> Self { let repr = Repr::from_utf8_buf_unchecked(buf); CompactStr { repr } From dd94227df65bb8b5c152a85a9637a130e648f23c Mon Sep 17 00:00:00 2001 From: Parker Timmerman Date: Sat, 8 Jan 2022 14:28:54 -0500 Subject: [PATCH 12/18] fix TODO: add as_slice() method to Repr --- compact_str/src/repr/bytes.rs | 7 ++++--- compact_str/src/repr/heap/arc.rs | 9 ++++++--- compact_str/src/repr/inline.rs | 10 ++++++---- compact_str/src/repr/mod.rs | 14 ++++++++++++++ compact_str/src/repr/packed.rs | 7 ++++++- 5 files changed, 36 insertions(+), 11 deletions(-) diff --git a/compact_str/src/repr/bytes.rs b/compact_str/src/repr/bytes.rs index 675b94fa..fb552e31 100644 --- a/compact_str/src/repr/bytes.rs +++ b/compact_str/src/repr/bytes.rs @@ -21,15 +21,16 @@ impl Repr { let repr = unsafe { Self::from_utf8_buf_unchecked(buf) }; // Check to make sure the provided bytes are valid UTF-8, return the Repr if they are! 
- // - // TODO: Add an `as_slice()` method to Repr and refactor this call - match core::str::from_utf8(repr.as_str().as_bytes()) { + match core::str::from_utf8(repr.as_slice()) { Ok(_) => Ok(repr), Err(e) => Err(e), } } /// Converts a buffer of bytes to a `Repr`, without checking for valid UTF-8 + /// + /// # Safety + /// The provided buffer must be valid UTF-8 pub unsafe fn from_utf8_buf_unchecked(buf: &mut B) -> Self { let size = buf.remaining(); let chunk = buf.chunk(); diff --git a/compact_str/src/repr/heap/arc.rs b/compact_str/src/repr/heap/arc.rs index 6e3ed03c..949b6f8c 100644 --- a/compact_str/src/repr/heap/arc.rs +++ b/compact_str/src/repr/heap/arc.rs @@ -56,11 +56,14 @@ impl ArcString { #[inline] pub fn as_str(&self) -> &str { - let buffer = self.inner().as_bytes(); - // SAFETY: The only way you can construct an `ArcString` is via a `&str` so it must be valid // UTF-8, or the caller has manually made those guarantees - unsafe { str::from_utf8_unchecked(&buffer[..self.len]) } + unsafe { str::from_utf8_unchecked(self.as_slice()) } + } + + #[inline(always)] + pub fn as_slice(&self) -> &[u8] { + &self.inner().as_bytes()[..self.len] } #[inline] diff --git a/compact_str/src/repr/inline.rs b/compact_str/src/repr/inline.rs index 5283eddb..5f9741f7 100644 --- a/compact_str/src/repr/inline.rs +++ b/compact_str/src/repr/inline.rs @@ -81,11 +81,13 @@ impl InlineString { #[inline] pub fn as_str(&self) -> &str { - let len = self.len(); - let slice = &self.buffer[..len]; - // SAFETY: You can only construct an InlineString via a &str - unsafe { ::std::str::from_utf8_unchecked(slice) } + unsafe { ::std::str::from_utf8_unchecked(self.as_slice()) } + } + + #[inline(always)] + pub fn as_slice(&self) -> &[u8] { + &self.buffer[..self.len()] } /// Provides a mutable reference to the underlying buffer diff --git a/compact_str/src/repr/mod.rs b/compact_str/src/repr/mod.rs index e17c71a1..44e53562 100644 --- a/compact_str/src/repr/mod.rs +++ b/compact_str/src/repr/mod.rs @@ -116,6 
+116,11 @@ impl Repr { self.cast().into_str() } + #[inline] + pub fn as_slice(&self) -> &[u8] { + self.cast().into_slice() + } + #[inline] pub unsafe fn as_mut_slice(&mut self) -> &mut [u8] { self.cast_mut().into_mut_slice() @@ -231,6 +236,15 @@ impl<'a> StrongRepr<'a> { Self::Heap(heap) => heap.string.as_str(), } } + + #[inline] + pub fn into_slice(&self) -> &'a [u8] { + match self { + Self::Inline(inline) => inline.as_slice(), + Self::Packed(packed) => packed.as_slice(), + Self::Heap(heap) => heap.string.as_slice(), + } + } } #[derive(Debug)] diff --git a/compact_str/src/repr/packed.rs b/compact_str/src/repr/packed.rs index 7380c939..cff2a970 100644 --- a/compact_str/src/repr/packed.rs +++ b/compact_str/src/repr/packed.rs @@ -60,7 +60,12 @@ impl PackedString { #[inline] pub fn as_str(&self) -> &str { // SAFETY: You can only construct a PackedString via a &str - unsafe { ::std::str::from_utf8_unchecked(&self.buffer) } + unsafe { ::std::str::from_utf8_unchecked(self.as_slice()) } + } + + #[inline(always)] + pub fn as_slice(&self) -> &[u8] { + &self.buffer[..] } /// Provides a mutable reference to the underlying buffer From f486a9f28c836efea7dfd0832886984c043d87a0 Mon Sep 17 00:00:00 2001 From: Parker Timmerman Date: Sat, 8 Jan 2022 14:30:23 -0500 Subject: [PATCH 13/18] clippy fixes --- compact_str/src/features/bytes.rs | 8 ++++---- compact_str/src/repr/mod.rs | 2 +- examples/bytes/src/main.rs | 3 ++- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/compact_str/src/features/bytes.rs b/compact_str/src/features/bytes.rs index c102ae11..eb6fd447 100644 --- a/compact_str/src/features/bytes.rs +++ b/compact_str/src/features/bytes.rs @@ -42,10 +42,10 @@ impl CompactStr { /// Converts a buffer of bytes to a `CompactStr`, without checking that the provided buffer is /// valid UTF-8. /// - /// # Saftey - /// This function is unsafe because it does not check that the provided bytes are valid UTF-8. 
If - /// this constraint is violated, it may cause memory unsafety issues with futures uses of the - /// `ComapctStr`, as the rest of the library assumes that `CompactStr`s are valid UTF-8 + /// # Safety + /// This function is unsafe because it does not check that the provided bytes are valid UTF-8. + /// If this constraint is violated, it may cause memory unsafety issues with future uses of + /// the `CompactStr`, as the rest of the library assumes that `CompactStr`s are valid UTF-8 /// /// # Examples /// ``` diff --git a/compact_str/src/repr/mod.rs b/compact_str/src/repr/mod.rs index 44e53562..6e7ef784 100644 --- a/compact_str/src/repr/mod.rs +++ b/compact_str/src/repr/mod.rs @@ -238,7 +238,7 @@ impl<'a> StrongRepr<'a> { } #[inline] - pub fn into_slice(&self) -> &'a [u8] { + pub fn into_slice(self) -> &'a [u8] { match self { Self::Inline(inline) => inline.as_slice(), Self::Packed(packed) => packed.as_slice(), diff --git a/examples/bytes/src/main.rs b/examples/bytes/src/main.rs index 8f26f1b8..7c27c186 100644 --- a/examples/bytes/src/main.rs +++ b/examples/bytes/src/main.rs @@ -1,6 +1,7 @@ -use compact_str::CompactStr; use std::io::Cursor; +use compact_str::CompactStr; + fn main() { let word = "hello world!"; From 33d1bddef766b3e2d3549fbc82bce36a0f07c79c Mon Sep 17 00:00:00 2001 From: Parker Timmerman Date: Sat, 8 Jan 2022 14:31:23 -0500 Subject: [PATCH 14/18] fix edition issue in bytes example --- examples/bytes/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/bytes/Cargo.toml b/examples/bytes/Cargo.toml index 91f5fd11..41e87d52 100644 --- a/examples/bytes/Cargo.toml +++ b/examples/bytes/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "bytes" version = "0.1.0" -edition = "2021" +edition = "2018" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html From ce680c7847ec6a17f5bb42f43181bdd107461d0a Mon Sep 17 00:00:00 2001 From: Parker Timmerman Date: Sat, 8 Jan 2022 17:34:18 -0500 Subject: 
[PATCH 15/18] expose as_slice(...) on CompactStr --- compact_str/src/lib.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/compact_str/src/lib.rs b/compact_str/src/lib.rs index 01364325..30967d27 100644 --- a/compact_str/src/lib.rs +++ b/compact_str/src/lib.rs @@ -133,6 +133,11 @@ impl CompactStr { self.repr.as_str() } + #[inline] + pub fn as_slice(&self) -> &[u8] { + self.repr.as_slice() + } + // TODO: Implement a `try_as_mut_slice(...)` that will fail if it results in cloning? // /// Provides a mutable reference to the underlying buffer of bytes. From 852801aa24ad6060c09e1b291a1f734c9a1ca493 Mon Sep 17 00:00:00 2001 From: Parker Timmerman Date: Sat, 8 Jan 2022 18:12:52 -0500 Subject: [PATCH 16/18] fix MSRV issue in doc tests --- compact_str/src/features/bytes.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compact_str/src/features/bytes.rs b/compact_str/src/features/bytes.rs index eb6fd447..ca0fa551 100644 --- a/compact_str/src/features/bytes.rs +++ b/compact_str/src/features/bytes.rs @@ -17,7 +17,7 @@ impl CompactStr { /// # use std::collections::VecDeque; /// /// // `bytes::Buf` is implemented for `VecDeque` - /// let mut sparkle_heart = VecDeque::from([240, 159, 146, 150]); + /// let mut sparkle_heart = VecDeque::from(vec![240, 159, 146, 150]); /// // We know these bytes are valid, so we can `.unwrap()` or `.expect(...)` here /// let compact_str = CompactStr::from_utf8_buf(&mut sparkle_heart).expect("valid utf-8"); /// From 18bf8adcf6186f5f16c8435f1bca194adf10e418 Mon Sep 17 00:00:00 2001 From: Parker Timmerman Date: Sat, 8 Jan 2022 18:14:27 -0500 Subject: [PATCH 17/18] small doc comment update --- compact_str/src/features/bytes.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/compact_str/src/features/bytes.rs b/compact_str/src/features/bytes.rs index ca0fa551..6b1478a3 100644 --- a/compact_str/src/features/bytes.rs +++ b/compact_str/src/features/bytes.rs @@ -50,12 +50,11 @@ impl CompactStr { /// # 
Examples /// ``` /// # use compact_str::CompactStr; - /// use std::io::Cursor; + /// # use std::io; /// /// let word = "hello world"; - /// // `bytes::Buf` is implemented for `Cursor<&[u8]>` - /// let mut buffer = Cursor::new(word.as_bytes()); - /// + /// // `bytes::Buf` is implemented for `std::io::Cursor<&[u8]>` + /// let mut buffer = io::Cursor::new(word.as_bytes()); /// let compact_str = unsafe { CompactStr::from_utf8_buf_unchecked(&mut buffer) }; /// /// assert_eq!(compact_str, word); From 3ff1bea0ba455b8e1a3f0fcb3038325a63655cab Mon Sep 17 00:00:00 2001 From: Parker Timmerman Date: Sat, 8 Jan 2022 18:55:16 -0500 Subject: [PATCH 18/18] reduce the number of runs in alloc test --- compact_str/tests/alloc.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/compact_str/tests/alloc.rs b/compact_str/tests/alloc.rs index 1ebcb127..1dcca489 100644 --- a/compact_str/tests/alloc.rs +++ b/compact_str/tests/alloc.rs @@ -22,8 +22,8 @@ fn test_randomized_allocations() { eprintln!("using seed: {}_u64", seed); let mut rng = StdRng::seed_from_u64(seed); - // generate a list of up to 10,000 words, with each word being up to 100 characters long - let num_words = rng.gen_range(0..10_000); + // generate a list of up to 1,000 words, with each word being up to 100 characters long + let num_words = rng.gen_range(0..1_000); let words: Vec = (0..num_words) .map(|_| { let len = rng.gen_range(0..100);