diff --git a/fuzz/generate_corpus.rs b/fuzz/generate_corpus.rs index 06b16f02..fe5d70b0 100644 --- a/fuzz/generate_corpus.rs +++ b/fuzz/generate_corpus.rs @@ -1,4 +1,4 @@ -use bson::{doc, Bson, Decimal128}; +use bson::{cstr, doc, Bson, Decimal128}; use std::{ fs, io::{Error, ErrorKind}, @@ -64,7 +64,7 @@ fn generate_type_marker_cases(dir: &Path) -> std::io::Result<()> { "bool": true, "date": bson::DateTime::now(), "null": Bson::Null, - "regex": Bson::RegularExpression(bson::Regex { pattern: "pattern".into(), options: "i".into() }), + "regex": Bson::RegularExpression(bson::Regex { pattern: cstr!("pattern").into(), options: cstr!("i").into() }), "int32": 123i32, "timestamp": bson::Timestamp { time: 12345, increment: 1 }, "int64": 123i64, diff --git a/serde-tests/json.rs b/serde-tests/json.rs index 7d6d3b2d..8b12f014 100644 --- a/serde-tests/json.rs +++ b/serde-tests/json.rs @@ -3,7 +3,7 @@ use serde_json::json; use super::AllTypes; -use bson::{doc, Bson, JavaScriptCodeWithScope, RawArrayBuf, RawBson, RawDocumentBuf}; +use bson::{cstr, doc, Bson, JavaScriptCodeWithScope, RawArrayBuf, RawBson, RawDocumentBuf}; use serde::{Deserialize, Serialize}; @@ -99,18 +99,18 @@ fn owned_raw_bson() { }); let mut doc_buf = RawDocumentBuf::new(); - doc_buf.append("a", "key").unwrap(); - doc_buf.append("number", 12).unwrap(); - doc_buf.append("bool", false).unwrap(); - doc_buf.append("nu", RawBson::Null).unwrap(); + doc_buf.append(cstr!("a"), "key"); + doc_buf.append(cstr!("number"), 12); + doc_buf.append(cstr!("bool"), false); + doc_buf.append(cstr!("nu"), RawBson::Null); let mut array_buf = RawArrayBuf::new(); - array_buf.push(1).unwrap(); - array_buf.push("string").unwrap(); + array_buf.push(1); + array_buf.push("string"); let mut bson_doc = RawDocumentBuf::new(); - bson_doc.append("first", true).unwrap(); - bson_doc.append("second", "string").unwrap(); + bson_doc.append(cstr!("first"), true); + bson_doc.append(cstr!("second"), "string"); let expected = Foo { doc_buf, diff --git a/serde-tests/test.rs b/serde-tests/test.rs index 9fbafdc4..00fe8f1c 100644 --- a/serde-tests/test.rs +++ b/serde-tests/test.rs @@ -18,6 +18,7 @@ use std::{ }; use bson::{ + cstr, doc, oid::ObjectId, spec::BinarySubtype, @@ -835,8 +836,8 @@ fn raw_regex() { let bytes = bson::serialize_to_vec(&doc! { "r": Regex { - pattern: "a[b-c]d".to_string(), - options: "ab".to_string(), + pattern: cstr!("a[b-c]d").into(), + options: cstr!("ab").into(), }, }) .expect("raw_regex"); @@ -927,8 +928,8 @@ impl AllTypes { }; let date = DateTime::now(); let regex = Regex { - pattern: "hello".to_string(), - options: "x".to_string(), + pattern: cstr!("hello").into(), + options: cstr!("x").into(), }; let timestamp = Timestamp { time: 123, @@ -1058,8 +1059,8 @@ fn all_raw_types_rmp() { scope: doc! { "x": 1 }, }, "regex": Regex { - pattern: "pattern".to_string(), - options: "opt".to_string() + pattern: cstr!("pattern").into(), + options: cstr!("opt").into() } }) .unwrap(); @@ -1254,24 +1255,22 @@ fn owned_raw_types() { let f = Foo { subdoc: RawDocumentBuf::from_iter([ - ("a key", RawBson::String("a value".to_string())), - ("an objectid", RawBson::ObjectId(oid)), - ("a date", RawBson::DateTime(dt)), + (cstr!("a key"), RawBson::String("a value".to_string())), + (cstr!("an objectid"), RawBson::ObjectId(oid)), + (cstr!("a date"), RawBson::DateTime(dt)), ( - "code_w_scope", + cstr!("code_w_scope"), RawBson::JavaScriptCodeWithScope(raw_code_w_scope.clone()), ), - ("decimal128", RawBson::Decimal128(d128)), - ]) - .unwrap(), + (cstr!("decimal128"), RawBson::Decimal128(d128)), + ]), array: RawArrayBuf::from_iter([ RawBson::String("a string".to_string()), RawBson::ObjectId(oid), RawBson::DateTime(dt), RawBson::JavaScriptCodeWithScope(raw_code_w_scope), RawBson::Decimal128(d128), - ]) - .unwrap(), + ]), }; let expected = doc! { diff --git a/src/bson.rs b/src/bson.rs index 8f35899d..24cbb031 100644 --- a/src/bson.rs +++ b/src/bson.rs @@ -31,7 +31,7 @@ use std::{ use serde_json::{json, Value}; pub use crate::document::Document; -use crate::{base64, oid, spec::ElementType, Binary, Decimal128}; +use crate::{base64, oid, raw::CString, spec::ElementType, Binary, Decimal128}; /// Possible BSON value types. #[derive(Clone, Default, PartialEq)] @@ -268,6 +268,12 @@ impl From for Bson { } } +impl From for Bson { + fn from(a: crate::raw::CString) -> Bson { + Bson::String(a.into_string()) + } +} + impl From for Bson { fn from(a: Document) -> Bson { Bson::Document(a) @@ -480,14 +486,14 @@ impl Bson { Bson::Boolean(v) => json!(v), Bson::Null => Value::Null, Bson::RegularExpression(Regex { pattern, options }) => { - let mut chars: Vec<_> = options.chars().collect(); + let mut chars: Vec<_> = options.as_str().chars().collect(); chars.sort_unstable(); let options: String = chars.into_iter().collect(); json!({ "$regularExpression": { - "pattern": pattern, + "pattern": pattern.into_string(), "options": options, } }) @@ -619,7 +625,7 @@ impl Bson { ref pattern, ref options, }) => { - let mut chars: Vec<_> = options.chars().collect(); + let mut chars: Vec<_> = options.as_str().chars().collect(); chars.sort_unstable(); let options: String = chars.into_iter().collect(); @@ -842,7 +848,9 @@ impl Bson { if let Ok(regex) = doc.get_document("$regularExpression") { if let Ok(pattern) = regex.get_str("pattern") { if let Ok(options) = regex.get_str("options") { - return Bson::RegularExpression(Regex::new(pattern, options)); + if let Ok(regex) = Regex::from_strings(pattern, options) { + return Bson::RegularExpression(regex); + } } } } @@ -1117,7 +1125,7 @@ impl Timestamp { #[derive(Debug, Clone, Eq, PartialEq, Hash)] pub struct Regex { /// The regex pattern to match. - pub pattern: String, + pub pattern: CString, /// The options for the regex. /// @@ -1126,18 +1134,22 @@ pub struct Regex { /// multiline matching, 'x' for verbose mode, 'l' to make \w, \W, etc. locale dependent, /// 's' for dotall mode ('.' matches everything), and 'u' to make \w, \W, etc. match /// unicode. - pub options: String, + pub options: CString, } impl Regex { - pub(crate) fn new(pattern: impl AsRef, options: impl AsRef) -> Self { + #[cfg(any(test, feature = "serde"))] + pub(crate) fn from_strings( + pattern: impl AsRef, + options: impl AsRef, + ) -> crate::error::Result { let mut chars: Vec<_> = options.as_ref().chars().collect(); chars.sort_unstable(); let options: String = chars.into_iter().collect(); - Self { - pattern: pattern.as_ref().to_string(), - options, - } + Ok(Self { + pattern: pattern.as_ref().to_string().try_into()?, + options: options.try_into()?, + }) } } diff --git a/src/de/raw.rs b/src/de/raw.rs index 4f809df1..d10711db 100644 --- a/src/de/raw.rs +++ b/src/de/raw.rs @@ -1306,15 +1306,15 @@ impl<'de> serde::de::Deserializer<'de> for &mut RegexAccess<'de> { RegexDeserializationStage::Pattern => { self.stage = RegexDeserializationStage::Options; match &self.re { - BsonCow::Borrowed(re) => visitor.visit_borrowed_str(re.pattern), - BsonCow::Owned(re) => visitor.visit_str(&re.pattern), + BsonCow::Borrowed(re) => visitor.visit_borrowed_str(re.pattern.as_str()), + BsonCow::Owned(re) => visitor.visit_str(re.pattern.as_str()), } } RegexDeserializationStage::Options => { self.stage = RegexDeserializationStage::Done; match &self.re { - BsonCow::Borrowed(re) => visitor.visit_borrowed_str(re.options), - BsonCow::Owned(re) => visitor.visit_str(&re.options), + BsonCow::Borrowed(re) => visitor.visit_borrowed_str(re.options.as_str()), + BsonCow::Owned(re) => visitor.visit_str(re.options.as_str()), } } RegexDeserializationStage::Done => { diff --git a/src/de/serde.rs b/src/de/serde.rs index bba415a4..6f454447 100644 --- a/src/de/serde.rs +++ b/src/de/serde.rs @@ -433,7 +433,9 @@ impl<'de> Visitor<'de> for BsonVisitor { "$regularExpression" => { let re = visitor.next_value::()?; - return Ok(Bson::RegularExpression(Regex::new(re.pattern, re.options))); + return Ok(Bson::RegularExpression( + Regex::from_strings(re.pattern, re.options).map_err(Error::custom)?, + )); } "$dbPointer" => { diff --git a/src/extjson/de.rs b/src/extjson/de.rs index f147ad43..aa78bd2e 100644 --- a/src/extjson/de.rs +++ b/src/extjson/de.rs @@ -37,6 +37,10 @@ pub enum Error { /// A general error encountered during deserialization. /// See: DeserializationError { message: String }, + + /// A generic crate error. + // TODO RUST-1406 collapse this + CrateError(crate::error::Error), } impl std::fmt::Display for Error { @@ -44,6 +48,7 @@ impl std::fmt::Display for Error { match *self { Self::InvalidObjectId(ref err) => err.fmt(fmt), Self::DeserializationError { ref message } => message.fmt(fmt), + Self::CrateError(ref err) => err.fmt(fmt), } } } @@ -69,6 +74,12 @@ impl From for Error { } } +impl From for Error { + fn from(value: crate::error::Error) -> Self { + Self::CrateError(value) + } +} + pub type Result = std::result::Result; /// This converts from the input JSON object as if it were [MongoDB Extended JSON v2](https://www.mongodb.com/docs/manual/reference/mongodb-extended-json/). @@ -88,7 +99,7 @@ impl TryFrom> for Bson { if obj.contains_key("$regularExpression") { let regex: models::Regex = serde_json::from_value(obj.into())?; - return Ok(regex.parse().into()); + return Ok(regex.parse()?.into()); } if obj.contains_key("$numberInt") { diff --git a/src/extjson/models.rs b/src/extjson/models.rs index caccd12f..0543fda6 100644 --- a/src/extjson/models.rs +++ b/src/extjson/models.rs @@ -134,8 +134,8 @@ pub(crate) struct RegexBody { } impl Regex { - pub(crate) fn parse(self) -> crate::Regex { - crate::Regex::new(self.body.pattern, self.body.options) + pub(crate) fn parse(self) -> crate::error::Result { + crate::Regex::from_strings(self.body.pattern, self.body.options) } } diff --git a/src/macros.rs b/src/macros.rs index 62d640b7..f1a18e78 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -240,12 +240,12 @@ macro_rules! rawbson { // Finished with trailing comma. (@array [$($elems:expr,)*]) => { - $crate::RawArrayBuf::from_iter(vec![$($elems,)*]).expect("invalid bson value") + $crate::RawArrayBuf::from_iter(vec![$($elems,)*]) }; // Finished without trailing comma. (@array [$($elems:expr),*]) => { - $crate::RawArrayBuf::from_iter(vec![$($elems),*]).expect("invalid bson value") + $crate::RawArrayBuf::from_iter(vec![$($elems),*]) }; // Next element is `null`. @@ -291,15 +291,26 @@ macro_rules! rawbson { // Finished. (@object $object:ident () () ()) => {}; - // Insert the current entry followed by trailing comma. - (@object $object:ident [$($key:tt)+] ($value:expr) , $($rest:tt)*) => { - $object.append(($($key)+), $value).expect("invalid bson value"); + // Insert the current entry with followed by trailing comma, with a key literal. + (@object $object:ident [$key:literal] ($value:expr) , $($rest:tt)*) => {{ + $object.append($crate::raw::cstr!($key), $value); + $crate::rawbson!(@object $object () ($($rest)*) ($($rest)*)); + }}; + + // Insert the current entry with followed by trailing comma, with a key expression. + (@object $object:ident [$($key:tt)+] ($value:expr) , $($rest:tt)*) => {{ + $object.append($($key)+, $value); $crate::rawbson!(@object $object () ($($rest)*) ($($rest)*)); + }}; + + // Insert the last entry without trailing comma, with a key literal. + (@object $object:ident [$key:literal] ($value:expr)) => { + $object.append($crate::raw::cstr!($key), $value); }; - // Insert the last entry without trailing comma. + // Insert the last entry without trailing comma, with a key expression. (@object $object:ident [$($key:tt)+] ($value:expr)) => { - $object.append(($($key)+), $value).expect("invalid bson value"); + $object.append($($key)+, $value); }; // Next value is `null`. diff --git a/src/raw.rs b/src/raw.rs index b1a2c8be..52d04a61 100644 --- a/src/raw.rs +++ b/src/raw.rs @@ -116,6 +116,7 @@ mod array; mod array_buf; mod bson; mod bson_ref; +mod cstr; mod document; mod document_buf; mod iter; @@ -142,6 +143,7 @@ pub use self::{ RawJavaScriptCodeWithScopeRef, RawRegexRef, }, + cstr::{assert_valid_cstr, cstr, validate_cstr, CStr, CString, IsValidCStr}, document::RawDocument, document_buf::{BindRawBsonRef, RawDocumentBuf}, iter::{RawElement, RawIter}, @@ -316,15 +318,3 @@ pub(crate) fn write_string(buf: &mut Vec, s: &str) { buf.extend(s.as_bytes()); buf.push(0); } - -pub(crate) fn write_cstring(buf: &mut Vec, s: &str) -> Result<()> { - if s.contains('\0') { - return Err(Error::malformed_bytes(format!( - "cstring with interior null: {:?}", - s - ))); - } - buf.extend(s.as_bytes()); - buf.push(0); - Ok(()) -} diff --git a/src/raw/array_buf.rs b/src/raw/array_buf.rs index d7e2068a..019c649c 100644 --- a/src/raw/array_buf.rs +++ b/src/raw/array_buf.rs @@ -53,19 +53,6 @@ impl RawArrayBuf { } } - #[allow(clippy::should_implement_trait)] - pub fn from_iter(iter: I) -> crate::error::Result - where - B: BindRawBsonRef, - I: IntoIterator, - { - let mut array_buf = RawArrayBuf::new(); - for item in iter { - array_buf.push(item)?; - } - Ok(array_buf) - } - /// Construct a new [`RawArrayBuf`] from the provided [`Vec`] of bytes. /// /// This involves a traversal of the array to count the values. @@ -78,14 +65,14 @@ impl RawArrayBuf { /// /// ``` /// # use bson::error::Error; - /// use bson::raw::{RawArrayBuf, RawDocumentBuf}; + /// use bson::raw::{cstr, RawArrayBuf, RawDocumentBuf}; /// /// let mut array = RawArrayBuf::new(); /// array.push("a string"); /// array.push(12_i32); /// /// let mut doc = RawDocumentBuf::new(); - /// doc.append("a key", "a value"); + /// doc.append(cstr!("a key"), "a value"); /// array.push(doc.clone()); /// /// let mut iter = array.into_iter(); @@ -102,10 +89,22 @@ impl RawArrayBuf { /// assert!(iter.next().is_none()); /// # Ok::<(), Error>(()) /// ``` - pub fn push(&mut self, value: impl BindRawBsonRef) -> crate::error::Result<()> { - self.inner.append(self.len.to_string(), value)?; + pub fn push(&mut self, value: impl BindRawBsonRef) { + self.inner.append( + super::CString::from_string_unchecked(self.len.to_string()), + value, + ); self.len += 1; - Ok(()) + } +} + +impl FromIterator for RawArrayBuf { + fn from_iter>(iter: T) -> Self { + let mut array_buf = RawArrayBuf::new(); + for item in iter { + array_buf.push(item); + } + array_buf } } @@ -200,7 +199,7 @@ impl TryFrom for RawArrayBuf { let mut tmp = RawArrayBuf::new(); for val in value { let raw: super::RawBson = val.try_into()?; - tmp.push(raw)?; + tmp.push(raw); } Ok(tmp) } diff --git a/src/raw/bson.rs b/src/raw/bson.rs index fb2c7252..e07d8459 100644 --- a/src/raw/bson.rs +++ b/src/raw/bson.rs @@ -204,8 +204,8 @@ impl RawBson { pub fn as_regex(&self) -> Option> { match self { RawBson::RegularExpression(v) => Some(RawRegexRef { - pattern: v.pattern.as_str(), - options: v.options.as_str(), + pattern: v.pattern.as_ref(), + options: v.options.as_ref(), }), _ => None, } @@ -289,8 +289,8 @@ impl RawBson { RawBson::Boolean(b) => RawBsonRef::Boolean(*b), RawBson::Null => RawBsonRef::Null, RawBson::RegularExpression(re) => RawBsonRef::RegularExpression(RawRegexRef { - options: re.options.as_str(), - pattern: re.pattern.as_str(), + options: re.options.as_ref(), + pattern: re.pattern.as_ref(), }), RawBson::JavaScriptCode(c) => RawBsonRef::JavaScriptCode(c.as_str()), RawBson::JavaScriptCodeWithScope(code_w_scope) => { diff --git a/src/raw/bson_ref.rs b/src/raw/bson_ref.rs index 8a72fed5..f6e6bfcf 100644 --- a/src/raw/bson_ref.rs +++ b/src/raw/bson_ref.rs @@ -3,7 +3,7 @@ use std::convert::{TryFrom, TryInto}; use super::{bson::RawBson, Error, RawArray, RawDocument, Result}; use crate::{ oid::{self, ObjectId}, - raw::{write_cstring, write_string, RawJavaScriptCodeWithScope}, + raw::{write_string, CStr, RawJavaScriptCodeWithScope}, spec::{BinarySubtype, ElementType}, Binary, Bson, @@ -256,7 +256,13 @@ impl<'a> RawBsonRef<'a> { RawBsonRef::Boolean(b) => RawBson::Boolean(b), RawBsonRef::Null => RawBson::Null, RawBsonRef::RegularExpression(re) => { - RawBson::RegularExpression(Regex::new(re.pattern, re.options)) + let mut chars: Vec<_> = re.options.as_str().chars().collect(); + chars.sort_unstable(); + let options: String = chars.into_iter().collect(); + RawBson::RegularExpression(Regex { + pattern: re.pattern.into(), + options: super::CString::from_string_unchecked(options), + }) } RawBsonRef::JavaScriptCode(c) => RawBson::JavaScriptCode(c.to_owned()), RawBsonRef::JavaScriptCodeWithScope(c_w_s) => { @@ -287,7 +293,7 @@ impl<'a> RawBsonRef<'a> { } #[inline] - pub(crate) fn append_to(self, dest: &mut Vec) -> Result<()> { + pub(crate) fn append_to(self, dest: &mut Vec) { match self { Self::Int32(val) => dest.extend(val.to_le_bytes()), Self::Int64(val) => dest.extend(val.to_le_bytes()), @@ -306,8 +312,8 @@ impl<'a> RawBsonRef<'a> { Self::Document(raw_document) => dest.extend(raw_document.as_bytes()), Self::Boolean(b) => dest.push(b as u8), Self::RegularExpression(re) => { - write_cstring(dest, re.pattern)?; - write_cstring(dest, re.options)?; + re.pattern.append_to(dest); + re.options.append_to(dest); } Self::JavaScriptCode(js) => write_string(dest, js), Self::JavaScriptCodeWithScope(code_w_scope) => { @@ -327,7 +333,6 @@ impl<'a> RawBsonRef<'a> { } Self::Null | Self::Undefined | Self::MinKey | Self::MaxKey => {} } - Ok(()) } } @@ -586,7 +591,7 @@ impl<'a> From<&'a Binary> for RawBsonRef<'a> { #[derive(Clone, Copy, Debug, PartialEq)] pub struct RawRegexRef<'a> { /// The regex pattern to match. - pub pattern: &'a str, + pub pattern: &'a CStr, /// The options for the regex. /// @@ -595,7 +600,7 @@ pub struct RawRegexRef<'a> { /// multiline matching, 'x' for verbose mode, 'l' to make \w, \W, etc. locale dependent, /// 's' for dotall mode ('.' matches everything), and 'u' to make \w, \W, etc. match /// unicode. - pub options: &'a str, + pub options: &'a CStr, } #[cfg(feature = "serde")] @@ -622,8 +627,8 @@ impl serde::Serialize for RawRegexRef<'_> { { #[derive(serde::Serialize)] struct BorrowedRegexBody<'a> { - pattern: &'a str, - options: &'a str, + pattern: &'a CStr, + options: &'a CStr, } let mut state = serializer.serialize_struct("$regularExpression", 1)?; diff --git a/src/raw/cstr.rs b/src/raw/cstr.rs new file mode 100644 index 00000000..8c7344ff --- /dev/null +++ b/src/raw/cstr.rs @@ -0,0 +1,201 @@ +use core::str; + +use crate::error::{Error, Result}; + +// A BSON-spec cstring: Zero or more UTF-8 encoded characters, excluding the null byte. +#[derive(Debug)] +#[repr(transparent)] +pub struct CStr { + data: [u8], +} + +impl<'a> TryFrom<&'a str> for &'a CStr { + type Error = Error; + + fn try_from(value: &str) -> Result<&CStr> { + match validate_cstr(value) { + Some(cs) => Ok(cs), + None => Err(Error::malformed_bytes(format!( + "cstring with interior null: {:?}", + value, + ))), + } + } +} + +impl CStr { + // Convenience shorthand for making the types of TryFrom line up + #[cfg(feature = "serde")] + pub(crate) fn from_str(value: &str) -> Result<&CStr> { + value.try_into() + } + + const fn from_str_unchecked(value: &str) -> &Self { + // Safety: the conversion is safe because CStr is repr(transparent), and the deref is safe + // because the pointer came from a safe reference. + unsafe { &*(value.as_bytes() as *const [u8] as *const CStr) } + } + + pub fn as_str(&self) -> &str { + // Safety: the only way to constrct a CStr is from a valid &str. + unsafe { str::from_utf8_unchecked(&self.data) } + } + + pub fn len(&self) -> usize { + self.as_str().len() + } + + pub fn is_empty(&self) -> bool { + self.as_str().is_empty() + } + + pub(crate) fn append_to(&self, buf: &mut Vec) { + buf.extend(&self.data); + buf.push(0); + } +} + +impl PartialEq<&CStr> for &CStr { + fn eq(&self, other: &&CStr) -> bool { + self.as_str() == other.as_str() + } +} + +impl std::borrow::ToOwned for CStr { + type Owned = CString; + + fn to_owned(&self) -> Self::Owned { + self.into() + } +} + +impl AsRef for CStr { + fn as_ref(&self) -> &CStr { + self + } +} + +impl AsRef for CStr { + fn as_ref(&self) -> &str { + self.as_str() + } +} + +#[cfg(feature = "serde")] +impl serde::Serialize for &CStr { + fn serialize(&self, serializer: S) -> std::result::Result + where + S: serde::Serializer, + { + self.as_str().serialize(serializer) + } +} + +#[diagnostic::on_unimplemented(message = "the string literal contains a zero byte")] +pub trait ValidCStr {} +pub struct IsValidCStr; +impl ValidCStr for IsValidCStr {} + +pub const fn validate_cstr(text: &str) -> Option<&CStr> { + let bytes = text.as_bytes(); + let mut i = 0; + while i < bytes.len() { + if bytes[i] == 0 { + return None; + } + i += 1; + } + Some(CStr::from_str_unchecked(text)) +} +pub const fn assert_valid_cstr() {} + +#[macro_export] +macro_rules! cstr { + ($text:literal) => {{ + const VALIDATED: Option<&$crate::raw::CStr> = $crate::raw::validate_cstr($text); + const VALID: bool = VALIDATED.is_some(); + $crate::raw::assert_valid_cstr::<$crate::raw::IsValidCStr>(); + VALIDATED.unwrap() + }}; +} +pub use cstr; + +#[derive(Clone, Eq, PartialEq, Hash)] +#[repr(transparent)] +pub struct CString { + data: String, +} + +impl TryFrom for CString { + type Error = Error; + + fn try_from(data: String) -> Result { + let _: &CStr = data.as_str().try_into()?; + Ok(Self { data }) + } +} + +impl TryFrom<&str> for CString { + type Error = Error; + + fn try_from(data: &str) -> Result { + let cs: &CStr = data.try_into()?; + Ok(cs.into()) + } +} + +impl CString { + pub(crate) fn from_string_unchecked(data: String) -> Self { + Self { data } + } + + pub fn into_string(self) -> String { + self.data + } + + pub fn as_str(&self) -> &str { + self.as_ref().as_str() + } +} + +impl From<&CStr> for CString { + fn from(value: &CStr) -> Self { + Self { + data: value.as_str().into(), + } + } +} + +impl AsRef for CString { + fn as_ref(&self) -> &CStr { + CStr::from_str_unchecked(self.data.as_str()) + } +} + +impl std::fmt::Debug for CString { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.data.fmt(f) + } +} + +impl std::fmt::Display for CString { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.data.fmt(f) + } +} + +impl std::borrow::Borrow for CString { + fn borrow(&self) -> &CStr { + self.as_ref() + } +} + +#[cfg(feature = "serde")] +impl serde::Serialize for CString { + fn serialize(&self, serializer: S) -> std::result::Result + where + S: serde::Serializer, + { + self.data.serialize(serializer) + } +} diff --git a/src/raw/document.rs b/src/raw/document.rs index 4746d7c5..6e6d6335 100644 --- a/src/raw/document.rs +++ b/src/raw/document.rs @@ -5,6 +5,7 @@ use std::{ use crate::{ error::{Error, Result}, + raw::CStr, Bson, DateTime, JavaScriptCodeWithScope, @@ -387,18 +388,18 @@ impl RawDocument { /// the key corresponds to a value which isn't a regex. /// /// ``` - /// use bson::{rawdoc, Regex}; + /// use bson::{rawdoc, Regex, raw::cstr}; /// /// let doc = rawdoc! { /// "regex": Regex { - /// pattern: r"end\s*$".into(), - /// options: "i".into(), + /// pattern: cstr!(r"end\s*$").into(), + /// options: cstr!("i").into(), /// }, /// "bool": true, /// }; /// - /// assert_eq!(doc.get_regex("regex")?.pattern, r"end\s*$"); - /// assert_eq!(doc.get_regex("regex")?.options, "i"); + /// assert_eq!(doc.get_regex("regex")?.pattern, cstr!(r"end\s*$")); + /// assert_eq!(doc.get_regex("regex")?.options, cstr!("i")); /// assert!(doc.get_regex("bool").is_err()); /// assert!(doc.get_regex("unknown").is_err()); /// # Ok::<(), Box>(()) @@ -505,9 +506,10 @@ impl RawDocument { } } - pub(crate) fn read_cstring_at(&self, start_at: usize) -> RawResult<&str> { + pub(crate) fn read_cstring_at(&self, start_at: usize) -> RawResult<&CStr> { let bytes = self.cstring_bytes_at(start_at)?; - try_to_str(bytes) + let s = try_to_str(bytes)?; + s.try_into() } /// Copy this into a [`Document`], returning an error if invalid BSON is encountered. diff --git a/src/raw/document_buf.rs b/src/raw/document_buf.rs index 2f5c22b9..c0e7ef92 100644 --- a/src/raw/document_buf.rs +++ b/src/raw/document_buf.rs @@ -4,7 +4,10 @@ use std::{ ops::Deref, }; -use crate::{raw::MIN_BSON_DOCUMENT_SIZE, Document}; +use crate::{ + raw::{CStr, MIN_BSON_DOCUMENT_SIZE}, + Document, +}; use super::{bson::RawBson, iter::Iter, RawBsonRef, RawDocument, RawIter, Result}; @@ -87,20 +90,6 @@ impl RawDocumentBuf { Self::decode_from_bytes(buf) } - #[allow(clippy::should_implement_trait)] - pub fn from_iter(iter: I) -> Result - where - S: AsRef, - B: BindRawBsonRef, - I: IntoIterator, - { - let mut buf = RawDocumentBuf::new(); - for (k, v) in iter { - buf.append(k, v)?; - } - Ok(buf) - } - /// Create a [`RawDocumentBuf`] from a [`Document`]. /// /// ``` @@ -117,8 +106,9 @@ impl RawDocumentBuf { pub fn from_document(doc: impl Borrow) -> Result { let mut out = RawDocumentBuf::new(); for (k, v) in doc.borrow() { + let k: &CStr = k.as_str().try_into()?; let val: RawBson = v.clone().try_into()?; - out.append(k, val)?; + out.append(k, val); } Ok(out) } @@ -193,17 +183,17 @@ impl RawDocumentBuf { /// the documentation for [BindRawBsonRef] for more details. /// ``` /// # use bson::error::Error; - /// use bson::{doc, raw::{RawBsonRef, RawDocumentBuf}}; + /// use bson::{doc, raw::{cstr, RawBsonRef, RawDocumentBuf}}; /// /// let mut doc = RawDocumentBuf::new(); /// // `&str` and `i32` both convert to `RawBsonRef` - /// doc.append("a string", "some string"); - /// doc.append("an integer", 12_i32); + /// doc.append(cstr!("a string"), "some string"); + /// doc.append(cstr!("an integer"), 12_i32); /// /// let mut subdoc = RawDocumentBuf::new(); - /// subdoc.append("a key", true); - /// doc.append("a borrowed document", &subdoc); - /// doc.append("an owned document", subdoc); + /// subdoc.append(cstr!("a key"), true); + /// doc.append(cstr!("a borrowed document"), &subdoc); + /// doc.append(cstr!("an owned document"), subdoc); /// /// let expected = doc! { /// "a string": "some string", @@ -215,14 +205,19 @@ impl RawDocumentBuf { /// assert_eq!(doc.to_document()?, expected); /// # Ok::<(), Error>(()) /// ``` - pub fn append( - &mut self, - key: impl AsRef, - value: impl BindRawBsonRef, - ) -> crate::error::Result<()> { - value.bind(|value_ref| { - raw_writer::RawWriter::new(&mut self.data).append(key.as_ref(), value_ref) - }) + pub fn append(&mut self, key: impl AsRef, value: impl BindRawBsonRef) { + let key = key.as_ref(); + value.bind(|value_ref| raw_writer::RawWriter::new(&mut self.data).append(key, value_ref)); + } +} + +impl, B: BindRawBsonRef> FromIterator<(K, B)> for RawDocumentBuf { + fn from_iter>(iter: T) -> Self { + let mut buf = RawDocumentBuf::new(); + for (k, v) in iter { + buf.append(k, v); + } + buf } } @@ -287,8 +282,9 @@ impl TryFrom for RawDocumentBuf { fn try_from(doc: Document) -> std::result::Result { let mut out = RawDocumentBuf::new(); for (k, v) in doc { + let k: &CStr = k.as_str().try_into()?; let val: RawBson = v.try_into()?; - out.append(k, val)?; + out.append(k, val); } Ok(out) } diff --git a/src/raw/document_buf/raw_writer.rs b/src/raw/document_buf/raw_writer.rs index dc550dbc..d988a823 100644 --- a/src/raw/document_buf/raw_writer.rs +++ b/src/raw/document_buf/raw_writer.rs @@ -1,4 +1,4 @@ -use crate::{raw::write_cstring, RawBsonRef}; +use crate::{raw::CStr, RawBsonRef}; pub(super) struct RawWriter<'a> { data: &'a mut Vec, @@ -9,19 +9,17 @@ impl<'a> RawWriter<'a> { Self { data } } - pub(super) fn append(&mut self, key: &str, value: RawBsonRef) -> crate::error::Result<()> { + pub(super) fn append(&mut self, key: &CStr, value: RawBsonRef) { let original_len = self.data.len(); self.data[original_len - 1] = value.element_type() as u8; - write_cstring(self.data, key)?; - value.append_to(self.data)?; + key.append_to(self.data); + value.append_to(self.data); // append trailing null byte self.data.push(0); // update length let new_len = (self.data.len() as i32).to_le_bytes(); self.data[0..4].copy_from_slice(&new_len); - - Ok(()) } } diff --git a/src/raw/iter.rs b/src/raw/iter.rs index 6355917e..7516c3c0 100644 --- a/src/raw/iter.rs +++ b/src/raw/iter.rs @@ -2,7 +2,7 @@ use std::convert::TryInto; use crate::{ oid::ObjectId, - raw::{Error, Result, MIN_BSON_DOCUMENT_SIZE, MIN_CODE_WITH_SCOPE_SIZE}, + raw::{CStr, Error, Result, MIN_BSON_DOCUMENT_SIZE, MIN_CODE_WITH_SCOPE_SIZE}, spec::{BinarySubtype, ElementType}, Bson, DateTime, @@ -50,7 +50,7 @@ impl<'a> Iterator for Iter<'a> { match self.inner.next() { Some(Ok(elem)) => match elem.value() { Err(e) => Some(Err(e)), - Ok(value) => Some(Ok((elem.key, value))), + Ok(value) => Some(Ok((elem.key.as_str(), value))), }, Some(Err(e)) => Some(Err(e)), None => None, @@ -111,7 +111,7 @@ impl<'a> RawIter<'a> { #[derive(Clone)] pub struct RawElement<'a> { - key: &'a str, + key: &'a CStr, kind: ElementType, doc: &'a RawDocument, start_at: usize, @@ -145,9 +145,11 @@ impl TryInto for RawElement<'_> { impl<'a> RawElement<'a> { #[cfg(feature = "serde")] pub(crate) fn toplevel(bytes: &'a [u8]) -> Result { + use crate::raw::cstr; + let doc = RawDocument::decode_from_bytes(bytes)?; Ok(Self { - key: "TOPLEVEL", + key: cstr!("TOPLEVEL"), kind: ElementType::EmbeddedDocument, doc, start_at: 0, @@ -160,7 +162,7 @@ impl<'a> RawElement<'a> { } pub fn key(&self) -> &'a str { - self.key + self.key.as_str() } pub fn element_type(&self) -> ElementType { @@ -305,11 +307,12 @@ impl<'a> RawElement<'a> { String::from_utf8_lossy(self.doc.cstring_bytes_at(self.start_at)?).into_owned(); let pattern_len = pattern.len(); Utf8LossyBson::RegularExpression(crate::Regex { - pattern, + pattern: pattern.try_into()?, options: String::from_utf8_lossy( self.doc.cstring_bytes_at(self.start_at + pattern_len + 1)?, ) - .into_owned(), + .into_owned() + .try_into()?, }) } _ => return Ok(None), @@ -317,7 +320,7 @@ impl<'a> RawElement<'a> { } fn malformed_error(&self, e: impl ToString) -> Error { - Error::malformed_bytes(e).with_key(self.key) + Error::malformed_bytes(e).with_key(self.key.as_str()) } pub(crate) fn slice(&self) -> &'a [u8] { @@ -344,7 +347,7 @@ impl<'a> RawElement<'a> { Ok(ObjectId::from_bytes( self.doc.as_bytes()[start_at..(start_at + 12)] .try_into() - .map_err(|e| Error::malformed_bytes(e).with_key(self.key))?, + .map_err(|e| Error::malformed_bytes(e).with_key(self.key.as_str()))?, )) } } @@ -443,7 +446,7 @@ impl<'a> Iterator for RawIter<'a> { }), Err(error) => { self.valid = false; - Err(error.with_key(key)) + Err(error.with_key(key.as_str())) } }) } diff --git a/src/raw/serde/bson_visitor.rs b/src/raw/serde/bson_visitor.rs index 8739321e..436b873e 100644 --- a/src/raw/serde/bson_visitor.rs +++ b/src/raw/serde/bson_visitor.rs @@ -84,14 +84,14 @@ impl OwnedOrBorrowedRawBsonVisitor { match (body.pattern, body.options) { (Cow::Borrowed(p), Cow::Borrowed(o)) => { RawBsonRef::RegularExpression(RawRegexRef { - pattern: p, - options: o, + pattern: p.try_into().map_err(A::Error::custom)?, + options: o.try_into().map_err(A::Error::custom)?, }) .into() } (p, o) => RawBson::RegularExpression(Regex { - pattern: p.into_owned(), - options: o.into_owned(), + pattern: p.into_owned().try_into().map_err(A::Error::custom)?, + options: o.into_owned().try_into().map_err(A::Error::custom)?, }) .into(), } diff --git a/src/raw/serde/seeded_visitor.rs b/src/raw/serde/seeded_visitor.rs index 8361421c..f6b0ae95 100644 --- a/src/raw/serde/seeded_visitor.rs +++ b/src/raw/serde/seeded_visitor.rs @@ -6,7 +6,7 @@ use serde::{ }; use crate::{ - raw::{write_cstring, write_string, RAW_BSON_NEWTYPE}, + raw::{write_string, RAW_BSON_NEWTYPE}, spec::{BinarySubtype, ElementType}, RawBson, RawBsonRef, @@ -119,7 +119,10 @@ impl<'a, 'de> SeededVisitor<'a, 'de> { /// Appends a cstring to the buffer. Returns an error if the given string contains a null byte. fn append_cstring(&mut self, key: &str) -> Result<(), String> { - write_cstring(self.buffer.get_owned_buffer(), key).map_err(|e| e.to_string()) + crate::raw::CStr::from_str(key) + .map_err(|e| e.to_string())? + .append_to(self.buffer.get_owned_buffer()); + Ok(()) } /// Appends a string and its length to the buffer. @@ -273,8 +276,7 @@ impl<'de> Visitor<'de> for SeededVisitor<'_, 'de> { // Cases that don't _ => { let bson = bson.as_ref(); - bson.append_to(self.buffer.get_owned_buffer()) - .map_err(A::Error::custom)?; + bson.append_to(self.buffer.get_owned_buffer()); Ok(bson.element_type()) } } diff --git a/src/raw/test.rs b/src/raw/test.rs index b651004d..ef28f456 100644 --- a/src/raw/test.rs +++ b/src/raw/test.rs @@ -104,7 +104,7 @@ fn rawdoc_to_doc() { "boolean": true, "datetime": DateTime::now(), "null": RawBson::Null, - "regex": Regex { pattern: String::from(r"end\s*$"), options: String::from("i")}, + "regex": Regex { pattern: cstr!(r"end\s*$").into(), options: cstr!("i").into()}, "javascript": RawBson::JavaScriptCode(String::from("console.log(console);")), "symbol": RawBson::Symbol(String::from("artist-formerly-known-as")), "javascript_with_scope": RawJavaScriptCodeWithScope { @@ -267,7 +267,7 @@ fn null() { #[test] fn regex() { let rawdoc = rawdoc! { - "regex": Regex { pattern: String::from(r"end\s*$"), options: String::from("i")}, + "regex": Regex { pattern: cstr!(r"end\s*$").into(), options: cstr!("i").into()}, }; let regex = rawdoc .get("regex") @@ -275,8 +275,8 @@ fn regex() { .expect("no key regex") .as_regex() .expect("was not regex"); - assert_eq!(regex.pattern, r"end\s*$"); - assert_eq!(regex.options, "i"); + assert_eq!(regex.pattern, cstr!(r"end\s*$")); + assert_eq!(regex.options, cstr!("i")); } #[test] fn javascript() { @@ -388,7 +388,7 @@ fn document_iteration() { "boolean": true, "datetime": DateTime::now(), "null": RawBson::Null, - "regex": Regex { pattern: String::from(r"end\s*$"), options: String::from("i") }, + "regex": Regex { pattern: cstr!(r"end\s*$").into(), options: cstr!("i").into() }, "javascript": RawBson::JavaScriptCode(String::from("console.log(console);")), "symbol": RawBson::Symbol(String::from("artist-formerly-known-as")), "javascript_with_scope": RawJavaScriptCodeWithScope { diff --git a/src/raw/test/append.rs b/src/raw/test/append.rs index fdaeef7f..e530ab7f 100644 --- a/src/raw/test/append.rs +++ b/src/raw/test/append.rs @@ -1,6 +1,6 @@ use crate::{ oid::ObjectId, - raw::RawJavaScriptCodeWithScope, + raw::{cstr, RawJavaScriptCodeWithScope}, spec::BinarySubtype, tests::LOCK, Binary, @@ -19,13 +19,10 @@ use crate::{ use pretty_assertions::assert_eq; -fn append_test( - expected: Document, - append: impl FnOnce(&mut RawDocumentBuf) -> crate::error::Result<()>, -) { +fn append_test(expected: Document, append: impl FnOnce(&mut RawDocumentBuf)) { let bytes = expected.encode_to_vec().unwrap(); let mut buf = RawDocumentBuf::new(); - assert!(append(&mut buf).is_ok()); + append(&mut buf); assert_eq!(buf.as_bytes(), bytes); } @@ -37,10 +34,9 @@ fn i32() { "c": 0_i32 }; append_test(expected, |doc| { - doc.append("a", -1_i32)?; - doc.append("b", 123_i32)?; - doc.append("c", 0_i32)?; - Ok(()) + doc.append(cstr!("a"), -1_i32); + doc.append(cstr!("b"), 123_i32); + doc.append(cstr!("c"), 0_i32); }); } @@ -52,10 +48,9 @@ fn i64() { "c": 0_i64 }; append_test(expected, |doc| { - doc.append("a", -1_i64)?; - doc.append("b", 123_i64)?; - doc.append("c", 0_i64)?; - Ok(()) + doc.append(cstr!("a"), -1_i64); + doc.append(cstr!("b"), 123_i64); + doc.append(cstr!("c"), 0_i64); }); } @@ -68,11 +63,10 @@ fn str() { "last": "the lazy sheep dog", }; append_test(expected, |doc| { - doc.append("first", "the quick")?; - doc.append("second", "brown fox")?; - doc.append("third", "jumped over")?; - doc.append("last", "the lazy sheep dog")?; - Ok(()) + doc.append(cstr!("first"), "the quick"); + doc.append(cstr!("second"), "brown fox"); + doc.append(cstr!("third"), "jumped over"); + doc.append(cstr!("last"), "the lazy sheep dog"); }); } @@ -86,12 +80,11 @@ fn double() { "inf": f64::INFINITY, }; append_test(expected, |doc| { - doc.append("positive", 12.5)?; - doc.append("0", 0.0)?; - doc.append("negative", -123.24)?; - doc.append("nan", f64::NAN)?; - doc.append("inf", f64::INFINITY)?; - Ok(()) + doc.append(cstr!("positive"), 12.5); + doc.append(cstr!("0"), 0.0); + doc.append(cstr!("negative"), -123.24); + doc.append(cstr!("nan"), f64::NAN); + doc.append(cstr!("inf"), f64::INFINITY); }); } @@ -102,9 +95,8 @@ fn boolean() { "false": false, }; append_test(expected, |doc| { - doc.append("true", true)?; - doc.append("false", false)?; - Ok(()) + doc.append(cstr!("true"), true); + doc.append(cstr!("false"), false); }); } @@ -113,7 +105,7 @@ fn null() { let expected = doc! { "null": null, }; - append_test(expected, |doc| doc.append("null", RawBson::Null)); + append_test(expected, |doc| doc.append(cstr!("null"), RawBson::Null)); } #[test] @@ -126,12 +118,11 @@ fn document() { } }; append_test(expected, |doc| { - doc.append("empty", RawDocumentBuf::new())?; + doc.append(cstr!("empty"), RawDocumentBuf::new()); let mut buf = RawDocumentBuf::new(); - buf.append("a", 1_i32)?; - buf.append("b", true)?; - doc.append("subdoc", buf)?; - Ok(()) + buf.append(cstr!("a"), 1_i32); + buf.append(cstr!("b"), true); + doc.append(cstr!("subdoc"), buf); }); } @@ -147,16 +138,15 @@ fn array() { ] }; append_test(expected, |doc| { - doc.append("empty", RawArrayBuf::new())?; + doc.append(cstr!("empty"), RawArrayBuf::new()); let mut buf = RawArrayBuf::new(); - buf.push(true)?; - buf.push("string")?; + buf.push(true); + buf.push("string"); let mut subdoc = RawDocumentBuf::new(); - subdoc.append("a", "subdoc")?; - buf.push(subdoc)?; - buf.push(123_i32)?; - doc.append("array", buf)?; - Ok(()) + subdoc.append(cstr!("a"), "subdoc"); + buf.push(subdoc); + buf.push(123_i32); + doc.append(cstr!("array"), buf); }); } @@ -168,7 +158,7 @@ fn oid() { let expected = doc! { "oid": oid, }; - append_test(expected, |doc| doc.append("oid", oid)); + append_test(expected, |doc| doc.append(cstr!("oid"), oid)); } #[test] @@ -182,9 +172,8 @@ fn datetime() { }; append_test(expected, |doc| { - doc.append("now", dt)?; - doc.append("old", old)?; - Ok(()) + doc.append(cstr!("now"), dt); + doc.append(cstr!("old"), old); }); } @@ -199,7 +188,7 @@ fn timestamp() { "ts": ts, }; - append_test(expected, |doc| doc.append("ts", ts)); + append_test(expected, |doc| doc.append(cstr!("ts"), ts)); } #[test] @@ -222,9 +211,8 @@ fn binary() { }; append_test(expected, |doc| { - doc.append("generic", bin)?; - doc.append("binary_old", old)?; - Ok(()) + doc.append(cstr!("generic"), bin); + doc.append(cstr!("binary_old"), old); }); } @@ -236,9 +224,8 @@ fn min_max_key() { }; append_test(expected, |doc| { - doc.append("min", RawBson::MinKey)?; - doc.append("max", RawBson::MaxKey)?; - Ok(()) + doc.append(cstr!("min"), RawBson::MinKey); + doc.append(cstr!("max"), RawBson::MaxKey); }); } @@ -248,17 +235,22 @@ fn undefined() { "undefined": Bson::Undefined, }; - append_test(expected, |doc| doc.append("undefined", RawBson::Undefined)); + append_test(expected, |doc| { + doc.append(cstr!("undefined"), RawBson::Undefined) + }); } #[test] fn regex() { let expected = doc! { - "regex": Regex::new("some pattern", "abc"), + "regex": Regex::from_strings("some pattern", "abc").unwrap(), }; append_test(expected, |doc| { - doc.append("regex", Regex::new("some pattern", "abc")) + doc.append( + cstr!("regex"), + Regex::from_strings("some pattern", "abc").unwrap(), + ) }); } @@ -275,19 +267,21 @@ fn code() { }; append_test(expected, |doc| { - doc.append("code", RawBson::JavaScriptCode("some code".to_string()))?; + doc.append( + cstr!("code"), + RawBson::JavaScriptCode("some code".to_string()), + ); let mut scope = RawDocumentBuf::new(); - scope.append("a", 1_i32)?; - scope.append("b", true)?; + scope.append(cstr!("a"), 1_i32); + scope.append(cstr!("b"), true); doc.append( - "code_w_scope", + cstr!("code_w_scope"), RawJavaScriptCodeWithScope { code: "some code".to_string(), scope, }, - )?; - Ok(()) + ); }); } @@ -298,7 +292,7 @@ fn symbol() { }; append_test(expected, |doc| { - doc.append("symbol", RawBson::Symbol("symbol".to_string())) + doc.append(cstr!("symbol"), RawBson::Symbol("symbol".to_string())) }); } @@ -317,7 +311,7 @@ fn dbpointer() { append_test(expected, |doc| { doc.append( - "symbol", + cstr!("symbol"), RawBson::DbPointer(DbPointer { namespace: "ns".to_string(), id, @@ -333,7 +327,7 @@ fn decimal128() { "decimal": decimal }; - append_test(expected, |doc| doc.append("decimal", decimal)); + append_test(expected, |doc| doc.append(cstr!("decimal"), decimal)); } #[test] @@ -352,34 +346,33 @@ fn general() { }; append_test(expected, |doc| { - doc.append("a", true)?; - doc.append("second key", 123.4)?; - doc.append("third", 15_i64)?; - doc.append("32", -100101_i32)?; + doc.append(cstr!("a"), true); + doc.append(cstr!("second key"), 123.4); + doc.append(cstr!("third"), 15_i64); + doc.append(cstr!("32"), -100101_i32); let mut subdoc = RawDocumentBuf::new(); - subdoc.append("a", "subkey")?; + subdoc.append(cstr!("a"), "subkey"); let mut subsubdoc = RawDocumentBuf::new(); - subsubdoc.append("subdoc", dt)?; - subdoc.append("another", subsubdoc)?; - doc.append("subdoc", subdoc)?; + subsubdoc.append(cstr!("subdoc"), dt); + subdoc.append(cstr!("another"), subsubdoc); + doc.append(cstr!("subdoc"), subdoc); let mut array = RawArrayBuf::new(); - array.push(1_i64)?; - array.push(true)?; + array.push(1_i64); + array.push(true); let mut array_subdoc = RawDocumentBuf::new(); - array_subdoc.append("doc", 23_i64)?; - array.push(array_subdoc)?; + array_subdoc.append(cstr!("doc"), 23_i64); + array.push(array_subdoc); let mut sub_array = RawArrayBuf::new(); - sub_array.push("another")?; - sub_array.push("array")?; - array.push(sub_array)?; + sub_array.push("another"); + sub_array.push("array"); + array.push(sub_array); - doc.append("array", array)?; - Ok(()) + doc.append(cstr!("array"), array); }); } @@ -387,25 +380,18 @@ fn general() { fn from_iter() { let doc_buf = RawDocumentBuf::from_iter([ ( - "array", - RawBson::Array( - RawArrayBuf::from_iter([ - RawBson::Boolean(true), - RawBson::Document( - RawDocumentBuf::from_iter([ - ("ok", RawBson::Boolean(false)), - ("other", RawBson::String("hello".to_string())), - ]) - .unwrap(), - ), - ]) - .unwrap(), - ), + cstr!("array"), + RawBson::Array(RawArrayBuf::from_iter([ + RawBson::Boolean(true), + RawBson::Document(RawDocumentBuf::from_iter([ + (cstr!("ok"), RawBson::Boolean(false)), + (cstr!("other"), RawBson::String("hello".to_string())), + ])), + ])), ), - ("bool", RawBson::Boolean(true)), - ("string", RawBson::String("some string".to_string())), - ]) - .unwrap(); + (cstr!("bool"), RawBson::Boolean(true)), + (cstr!("string"), RawBson::String("some string".to_string())), + ]); let doc = doc! { "array": [ @@ -420,22 +406,22 @@ fn from_iter() { }; let expected = doc! { "expected": doc }; - append_test(expected, |doc| doc.append("expected", doc_buf)); + append_test(expected, |doc| doc.append(cstr!("expected"), doc_buf)); } #[test] fn array_buf() { let mut arr_buf = RawArrayBuf::new(); - arr_buf.push(true).unwrap(); + arr_buf.push(true); let mut doc_buf = RawDocumentBuf::new(); - doc_buf.append("x", 3_i32).unwrap(); - doc_buf.append("string", "string").unwrap(); - arr_buf.push(doc_buf).unwrap(); + doc_buf.append(cstr!("x"), 3_i32); + doc_buf.append(cstr!("string"), "string"); + arr_buf.push(doc_buf); let mut sub_arr = RawArrayBuf::new(); - sub_arr.push("a string").unwrap(); - arr_buf.push(sub_arr).unwrap(); + sub_arr.push("a string"); + arr_buf.push(sub_arr); let arr = rawbson!([ true, diff --git a/src/raw/test/props.rs b/src/raw/test/props.rs index 6f0157d2..4b5918ff 100644 --- a/src/raw/test/props.rs +++ b/src/raw/test/props.rs @@ -22,7 +22,7 @@ pub(crate) fn arbitrary_bson() -> impl Strategy { any::().prop_map(Bson::Int32), any::().prop_map(Bson::Int64), any::<(String, String)>().prop_map(|(pattern, options)| { - Bson::RegularExpression(Regex::new(pattern, options)) + Bson::RegularExpression(Regex::from_strings(pattern, options).unwrap()) }), any::<[u8; 12]>().prop_map(|bytes| Bson::ObjectId(crate::oid::ObjectId::from_bytes(bytes))), (arbitrary_binary_subtype(), any::>()).prop_map(|(subtype, bytes)| { diff --git a/src/ser/raw.rs b/src/ser/raw.rs index 6980423b..189b4d80 100644 --- a/src/ser/raw.rs +++ b/src/ser/raw.rs @@ -9,7 +9,7 @@ use serde::{ use self::value_serializer::{ValueSerializer, ValueType}; use crate::{ - raw::{write_cstring, RAW_ARRAY_NEWTYPE, RAW_DOCUMENT_NEWTYPE}, + raw::{CStr, RAW_ARRAY_NEWTYPE, RAW_DOCUMENT_NEWTYPE}, ser::{Error, Result}, serde_helpers::HUMAN_READABLE_NEWTYPE, spec::{BinarySubtype, ElementType}, @@ -108,7 +108,7 @@ impl Serializer { fn serialize_raw(&mut self, v: RawBsonRef) -> Result<()> { self.update_element_type(v.element_type())?; - v.append_to(&mut self.bytes)?; + v.append_to(&mut self.bytes); Ok(()) } } @@ -290,7 +290,7 @@ impl<'a> serde::Serializer for &'a mut Serializer { T: serde::Serialize + ?Sized, { self.update_element_type(ElementType::EmbeddedDocument)?; - let mut d = DocumentSerializer::start(&mut *self)?; + let mut d = DocumentSerializer::start(&mut *self); d.serialize_entry(variant, value)?; d.end_doc()?; Ok(()) @@ -299,7 +299,7 @@ impl<'a> serde::Serializer for &'a mut Serializer { #[inline] fn serialize_seq(self, _len: Option) -> Result { self.update_element_type(ElementType::Array)?; - DocumentSerializer::start(&mut *self) + Ok(DocumentSerializer::start(&mut *self)) } #[inline] @@ -325,13 +325,17 @@ impl<'a> serde::Serializer for &'a mut Serializer { _len: usize, ) -> Result { self.update_element_type(ElementType::EmbeddedDocument)?; - VariantSerializer::start(&mut *self, variant, VariantInnerType::Tuple) + Ok(VariantSerializer::start( + &mut *self, + variant.try_into()?, + VariantInnerType::Tuple, + )) } #[inline] fn serialize_map(self, _len: Option) -> Result { self.update_element_type(ElementType::EmbeddedDocument)?; - DocumentSerializer::start(&mut *self) + Ok(DocumentSerializer::start(&mut *self)) } #[inline] @@ -360,7 +364,7 @@ impl<'a> serde::Serializer for &'a mut Serializer { )?; match value_type { Some(vt) => Ok(StructSerializer::Value(ValueSerializer::new(self, vt))), - None => Ok(StructSerializer::Document(DocumentSerializer::start(self)?)), + None => Ok(StructSerializer::Document(DocumentSerializer::start(self))), } } @@ -373,7 +377,11 @@ impl<'a> serde::Serializer for &'a mut Serializer { _len: usize, ) -> Result { self.update_element_type(ElementType::EmbeddedDocument)?; - VariantSerializer::start(&mut *self, variant, VariantInnerType::Struct) + Ok(VariantSerializer::start( + &mut *self, + variant.try_into()?, + VariantInnerType::Struct, + )) } } @@ -431,32 +439,28 @@ pub(crate) struct VariantSerializer<'a> { } impl<'a> VariantSerializer<'a> { - fn start( - rs: &'a mut Serializer, - variant: &'static str, - inner_type: VariantInnerType, - ) -> Result { + fn start(rs: &'a mut Serializer, variant: &'static CStr, inner_type: VariantInnerType) -> Self { let doc_start = rs.bytes.len(); // write placeholder length for document, will be updated at end static ZERO: RawBsonRef = RawBsonRef::Int32(0); - ZERO.append_to(&mut rs.bytes)?; + ZERO.append_to(&mut rs.bytes); let inner = match inner_type { VariantInnerType::Struct => ElementType::EmbeddedDocument, VariantInnerType::Tuple => ElementType::Array, }; rs.bytes.push(inner as u8); - write_cstring(&mut rs.bytes, variant)?; + variant.append_to(&mut rs.bytes); let inner_start = rs.bytes.len(); // write placeholder length for inner, will be updated at end - ZERO.append_to(&mut rs.bytes)?; + ZERO.append_to(&mut rs.bytes); - Ok(Self { + Self { root_serializer: rs, num_elements_serialized: 0, doc_start, inner_start, - }) + } } #[inline] @@ -465,7 +469,7 @@ impl<'a> VariantSerializer<'a> { T: Serialize + ?Sized, { self.root_serializer.reserve_element_type(); - write_cstring(&mut self.root_serializer.bytes, k)?; + CStr::from_str(k)?.append_to(&mut self.root_serializer.bytes); v.serialize(&mut *self.root_serializer)?; self.num_elements_serialized += 1; diff --git a/src/ser/raw/document_serializer.rs b/src/ser/raw/document_serializer.rs index 55da28d2..ec6f81a5 100644 --- a/src/ser/raw/document_serializer.rs +++ b/src/ser/raw/document_serializer.rs @@ -1,7 +1,6 @@ use serde::{ser::Impossible, Serialize}; use crate::{ - raw::write_cstring, ser::{Error, Result}, serialize_to_bson, Bson, @@ -22,14 +21,14 @@ pub(crate) struct DocumentSerializer<'a> { } impl<'a> DocumentSerializer<'a> { - pub(crate) fn start(rs: &'a mut Serializer) -> crate::ser::Result { + pub(crate) fn start(rs: &'a mut Serializer) -> Self { let start = rs.bytes.len(); - RawBsonRef::Int32(0).append_to(&mut rs.bytes)?; - Ok(Self { + RawBsonRef::Int32(0).append_to(&mut rs.bytes); + Self { root_serializer: rs, num_keys_serialized: 0, start, - }) + } } /// Serialize a document key using the provided closure. @@ -266,7 +265,8 @@ impl serde::Serializer for KeySerializer<'_> { #[inline] fn serialize_str(self, v: &str) -> Result { - Ok(write_cstring(&mut self.root_serializer.bytes, v)?) + crate::raw::CStr::from_str(v)?.append_to(&mut self.root_serializer.bytes); + Ok(()) } #[inline] diff --git a/src/ser/raw/value_serializer.rs b/src/ser/raw/value_serializer.rs index c57aa44f..3b661b00 100644 --- a/src/ser/raw/value_serializer.rs +++ b/src/ser/raw/value_serializer.rs @@ -51,7 +51,7 @@ enum SerializationStep { RegEx, RegExPattern, RegExOptions { - pattern: String, + pattern: crate::raw::CString, }, Timestamp, @@ -199,7 +199,7 @@ impl<'b> serde::Serializer for &'b mut ValueSerializer<'_> { let increment = u32::try_from(v).map_err(Error::custom)?; RawBsonRef::Timestamp(crate::Timestamp { time, increment }) - .append_to(&mut self.root_serializer.bytes)?; + .append_to(&mut self.root_serializer.bytes); Ok(()) } @@ -215,7 +215,7 @@ impl<'b> serde::Serializer for &'b mut ValueSerializer<'_> { subtype: v.into(), bytes, }; - RawBsonRef::Binary(binary).append_to(&mut self.root_serializer.bytes)?; + RawBsonRef::Binary(binary).append_to(&mut self.root_serializer.bytes); self.state = SerializationStep::Done; Ok(()) } @@ -258,11 +258,11 @@ impl<'b> serde::Serializer for &'b mut ValueSerializer<'_> { SerializationStep::DateTimeNumberLong => { let millis = v.parse().map_err(Error::custom)?; RawBsonRef::DateTime(crate::DateTime::from_millis(millis)) - .append_to(&mut self.root_serializer.bytes)?; + .append_to(&mut self.root_serializer.bytes); } SerializationStep::Oid => { let oid = ObjectId::parse_str(v).map_err(Error::custom)?; - RawBsonRef::ObjectId(oid).append_to(&mut self.root_serializer.bytes)?; + RawBsonRef::ObjectId(oid).append_to(&mut self.root_serializer.bytes); } SerializationStep::BinaryBytes => { self.state = SerializationStep::BinarySubType { @@ -274,10 +274,10 @@ impl<'b> serde::Serializer for &'b mut ValueSerializer<'_> { let subtype: BinarySubtype = subtype_byte[0].into(); let bytes = &base64::decode(base64.as_str()).map_err(Error::custom)?; let binary = RawBinaryRef { subtype, bytes }; - RawBsonRef::Binary(binary).append_to(&mut self.root_serializer.bytes)?; + RawBsonRef::Binary(binary).append_to(&mut self.root_serializer.bytes); } SerializationStep::Symbol => { - RawBsonRef::Symbol(v).append_to(&mut self.root_serializer.bytes)?; + RawBsonRef::Symbol(v).append_to(&mut self.root_serializer.bytes); } SerializationStep::DbPointerRef => { self.state = SerializationStep::DbPointerId { ns: v.to_owned() }; @@ -285,23 +285,26 @@ impl<'b> serde::Serializer for &'b mut ValueSerializer<'_> { SerializationStep::DbPointerId { ns } => { let id = ObjectId::parse_str(v).map_err(Error::custom)?; RawBsonRef::DbPointer(crate::RawDbPointerRef { namespace: ns, id }) - .append_to(&mut self.root_serializer.bytes)?; + .append_to(&mut self.root_serializer.bytes); } SerializationStep::RegExPattern => { self.state = SerializationStep::RegExOptions { - pattern: v.to_string(), + pattern: v.to_string().try_into()?, }; } SerializationStep::RegExOptions { pattern } => { let mut chars: Vec<_> = v.chars().collect(); chars.sort_unstable(); - let options = &chars.into_iter().collect::(); - RawBsonRef::RegularExpression(crate::RawRegexRef { pattern, options }) - .append_to(&mut self.root_serializer.bytes)?; + let options = chars.into_iter().collect::(); + RawBsonRef::RegularExpression(crate::RawRegexRef { + pattern: pattern.as_ref(), + options: options.as_str().try_into()?, + }) + .append_to(&mut self.root_serializer.bytes); } SerializationStep::Code => { - RawBsonRef::JavaScriptCode(v).append_to(&mut self.root_serializer.bytes)?; + RawBsonRef::JavaScriptCode(v).append_to(&mut self.root_serializer.bytes); } SerializationStep::CodeWithScopeCode => { self.state = SerializationStep::CodeWithScopeScope { @@ -324,7 +327,7 @@ impl<'b> serde::Serializer for &'b mut ValueSerializer<'_> { match self.state { SerializationStep::Decimal128Value => { let dec = crate::Decimal128::from_bytes(v.try_into().map_err(Error::custom)?); - RawBsonRef::Decimal128(dec).append_to(&mut self.root_serializer.bytes)?; + RawBsonRef::Decimal128(dec).append_to(&mut self.root_serializer.bytes); Ok(()) } SerializationStep::BinaryBytes => { @@ -336,8 +339,7 @@ impl<'b> serde::Serializer for &'b mut ValueSerializer<'_> { code, scope: RawDocument::decode_from_bytes(v).map_err(Error::custom)?, }; - RawBsonRef::JavaScriptCodeWithScope(raw) - .append_to(&mut self.root_serializer.bytes)?; + RawBsonRef::JavaScriptCodeWithScope(raw).append_to(&mut self.root_serializer.bytes); self.state = SerializationStep::Done; Ok(()) } @@ -445,9 +447,9 @@ impl<'b> serde::Serializer for &'b mut ValueSerializer<'_> { #[inline] fn serialize_map(self, _len: Option) -> Result { match self.state { - SerializationStep::CodeWithScopeScope { ref code, raw } if !raw => { - CodeWithScopeSerializer::start(code.as_str(), self.root_serializer) - } + SerializationStep::CodeWithScopeScope { ref code, raw } if !raw => Ok( + CodeWithScopeSerializer::start(code.as_str(), self.root_serializer), + ), _ => Err(self.invalid_step("map")), } } @@ -610,13 +612,13 @@ pub(crate) struct CodeWithScopeSerializer<'a> { impl<'a> CodeWithScopeSerializer<'a> { #[inline] - fn start(code: &str, rs: &'a mut Serializer) -> Result { + fn start(code: &str, rs: &'a mut Serializer) -> Self { let start = rs.bytes.len(); - RawBsonRef::Int32(0).append_to(&mut rs.bytes)?; // placeholder length + RawBsonRef::Int32(0).append_to(&mut rs.bytes); // placeholder length write_string(&mut rs.bytes, code); - let doc = DocumentSerializer::start(rs)?; - Ok(Self { start, doc }) + let doc = DocumentSerializer::start(rs); + Self { start, doc } } } diff --git a/src/ser/serde.rs b/src/ser/serde.rs index 99abc7c9..071a2900 100644 --- a/src/ser/serde.rs +++ b/src/ser/serde.rs @@ -624,8 +624,8 @@ impl Serialize for Regex { S: ser::Serializer, { let raw = RawRegexRef { - pattern: self.pattern.as_str(), - options: self.options.as_str(), + pattern: self.pattern.as_ref(), + options: self.options.as_ref(), }; raw.serialize(serializer) } diff --git a/src/tests/modules/bson.rs b/src/tests/modules/bson.rs index 2f80e7c5..7dd366a6 100644 --- a/src/tests/modules/bson.rs +++ b/src/tests/modules/bson.rs @@ -5,6 +5,7 @@ use std::{ use crate::{ base64, + cstr, doc, oid::ObjectId, spec::BinarySubtype, @@ -76,8 +77,8 @@ fn test_display_timestamp_type() { #[test] fn test_display_regex_type() { let x = Regex { - pattern: String::from("pattern"), - options: String::from("options"), + pattern: cstr!("pattern").into(), + options: cstr!("options").into(), }; let output = "/pattern/options"; assert_eq!(format!("{}", x), output); @@ -130,12 +131,12 @@ fn from_impls() { assert_eq!(Bson::from(false), Bson::Boolean(false)); assert_eq!( Bson::from(Regex { - pattern: String::from("\\s+$"), - options: String::from("i") + pattern: cstr!("\\s+$").into(), + options: cstr!("i").into(), }), Bson::RegularExpression(Regex { - pattern: String::from("\\s+$"), - options: String::from("i") + pattern: cstr!("\\s+$").into(), + options: cstr!("i").into(), }) ); assert_eq!( diff --git a/src/tests/modules/macros.rs b/src/tests/modules/macros.rs index 642dfe58..0cc9900c 100644 --- a/src/tests/modules/macros.rs +++ b/src/tests/modules/macros.rs @@ -1,5 +1,6 @@ use crate::{ base64, + cstr, doc, oid::ObjectId, spec::BinarySubtype, @@ -34,7 +35,7 @@ fn standard_format() { }, "bool": true, "null": null, - "regexp": Bson::RegularExpression(Regex { pattern: "s[ao]d".to_owned(), options: "i".to_owned() }), + "regexp": Bson::RegularExpression(Regex { pattern: cstr!("s[ao]d").into(), options: cstr!("i").into() }), "with_wrapped_parens": (-20), "code": Bson::JavaScriptCode("function(x) { return x._id; }".to_owned()), "i32": 12, @@ -77,7 +78,7 @@ fn standard_format() { }, "bool": true, "null": null, - "regexp": Regex { pattern: "s[ao]d".to_owned(), options: "i".to_owned() }, + "regexp": Regex { pattern: cstr!("s[ao]d").into(), options: cstr!("i").into() }, "with_wrapped_parens": (-20), "code": RawBson::JavaScriptCode("function(x) { return x._id; }".to_owned()), "i32": 12, diff --git a/src/tests/modules/ser.rs b/src/tests/modules/ser.rs index 8e16f3b6..5391e3ba 100644 --- a/src/tests/modules/ser.rs +++ b/src/tests/modules/ser.rs @@ -11,7 +11,6 @@ use crate::{ tests::LOCK, Bson, Document, - Regex, }; #[test] @@ -159,12 +158,6 @@ fn cstring_null_bytes_error() { let doc = doc! { "a": { "\0": "b" } }; verify_doc(doc); - let regex = doc! { "regex": Regex { pattern: "\0".into(), options: "a".into() } }; - verify_doc(regex); - - let regex = doc! { "regex": Regex { pattern: "a".into(), options: "\0".into() } }; - verify_doc(regex); - fn verify_doc(doc: Document) { let result = doc.encode_to_vec(); assert!(result.is_err(), "unexpected success"); diff --git a/src/tests/modules/serializer_deserializer.rs b/src/tests/modules/serializer_deserializer.rs index 2afc6a73..03b410e8 100644 --- a/src/tests/modules/serializer_deserializer.rs +++ b/src/tests/modules/serializer_deserializer.rs @@ -6,6 +6,7 @@ use std::{ use serde::{Deserialize, Serialize}; use crate::{ + cstr, de::deserialize_from_document, doc, oid::ObjectId, @@ -159,8 +160,8 @@ fn test_serialize_deserialize_null() { fn test_serialize_deserialize_regexp() { let _guard = LOCK.run_concurrently(); let src = Bson::RegularExpression(Regex { - pattern: "1".to_owned(), - options: "2".to_owned(), + pattern: cstr!("1").to_owned(), + options: cstr!("2").to_owned(), }); let dst = vec![14, 0, 0, 0, 11, 107, 101, 121, 0, 49, 0, 50, 0, 0]; diff --git a/src/tests/serde.rs b/src/tests/serde.rs index aedc739d..d5e38e91 100644 --- a/src/tests/serde.rs +++ b/src/tests/serde.rs @@ -2,12 +2,13 @@ use crate::{ bson, + cstr, deserialize_from_bson, deserialize_from_document, doc, oid::ObjectId, - serde_helpers, serde_helpers::{ + self, bson_datetime_as_rfc3339_string, hex_string_as_object_id, i64_as_bson_datetime, @@ -151,8 +152,8 @@ fn test_ser_regex() { } let regex = Regex { - pattern: "12".into(), - options: "01".into(), + pattern: cstr!("12").into(), + options: cstr!("01").into(), }; let foo = Foo { @@ -180,8 +181,8 @@ fn test_de_regex() { } let regex = Regex { - pattern: "12".into(), - options: "01".into(), + pattern: cstr!("12").into(), + options: cstr!("01").into(), }; let foo: Foo = deserialize_from_bson(Bson::Document(doc! { diff --git a/src/tests/spec/corpus.rs b/src/tests/spec/corpus.rs index 0255cf41..dccf3e0c 100644 --- a/src/tests/spec/corpus.rs +++ b/src/tests/spec/corpus.rs @@ -201,10 +201,9 @@ fn run_test(test: TestFile) { let owned_raw_bson_field = deserializer_raw .deserialize_any(FieldVisitor(test_key.as_str(), PhantomData::)) .expect(&description); + let test_key_cstr: &crate::raw::CStr = test_key.as_str().try_into().unwrap(); let from_slice_owned_vec = - RawDocumentBuf::from_iter([(test_key, owned_raw_bson_field)]) - .expect(&description) - .into_bytes(); + RawDocumentBuf::from_iter([(test_key_cstr, owned_raw_bson_field)]).into_bytes(); // deserialize the field from raw Bytes into a Bson let deserializer_value =