Skip to content

RUST-1992 Introduce the &CStr and CString types for keys and regular expressions #563

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 14 commits into
base: main
Choose a base branch
from
4 changes: 2 additions & 2 deletions fuzz/generate_corpus.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use bson::{doc, Bson, Decimal128};
use bson::{cstr, doc, Bson, Decimal128};
use std::{
fs,
io::{Error, ErrorKind},
Expand Down Expand Up @@ -64,7 +64,7 @@ fn generate_type_marker_cases(dir: &Path) -> std::io::Result<()> {
"bool": true,
"date": bson::DateTime::now(),
"null": Bson::Null,
"regex": Bson::RegularExpression(bson::Regex { pattern: "pattern".into(), options: "i".into() }),
"regex": Bson::RegularExpression(bson::Regex { pattern: cstr!("pattern").into(), options: cstr!("i").into() }),
"int32": 123i32,
"timestamp": bson::Timestamp { time: 12345, increment: 1 },
"int64": 123i64,
Expand Down
18 changes: 9 additions & 9 deletions serde-tests/json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use serde_json::json;

use super::AllTypes;

use bson::{doc, Bson, JavaScriptCodeWithScope, RawArrayBuf, RawBson, RawDocumentBuf};
use bson::{cstr, doc, Bson, JavaScriptCodeWithScope, RawArrayBuf, RawBson, RawDocumentBuf};

use serde::{Deserialize, Serialize};

Expand Down Expand Up @@ -99,18 +99,18 @@ fn owned_raw_bson() {
});

let mut doc_buf = RawDocumentBuf::new();
doc_buf.append("a", "key").unwrap();
doc_buf.append("number", 12).unwrap();
doc_buf.append("bool", false).unwrap();
doc_buf.append("nu", RawBson::Null).unwrap();
doc_buf.append(cstr!("a"), "key");
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This really is the whole story in this one line - the failure point has been shifted from writing bytes to the document buffer (append and everything that used it) to constructing the string, and that in turn can now be done either at run-time or at compile-time if it's just a literal.

doc_buf.append(cstr!("number"), 12);
doc_buf.append(cstr!("bool"), false);
doc_buf.append(cstr!("nu"), RawBson::Null);

let mut array_buf = RawArrayBuf::new();
array_buf.push(1).unwrap();
array_buf.push("string").unwrap();
array_buf.push(1);
array_buf.push("string");

let mut bson_doc = RawDocumentBuf::new();
bson_doc.append("first", true).unwrap();
bson_doc.append("second", "string").unwrap();
bson_doc.append(cstr!("first"), true);
bson_doc.append(cstr!("second"), "string");

let expected = Foo {
doc_buf,
Expand Down
29 changes: 14 additions & 15 deletions serde-tests/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ use std::{
};

use bson::{
cstr,
doc,
oid::ObjectId,
spec::BinarySubtype,
Expand Down Expand Up @@ -835,8 +836,8 @@ fn raw_regex() {

let bytes = bson::serialize_to_vec(&doc! {
"r": Regex {
pattern: "a[b-c]d".to_string(),
options: "ab".to_string(),
pattern: cstr!("a[b-c]d").into(),
options: cstr!("ab").into(),
},
})
.expect("raw_regex");
Expand Down Expand Up @@ -927,8 +928,8 @@ impl AllTypes {
};
let date = DateTime::now();
let regex = Regex {
pattern: "hello".to_string(),
options: "x".to_string(),
pattern: cstr!("hello").into(),
options: cstr!("x").into(),
};
let timestamp = Timestamp {
time: 123,
Expand Down Expand Up @@ -1058,8 +1059,8 @@ fn all_raw_types_rmp() {
scope: doc! { "x": 1 },
},
"regex": Regex {
pattern: "pattern".to_string(),
options: "opt".to_string()
pattern: cstr!("pattern").into(),
options: cstr!("opt").into()
}
})
.unwrap();
Expand Down Expand Up @@ -1254,24 +1255,22 @@ fn owned_raw_types() {

let f = Foo {
subdoc: RawDocumentBuf::from_iter([
("a key", RawBson::String("a value".to_string())),
("an objectid", RawBson::ObjectId(oid)),
("a date", RawBson::DateTime(dt)),
(cstr!("a key"), RawBson::String("a value".to_string())),
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Building from a key-value list is one place where the repeated cstr! is more of a hassle; I could potentially see providing a convenience try_from_iter method that accepts &str keys and returns a Result (i.e. like this used to be).

(cstr!("an objectid"), RawBson::ObjectId(oid)),
(cstr!("a date"), RawBson::DateTime(dt)),
(
"code_w_scope",
cstr!("code_w_scope"),
RawBson::JavaScriptCodeWithScope(raw_code_w_scope.clone()),
),
("decimal128", RawBson::Decimal128(d128)),
])
.unwrap(),
(cstr!("decimal128"), RawBson::Decimal128(d128)),
]),
array: RawArrayBuf::from_iter([
RawBson::String("a string".to_string()),
RawBson::ObjectId(oid),
RawBson::DateTime(dt),
RawBson::JavaScriptCodeWithScope(raw_code_w_scope),
RawBson::Decimal128(d128),
])
.unwrap(),
]),
};

let expected = doc! {
Expand Down
36 changes: 24 additions & 12 deletions src/bson.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ use std::{
use serde_json::{json, Value};

pub use crate::document::Document;
use crate::{base64, oid, spec::ElementType, Binary, Decimal128};
use crate::{base64, oid, raw::CString, spec::ElementType, Binary, Decimal128};

/// Possible BSON value types.
#[derive(Clone, Default, PartialEq)]
Expand Down Expand Up @@ -268,6 +268,12 @@ impl From<String> for Bson {
}
}

impl From<crate::raw::CString> for Bson {
fn from(a: crate::raw::CString) -> Bson {
Bson::String(a.into_string())
}
}

impl From<Document> for Bson {
fn from(a: Document) -> Bson {
Bson::Document(a)
Expand Down Expand Up @@ -480,14 +486,14 @@ impl Bson {
Bson::Boolean(v) => json!(v),
Bson::Null => Value::Null,
Bson::RegularExpression(Regex { pattern, options }) => {
let mut chars: Vec<_> = options.chars().collect();
let mut chars: Vec<_> = options.as_str().chars().collect();
chars.sort_unstable();

let options: String = chars.into_iter().collect();

json!({
"$regularExpression": {
"pattern": pattern,
"pattern": pattern.into_string(),
"options": options,
}
})
Expand Down Expand Up @@ -619,7 +625,7 @@ impl Bson {
ref pattern,
ref options,
}) => {
let mut chars: Vec<_> = options.chars().collect();
let mut chars: Vec<_> = options.as_str().chars().collect();
chars.sort_unstable();

let options: String = chars.into_iter().collect();
Expand Down Expand Up @@ -842,7 +848,9 @@ impl Bson {
if let Ok(regex) = doc.get_document("$regularExpression") {
if let Ok(pattern) = regex.get_str("pattern") {
if let Ok(options) = regex.get_str("options") {
return Bson::RegularExpression(Regex::new(pattern, options));
if let Ok(regex) = Regex::from_strings(pattern, options) {
return Bson::RegularExpression(regex);
}
}
}
}
Expand Down Expand Up @@ -1117,7 +1125,7 @@ impl Timestamp {
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub struct Regex {
/// The regex pattern to match.
pub pattern: String,
pub pattern: CString,

/// The options for the regex.
///
Expand All @@ -1126,18 +1134,22 @@ pub struct Regex {
/// multiline matching, 'x' for verbose mode, 'l' to make \w, \W, etc. locale dependent,
/// 's' for dotall mode ('.' matches everything), and 'u' to make \w, \W, etc. match
/// unicode.
pub options: String,
pub options: CString,
}

impl Regex {
pub(crate) fn new(pattern: impl AsRef<str>, options: impl AsRef<str>) -> Self {
#[cfg(any(test, feature = "serde"))]
pub(crate) fn from_strings(
pattern: impl AsRef<str>,
options: impl AsRef<str>,
) -> crate::error::Result<Self> {
let mut chars: Vec<_> = options.as_ref().chars().collect();
chars.sort_unstable();
let options: String = chars.into_iter().collect();
Self {
pattern: pattern.as_ref().to_string(),
options,
}
Ok(Self {
pattern: pattern.as_ref().to_string().try_into()?,
options: options.try_into()?,
})
}
}

Expand Down
8 changes: 4 additions & 4 deletions src/de/raw.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1306,15 +1306,15 @@ impl<'de> serde::de::Deserializer<'de> for &mut RegexAccess<'de> {
RegexDeserializationStage::Pattern => {
self.stage = RegexDeserializationStage::Options;
match &self.re {
BsonCow::Borrowed(re) => visitor.visit_borrowed_str(re.pattern),
BsonCow::Owned(re) => visitor.visit_str(&re.pattern),
BsonCow::Borrowed(re) => visitor.visit_borrowed_str(re.pattern.as_str()),
BsonCow::Owned(re) => visitor.visit_str(re.pattern.as_str()),
}
}
RegexDeserializationStage::Options => {
self.stage = RegexDeserializationStage::Done;
match &self.re {
BsonCow::Borrowed(re) => visitor.visit_borrowed_str(re.options),
BsonCow::Owned(re) => visitor.visit_str(&re.options),
BsonCow::Borrowed(re) => visitor.visit_borrowed_str(re.options.as_str()),
BsonCow::Owned(re) => visitor.visit_str(re.options.as_str()),
}
}
RegexDeserializationStage::Done => {
Expand Down
4 changes: 3 additions & 1 deletion src/de/serde.rs
Original file line number Diff line number Diff line change
Expand Up @@ -433,7 +433,9 @@ impl<'de> Visitor<'de> for BsonVisitor {

"$regularExpression" => {
let re = visitor.next_value::<extjson::models::RegexBody>()?;
return Ok(Bson::RegularExpression(Regex::new(re.pattern, re.options)));
return Ok(Bson::RegularExpression(
Regex::from_strings(re.pattern, re.options).map_err(Error::custom)?,
));
}

"$dbPointer" => {
Expand Down
13 changes: 12 additions & 1 deletion src/extjson/de.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,18 @@ pub enum Error {
/// A general error encountered during deserialization.
/// See: <https://docs.serde.rs/serde/de/trait.Error.html>
DeserializationError { message: String },

/// A generic crate error.
// TODO RUST-1406 collapse this
CrateError(crate::error::Error),
}

impl std::fmt::Display for Error {
fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
match *self {
Self::InvalidObjectId(ref err) => err.fmt(fmt),
Self::DeserializationError { ref message } => message.fmt(fmt),
Self::CrateError(ref err) => err.fmt(fmt),
}
}
}
Expand All @@ -69,6 +74,12 @@ impl From<serde_json::Error> for Error {
}
}

impl From<crate::error::Error> for Error {
fn from(value: crate::error::Error) -> Self {
Self::CrateError(value)
}
}

pub type Result<T> = std::result::Result<T, Error>;

/// This converts from the input JSON object as if it were [MongoDB Extended JSON v2](https://www.mongodb.com/docs/manual/reference/mongodb-extended-json/).
Expand All @@ -88,7 +99,7 @@ impl TryFrom<serde_json::Map<String, serde_json::Value>> for Bson {

if obj.contains_key("$regularExpression") {
let regex: models::Regex = serde_json::from_value(obj.into())?;
return Ok(regex.parse().into());
return Ok(regex.parse()?.into());
}

if obj.contains_key("$numberInt") {
Expand Down
4 changes: 2 additions & 2 deletions src/extjson/models.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,8 +134,8 @@ pub(crate) struct RegexBody {
}

impl Regex {
pub(crate) fn parse(self) -> crate::Regex {
crate::Regex::new(self.body.pattern, self.body.options)
pub(crate) fn parse(self) -> crate::error::Result<crate::Regex> {
crate::Regex::from_strings(self.body.pattern, self.body.options)
}
}

Expand Down
25 changes: 18 additions & 7 deletions src/macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -240,12 +240,12 @@ macro_rules! rawbson {

// Finished with trailing comma.
(@array [$($elems:expr,)*]) => {
$crate::RawArrayBuf::from_iter(vec![$($elems,)*]).expect("invalid bson value")
$crate::RawArrayBuf::from_iter(vec![$($elems,)*])
};

// Finished without trailing comma.
(@array [$($elems:expr),*]) => {
$crate::RawArrayBuf::from_iter(vec![$($elems),*]).expect("invalid bson value")
$crate::RawArrayBuf::from_iter(vec![$($elems),*])
};

// Next element is `null`.
Expand Down Expand Up @@ -291,15 +291,26 @@ macro_rules! rawbson {
// Finished.
(@object $object:ident () () ()) => {};

// Insert the current entry followed by trailing comma.
(@object $object:ident [$($key:tt)+] ($value:expr) , $($rest:tt)*) => {
$object.append(($($key)+), $value).expect("invalid bson value");
// Insert the current entry with followed by trailing comma, with a key literal.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tweaked the behavior of rawdoc! a bit here - now if the key is a literal it'll be implicitly wrapped in cstr! so it gets compile-time validated, otherwise it'll be assumed to be an expression that evaluates to a valid key and passed on to append (as before). The main difference here is that now this macro can no longer panic :)

(@object $object:ident [$key:literal] ($value:expr) , $($rest:tt)*) => {{
$object.append($crate::raw::cstr!($key), $value);
$crate::rawbson!(@object $object () ($($rest)*) ($($rest)*));
}};

// Insert the current entry with followed by trailing comma, with a key expression.
(@object $object:ident [$($key:tt)+] ($value:expr) , $($rest:tt)*) => {{
$object.append($($key)+, $value);
$crate::rawbson!(@object $object () ($($rest)*) ($($rest)*));
}};

// Insert the last entry without trailing comma, with a key literal.
(@object $object:ident [$key:literal] ($value:expr)) => {
$object.append($crate::raw::cstr!($key), $value);
};

// Insert the last entry without trailing comma.
// Insert the last entry without trailing comma, with a key expression.
(@object $object:ident [$($key:tt)+] ($value:expr)) => {
$object.append(($($key)+), $value).expect("invalid bson value");
$object.append($($key)+, $value);
};

// Next value is `null`.
Expand Down
14 changes: 2 additions & 12 deletions src/raw.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ mod array;
mod array_buf;
mod bson;
mod bson_ref;
mod cstr;
mod document;
mod document_buf;
mod iter;
Expand All @@ -142,6 +143,7 @@ pub use self::{
RawJavaScriptCodeWithScopeRef,
RawRegexRef,
},
cstr::{assert_valid_cstr, cstr, validate_cstr, CStr, CString, IsValidCStr},
document::RawDocument,
document_buf::{BindRawBsonRef, RawDocumentBuf},
iter::{RawElement, RawIter},
Expand Down Expand Up @@ -316,15 +318,3 @@ pub(crate) fn write_string(buf: &mut Vec<u8>, s: &str) {
buf.extend(s.as_bytes());
buf.push(0);
}

pub(crate) fn write_cstring(buf: &mut Vec<u8>, s: &str) -> Result<()> {
if s.contains('\0') {
return Err(Error::malformed_bytes(format!(
"cstring with interior null: {:?}",
s
)));
}
buf.extend(s.as_bytes());
buf.push(0);
Ok(())
}
Loading