Skip to content

Commit

Permalink
fuzz: Adds a structured fuzzer
Browse files Browse the repository at this point in the history
This is a precursor to full roundtrip property testing.
  • Loading branch information
nathaniel-brough committed Feb 22, 2023
1 parent f0b3420 commit 779de32
Show file tree
Hide file tree
Showing 4 changed files with 180 additions and 0 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ encoding_rs = { version = "0.8", optional = true }
serde = { version = "1.0.100", optional = true }
tokio = { version = "1.0", optional = true, default-features = false, features = ["io-util"] }
memchr = "2.0"
arbitrary = { version = "1.2.3", features = ["derive"], optional = true }

[dev-dependencies]
criterion = "0.4"
Expand Down
8 changes: 8 additions & 0 deletions fuzz/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,12 @@ edition = "2018"
cargo-fuzz = true

[dependencies]
arbitrary = { version = "1.2.3", features = ["derive"] }
libfuzzer-sys = "0.4"

[dependencies.quick-xml]
path = ".."
features = ["arbitrary"]

# Prevent this from interfering with workspaces
[workspace]
Expand All @@ -23,3 +25,9 @@ name = "fuzz_target_1"
path = "fuzz_targets/fuzz_target_1.rs"
test = false
doc = false

[[bin]]
name = "structured_roundtrip"
path = "fuzz_targets/structured_roundtrip.rs"
test = false
doc = false
122 changes: 122 additions & 0 deletions fuzz/fuzz_targets/structured_roundtrip.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
#![no_main]

use arbitrary::{Arbitrary, Unstructured};
use libfuzzer_sys::fuzz_target;
use quick_xml::events::{BytesCData, BytesText, Event};
use quick_xml::reader::{NsReader, Reader};
use quick_xml::writer::Writer;
use std::{hint::black_box, io::Cursor};

/// Content-writing operation applied to an element created through `Writer::create_element`.
///
/// Each variant selects which `write_*` method `fuzz_round_trip` invokes on the element writer.
#[derive(Debug, arbitrary::Arbitrary)]
enum ElementWriterFunc<'a> {
/// Calls `write_text_content` with the string wrapped by `BytesText::from_escaped`.
WriteTextContent(&'a str),
/// Calls `write_cdata_content` with the string wrapped by `BytesCData::new`.
WriteCDataContent(&'a str),
/// Calls `write_pi_content` with the string wrapped by `BytesText::from_escaped`.
WritePiContent(&'a str),
/// Calls `write_empty`; carries no payload.
WriteEmpty,
// TODO: We can't automatically generate an arbitrary function
// WriteInnerContent,
}

fn arbitrary_name(u: &mut Unstructured) -> arbitrary::Result<String> {
let s = String::arbitrary(u)?;
if s.is_empty() || !s.chars().all(char::is_alphanumeric) {
return Err(arbitrary::Error::IncorrectFormat);
}
return Ok(s);
}

/// A single operation performed on the `Writer` by `fuzz_round_trip`.
#[derive(Debug, arbitrary::Arbitrary)]
enum WriterFunc<'a> {
/// Passes the event to `Writer::write_event`.
WriteEvent(Event<'a>),
/// Calls `Writer::write_bom`.
WriteBom,
/// Calls `Writer::write_indent`.
WriteIndent,
/// Calls `Writer::create_element`, attaches the attributes and then applies `func`.
CreateElement {
// Generated by `arbitrary_name`, so it is always non-empty and alphanumeric.
#[arbitrary(with = arbitrary_name)]
name: String,
// Which content-writing method to call on the created element.
func: ElementWriterFunc<'a>,
// Pairs handed to `with_attributes`; no validation is applied to them here.
attributes: Vec<(&'a str, &'a str)>,
},
}

/// Complete fuzzer input: the writer operations to run and the flags used to configure the readers.
#[derive(Debug, arbitrary::Arbitrary)]
struct Driver<'a> {
// Operations applied to the writer, in order.
writer_funcs: Vec<WriterFunc<'a>>,
// Boolean reader options, consumed front to back by `fuzz_round_trip`; once the
// vector is exhausted every remaining option falls back to `false`.
reader_config: Vec<bool>,
}

fn fuzz_round_trip(driver: Driver) -> quick_xml::Result<()> {
let mut writer = Writer::new(Cursor::new(Vec::new()));
let writer_funcs = driver.writer_funcs;
for writer_func in writer_funcs.iter() {
// TODO: Handle error cases.
use WriterFunc::*;
match writer_func {
WriteEvent(event) => writer.write_event(event)?,
WriteBom => writer.write_bom()?,
WriteIndent => writer.write_indent()?,
CreateElement {
name,
func,
attributes,
} => {
let element_writer = writer
.create_element(&name)
.with_attributes(attributes.into_iter().copied());
use ElementWriterFunc::*;
match func {
WriteTextContent(text) => {
element_writer.write_text_content(BytesText::from_escaped(*text))?;
}
WriteCDataContent(text) => {
_ = element_writer.write_cdata_content(BytesCData::new(*text))?;
}
WritePiContent(text) => {
_ = element_writer.write_pi_content(BytesText::from_escaped(*text))?;
}
WriteEmpty => {
_ = element_writer.write_empty()?;
}
}
}
}
}
let xml = writer.into_inner().into_inner();
// The str should be valid as we just generated it, unwrapping **should** be safe.
let mut reader = Reader::from_str(std::str::from_utf8(&xml).unwrap());
let mut config_iter = driver.reader_config.iter();
reader.check_comments(*config_iter.next().unwrap_or(&false));
reader.check_end_names(*config_iter.next().unwrap_or(&false));
reader.expand_empty_elements(*config_iter.next().unwrap_or(&false));
reader.trim_markup_names_in_closing_tags(*config_iter.next().unwrap_or(&false));
reader.trim_text(*config_iter.next().unwrap_or(&false));
reader.trim_text_end(*config_iter.next().unwrap_or(&false));

loop {
let event = black_box(reader.read_event()?);
if event == Event::Eof {
break;
}
}

let mut reader = NsReader::from_reader(&xml[..]);
reader.check_comments(*config_iter.next().unwrap_or(&false));
reader.check_end_names(*config_iter.next().unwrap_or(&false));
reader.expand_empty_elements(*config_iter.next().unwrap_or(&false));
reader.trim_markup_names_in_closing_tags(*config_iter.next().unwrap_or(&false));
reader.trim_text(*config_iter.next().unwrap_or(&false));
reader.trim_text_end(*config_iter.next().unwrap_or(&false));

loop {
let event = black_box(reader.read_event()?);
if event == Event::Eof {
break;
}
}
Ok(())
}

// Fuzzer entry point: runs one write/read round trip per generated `Driver`.
fuzz_target!(|driver: Driver| {
// An `Err` is not treated as a failure here; it is only rendered with its `Debug`
// formatting and the resulting string is handed to `black_box`.
if let Err(e) = fuzz_round_trip(driver) {
black_box(format!("{e:?}"));
}
});
49 changes: 49 additions & 0 deletions src/events/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,19 @@ pub struct BytesStart<'a> {
pub(crate) name_len: usize,
}

#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesStart<'a> {
    /// Generates a start tag with a non-empty alphanumeric name and an arbitrary
    /// list of attributes.
    ///
    /// Returns `arbitrary::Error::IncorrectFormat` when the generated name is empty
    /// or contains a non-alphanumeric character.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let name = <&str as arbitrary::Arbitrary>::arbitrary(u)?;
        // `all` is vacuously true for an empty string, hence the explicit emptiness check.
        let name_is_valid = !name.is_empty() && name.chars().all(char::is_alphanumeric);
        if !name_is_valid {
            return Err(arbitrary::Error::IncorrectFormat);
        }

        let mut start = Self::new(name);
        let attributes = <Vec<(&str, &str)> as arbitrary::Arbitrary>::arbitrary(u)?;
        start.extend_attributes(attributes);
        Ok(start)
    }
}

impl<'a> BytesStart<'a> {
/// Internal constructor, used by `Reader`. Supplies data in reader's encoding
#[inline]
Expand Down Expand Up @@ -297,6 +310,17 @@ pub struct BytesDecl<'a> {
content: BytesStart<'a>,
}

#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesDecl<'a> {
    /// Generates an XML declaration from an arbitrary version string plus optional
    /// encoding and standalone strings. None of the values are validated here.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        // Drawn in the same order as the arguments of `BytesDecl::new`.
        let version = <&str as arbitrary::Arbitrary>::arbitrary(u)?;
        let encoding = <Option<&str> as arbitrary::Arbitrary>::arbitrary(u)?;
        let standalone = <Option<&str> as arbitrary::Arbitrary>::arbitrary(u)?;
        Ok(Self::new(version, encoding, standalone))
    }
}

impl<'a> BytesDecl<'a> {
/// Constructs a new `XmlDecl` from the (mandatory) _version_ (should be `1.0` or `1.1`),
/// the optional _encoding_ (e.g., `UTF-8`) and the optional _standalone_ (`yes` or `no`)
Expand Down Expand Up @@ -541,6 +565,13 @@ pub struct BytesEnd<'a> {
name: Cow<'a, [u8]>,
}

#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesEnd<'a> {
    /// Generates an end tag whose name is an arbitrary, unvalidated string.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let name = <&str as arbitrary::Arbitrary>::arbitrary(u)?;
        Ok(Self::new(name))
    }
}

impl<'a> BytesEnd<'a> {
/// Internal constructor, used by `Reader`. Supplies data in reader's encoding
#[inline]
Expand Down Expand Up @@ -619,6 +650,16 @@ pub struct BytesText<'a> {
decoder: Decoder,
}

#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesText<'a> {
    /// Generates text content restricted to non-empty, purely alphanumeric strings.
    ///
    /// Returns `arbitrary::Error::IncorrectFormat` for any other generated string.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let content = <&str as arbitrary::Arbitrary>::arbitrary(u)?;
        // `all` is vacuously true for an empty string, hence the explicit emptiness check.
        let content_is_valid = !content.is_empty() && content.chars().all(char::is_alphanumeric);
        if content_is_valid {
            Ok(Self::new(content))
        } else {
            Err(arbitrary::Error::IncorrectFormat)
        }
    }
}
impl<'a> BytesText<'a> {
/// Creates a new `BytesText` from an escaped byte sequence in the specified encoding.
#[inline]
Expand Down Expand Up @@ -743,6 +784,13 @@ pub struct BytesCData<'a> {
decoder: Decoder,
}

#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesCData<'a> {
    /// Generates a CDATA section whose content is an arbitrary, unvalidated string.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let content = <&str as arbitrary::Arbitrary>::arbitrary(u)?;
        Ok(Self::new(content))
    }
}

impl<'a> BytesCData<'a> {
/// Creates a new `BytesCData` from a byte sequence in the specified encoding.
#[inline]
Expand Down Expand Up @@ -869,6 +917,7 @@ impl<'a> Deref for BytesCData<'a> {
///
/// [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
#[derive(Clone, Debug, Eq, PartialEq)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum Event<'a> {
/// Start tag (with attributes) `<tag attr="value">`.
Start(BytesStart<'a>),
Expand Down

0 comments on commit 779de32

Please sign in to comment.