Skip to content

Commit

Permalink
refactor(example): Un-unpeek json
Browse files Browse the repository at this point in the history
  • Loading branch information
epage committed Jun 30, 2023
1 parent 77f93e5 commit 9922ddb
Show file tree
Hide file tree
Showing 5 changed files with 227 additions and 235 deletions.
15 changes: 10 additions & 5 deletions examples/json/bench.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use winnow::prelude::*;
use winnow::Partial;

mod json;
Expand All @@ -15,20 +16,20 @@ fn json_bench(c: &mut criterion::Criterion) {
group.bench_with_input(criterion::BenchmarkId::new("basic", name), &len, |b, _| {
type Error<'i> = winnow::error::Error<parser::Stream<'i>>;

b.iter(|| parser::json::<Error>(sample).unwrap());
b.iter(|| parser::json::<Error>.parse_peek(sample).unwrap());
});
group.bench_with_input(criterion::BenchmarkId::new("unit", name), &len, |b, _| {
type Error<'i> = ();

b.iter(|| parser::json::<Error>(sample).unwrap());
b.iter(|| parser::json::<Error>.parse_peek(sample).unwrap());
});
group.bench_with_input(
criterion::BenchmarkId::new("verbose", name),
&len,
|b, _| {
type Error<'i> = winnow::error::VerboseError<parser::Stream<'i>>;

b.iter(|| parser::json::<Error>(sample).unwrap());
b.iter(|| parser::json::<Error>.parse_peek(sample).unwrap());
},
);
group.bench_with_input(
Expand All @@ -37,7 +38,7 @@ fn json_bench(c: &mut criterion::Criterion) {
|b, _| {
type Error<'i> = winnow::error::Error<parser_dispatch::Stream<'i>>;

b.iter(|| parser_dispatch::json::<Error>(sample).unwrap());
b.iter(|| parser_dispatch::json::<Error>.parse_peek(sample).unwrap());
},
);
group.bench_with_input(
Expand All @@ -46,7 +47,11 @@ fn json_bench(c: &mut criterion::Criterion) {
|b, _| {
type Error<'i> = winnow::error::Error<parser_partial::Stream<'i>>;

b.iter(|| parser_partial::json::<Error>(Partial::new(sample)).unwrap());
b.iter(|| {
parser_partial::json::<Error>
.parse_peek(Partial::new(sample))
.unwrap()
});
},
);
}
Expand Down
7 changes: 3 additions & 4 deletions examples/json/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ use winnow::error::convert_error;
use winnow::error::Error;
use winnow::error::VerboseError;
use winnow::prelude::*;
use winnow::unpeek;

fn main() -> Result<(), lexopt::Error> {
let args = Args::parse()?;
Expand All @@ -28,7 +27,7 @@ fn main() -> Result<(), lexopt::Error> {
});

if args.verbose {
match unpeek(parser::json::<VerboseError<&str>>).parse(data) {
match parser::json::<VerboseError<&str>>.parse(data) {
Ok(json) => {
println!("{:#?}", json);
}
Expand All @@ -42,8 +41,8 @@ fn main() -> Result<(), lexopt::Error> {
}
} else {
let result = match args.implementation {
Impl::Naive => unpeek(parser::json::<Error<&str>>).parse(data),
Impl::Dispatch => unpeek(parser_dispatch::json::<Error<&str>>).parse(data),
Impl::Naive => parser::json::<Error<&str>>.parse(data),
Impl::Dispatch => parser_dispatch::json::<Error<&str>>.parse(data),
};
match result {
Ok(json) => {
Expand Down
145 changes: 70 additions & 75 deletions examples/json/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ use winnow::{
combinator::{fold_repeat, separated0},
error::{ContextError, ParseError},
token::{any, none_of, take, take_while},
unpeek,
};

use crate::json::JsonValue;
Expand All @@ -20,8 +19,8 @@ pub type Stream<'i> = &'i str;
/// The root element of a JSON parser is any value
///
/// A parser has the following signature:
/// `Stream -> IResult<Stream, Output, Error>`, with `IResult` defined as:
/// `type IResult<I, O, E = (I, ErrorKind)> = Result<(I, O), Err<E>>;`
/// `&mut Stream -> PResult<Output, Error>`, with `PResult` defined as:
/// `type PResult<O, E = (I, ErrorKind)> = Result<O, Err<E>>;`
///
/// most of the time you can ignore the error type and use the default (but this
/// example shows custom error types later on!)
Expand All @@ -30,41 +29,41 @@ pub type Stream<'i> = &'i str;
/// the input type, work directly with `&[u8]`, or any other type that
/// implements the required traits.
pub fn json<'i, E: ParseError<Stream<'i>> + ContextError<Stream<'i>, &'static str>>(
input: Stream<'i>,
) -> IResult<Stream<'i>, JsonValue, E> {
delimited(unpeek(ws), unpeek(json_value), unpeek(ws)).parse_peek(input)
input: &mut Stream<'i>,
) -> PResult<JsonValue, E> {
delimited(ws, json_value, ws).parse_next(input)
}

/// `alt` is a combinator that tries multiple parsers one by one, until
/// one of them succeeds
fn json_value<'i, E: ParseError<Stream<'i>> + ContextError<Stream<'i>, &'static str>>(
input: Stream<'i>,
) -> IResult<Stream<'i>, JsonValue, E> {
input: &mut Stream<'i>,
) -> PResult<JsonValue, E> {
// `alt` combines each of the value parsers. It returns the result of the first
// successful parser, or an error
alt((
unpeek(null).value(JsonValue::Null),
unpeek(boolean).map(JsonValue::Boolean),
unpeek(string).map(JsonValue::Str),
null.value(JsonValue::Null),
boolean.map(JsonValue::Boolean),
string.map(JsonValue::Str),
float.map(JsonValue::Num),
unpeek(array).map(JsonValue::Array),
unpeek(object).map(JsonValue::Object),
array.map(JsonValue::Array),
object.map(JsonValue::Object),
))
.parse_peek(input)
.parse_next(input)
}

/// `tag(string)` generates a parser that recognizes the argument string.
///
/// This also shows returning a sub-slice of the original input
fn null<'i, E: ParseError<Stream<'i>>>(input: Stream<'i>) -> IResult<Stream<'i>, &'i str, E> {
fn null<'i, E: ParseError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E> {
// This is a parser that returns `"null"` if it sees the string "null", and
// an error otherwise
"null".parse_peek(input)
"null".parse_next(input)
}

/// We can combine `tag` with other functions, like `value` which returns a given constant value on
/// success.
fn boolean<'i, E: ParseError<Stream<'i>>>(input: Stream<'i>) -> IResult<Stream<'i>, bool, E> {
fn boolean<'i, E: ParseError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<bool, E> {
// This is a parser that returns `true` if it sees the string "true", and
// an error otherwise
let parse_true = "true".value(true);
Expand All @@ -73,22 +72,22 @@ fn boolean<'i, E: ParseError<Stream<'i>>>(input: Stream<'i>) -> IResult<Stream<'
// an error otherwise
let parse_false = "false".value(false);

alt((parse_true, parse_false)).parse_peek(input)
alt((parse_true, parse_false)).parse_next(input)
}

/// This parser gathers all `char`s up into a `String` with a parser to recognize the double quote
/// character, before the string (using `preceded`) and after the string (using `terminated`).
fn string<'i, E: ParseError<Stream<'i>> + ContextError<Stream<'i>, &'static str>>(
input: Stream<'i>,
) -> IResult<Stream<'i>, String, E> {
input: &mut Stream<'i>,
) -> PResult<String, E> {
preceded(
'\"',
// `cut_err` transforms an `ErrMode::Backtrack(e)` to `ErrMode::Cut(e)`, signaling to
// combinators like `alt` that they should not try other parsers. We were in the
// right branch (since we found the `"` character) but encountered an error when
// parsing the string
cut_err(terminated(
fold_repeat(0.., unpeek(character), String::new, |mut string, c| {
fold_repeat(0.., character, String::new, |mut string, c| {
string.push(c);
string
}),
Expand All @@ -98,13 +97,13 @@ fn string<'i, E: ParseError<Stream<'i>> + ContextError<Stream<'i>, &'static str>
// `context` lets you add a static string to errors to provide more information in the
// error chain (to indicate which parser had an error)
.context("string")
.parse_peek(input)
.parse_next(input)
}

/// You can mix the above declarative parsing with an imperative style to handle more unique cases,
/// like escaping
fn character<'i, E: ParseError<Stream<'i>>>(input: Stream<'i>) -> IResult<Stream<'i>, char, E> {
let (input, c) = none_of('\"').parse_peek(input)?;
fn character<'i, E: ParseError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<char, E> {
let c = none_of('\"').parse_next(input)?;
if c == '\\' {
alt((
any.verify_map(|c| {
Expand All @@ -118,24 +117,22 @@ fn character<'i, E: ParseError<Stream<'i>>>(input: Stream<'i>) -> IResult<Stream
_ => return None,
})
}),
preceded('u', unpeek(unicode_escape)),
preceded('u', unicode_escape),
))
.parse_peek(input)
.parse_next(input)
} else {
Ok((input, c))
Ok(c)
}
}

fn unicode_escape<'i, E: ParseError<Stream<'i>>>(
input: Stream<'i>,
) -> IResult<Stream<'i>, char, E> {
fn unicode_escape<'i, E: ParseError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<char, E> {
alt((
// Not a surrogate
unpeek(u16_hex)
u16_hex
.verify(|cp| !(0xD800..0xE000).contains(cp))
.map(|cp| cp as u32),
// See https://en.wikipedia.org/wiki/UTF-16#Code_points_from_U+010000_to_U+10FFFF for details
separated_pair(unpeek(u16_hex), "\\u", unpeek(u16_hex))
separated_pair(u16_hex, "\\u", u16_hex)
.verify(|(high, low)| (0xD800..0xDC00).contains(high) && (0xDC00..0xE000).contains(low))
.map(|(high, low)| {
let high_ten = (high as u32) - 0xD800;
Expand All @@ -147,65 +144,54 @@ fn unicode_escape<'i, E: ParseError<Stream<'i>>>(
// Could be probably replaced with .unwrap() or _unchecked due to the verify checks
std::char::from_u32,
)
.parse_peek(input)
.parse_next(input)
}

fn u16_hex<'i, E: ParseError<Stream<'i>>>(input: Stream<'i>) -> IResult<Stream<'i>, u16, E> {
fn u16_hex<'i, E: ParseError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<u16, E> {
take(4usize)
.verify_map(|s| u16::from_str_radix(s, 16).ok())
.parse_peek(input)
.parse_next(input)
}

/// Some combinators, like `separated0` or `many0`, will call a parser repeatedly,
/// accumulating results in a `Vec`, until it encounters an error.
/// If you want more control on the parser application, check out the `iterator`
/// combinator (cf `examples/iterator.rs`)
fn array<'i, E: ParseError<Stream<'i>> + ContextError<Stream<'i>, &'static str>>(
input: Stream<'i>,
) -> IResult<Stream<'i>, Vec<JsonValue>, E> {
input: &mut Stream<'i>,
) -> PResult<Vec<JsonValue>, E> {
preceded(
('[', unpeek(ws)),
cut_err(terminated(
separated0(unpeek(json_value), (unpeek(ws), ',', unpeek(ws))),
(unpeek(ws), ']'),
)),
('[', ws),
cut_err(terminated(separated0(json_value, (ws, ',', ws)), (ws, ']'))),
)
.context("array")
.parse_peek(input)
.parse_next(input)
}

fn object<'i, E: ParseError<Stream<'i>> + ContextError<Stream<'i>, &'static str>>(
input: Stream<'i>,
) -> IResult<Stream<'i>, HashMap<String, JsonValue>, E> {
input: &mut Stream<'i>,
) -> PResult<HashMap<String, JsonValue>, E> {
preceded(
('{', unpeek(ws)),
cut_err(terminated(
separated0(unpeek(key_value), (unpeek(ws), ',', unpeek(ws))),
(unpeek(ws), '}'),
)),
('{', ws),
cut_err(terminated(separated0(key_value, (ws, ',', ws)), (ws, '}'))),
)
.context("object")
.parse_peek(input)
.parse_next(input)
}

fn key_value<'i, E: ParseError<Stream<'i>> + ContextError<Stream<'i>, &'static str>>(
input: Stream<'i>,
) -> IResult<Stream<'i>, (String, JsonValue), E> {
separated_pair(
unpeek(string),
cut_err((unpeek(ws), ':', unpeek(ws))),
unpeek(json_value),
)
.parse_peek(input)
input: &mut Stream<'i>,
) -> PResult<(String, JsonValue), E> {
separated_pair(string, cut_err((ws, ':', ws)), json_value).parse_next(input)
}

/// Parser combinators are constructed from the bottom up:
/// first we write parsers for the smallest elements (here a space character),
/// then we'll combine them in larger parsers
fn ws<'i, E: ParseError<Stream<'i>>>(input: Stream<'i>) -> IResult<Stream<'i>, &'i str, E> {
fn ws<'i, E: ParseError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E> {
// Combinators like `take_while` return a function. That function is the
// parser, to which we can pass the input
take_while(0.., WS).parse_peek(input)
take_while(0.., WS).parse_next(input)
}

const WS: &[char] = &[' ', '\t', '\r', '\n'];
Expand All @@ -222,24 +208,33 @@ mod test {

#[test]
fn json_string() {
assert_eq!(string::<Error<'_>>("\"\""), Ok(("", "".to_string())));
assert_eq!(string::<Error<'_>>("\"abc\""), Ok(("", "abc".to_string())));
assert_eq!(
string::<Error<'_>>("\"abc\\\"\\\\\\/\\b\\f\\n\\r\\t\\u0001\\u2014\u{2014}def\""),
string::<Error<'_>>.parse_peek("\"\""),
Ok(("", "".to_string()))
);
assert_eq!(
string::<Error<'_>>.parse_peek("\"abc\""),
Ok(("", "abc".to_string()))
);
assert_eq!(
string::<Error<'_>>
.parse_peek("\"abc\\\"\\\\\\/\\b\\f\\n\\r\\t\\u0001\\u2014\u{2014}def\""),
Ok(("", "abc\"\\/\x08\x0C\n\r\t\x01——def".to_string())),
);
assert_eq!(
string::<Error<'_>>("\"\\uD83D\\uDE10\""),
string::<Error<'_>>.parse_peek("\"\\uD83D\\uDE10\""),
Ok(("", "😐".to_string()))
);

assert!(string::<Error<'_>>("\"").is_err());
assert!(string::<Error<'_>>("\"abc").is_err());
assert!(string::<Error<'_>>("\"\\\"").is_err());
assert!(string::<Error<'_>>("\"\\u123\"").is_err());
assert!(string::<Error<'_>>("\"\\uD800\"").is_err());
assert!(string::<Error<'_>>("\"\\uD800\\uD800\"").is_err());
assert!(string::<Error<'_>>("\"\\uDC00\"").is_err());
assert!(string::<Error<'_>>.parse_peek("\"").is_err());
assert!(string::<Error<'_>>.parse_peek("\"abc").is_err());
assert!(string::<Error<'_>>.parse_peek("\"\\\"").is_err());
assert!(string::<Error<'_>>.parse_peek("\"\\u123\"").is_err());
assert!(string::<Error<'_>>.parse_peek("\"\\uD800\"").is_err());
assert!(string::<Error<'_>>
.parse_peek("\"\\uD800\\uD800\"")
.is_err());
assert!(string::<Error<'_>>.parse_peek("\"\\uDC00\"").is_err());
}

#[test]
Expand All @@ -257,7 +252,7 @@ mod test {
.collect(),
);

assert_eq!(json::<Error<'_>>(input), Ok(("", expected)));
assert_eq!(json::<Error<'_>>.parse_peek(input), Ok(("", expected)));
}

#[test]
Expand All @@ -268,7 +263,7 @@ mod test {

let expected = Array(vec![Num(42.0), Str("x".to_string())]);

assert_eq!(json::<Error<'_>>(input), Ok(("", expected)));
assert_eq!(json::<Error<'_>>.parse_peek(input), Ok(("", expected)));
}

#[test]
Expand All @@ -290,7 +285,7 @@ mod test {
"#;

assert_eq!(
json::<Error<'_>>(input),
json::<Error<'_>>.parse_peek(input),
Ok((
"",
Object(
Expand Down
Loading

0 comments on commit 9922ddb

Please sign in to comment.