Skip to content

Commit

Permalink
Merge pull request #300 from zesterer/stream
Browse files Browse the repository at this point in the history
Add Stream input
  • Loading branch information
zesterer authored Feb 27, 2023
2 parents 59b114c + c22c204 commit ece643c
Show file tree
Hide file tree
Showing 6 changed files with 112 additions and 8 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ serde_json = { version = "1.0", features = ["preserve_order"] }
criterion = "0.4.0"
pest = "2.5"
pest_derive = "2.5"
sn = "0.1"
logos = "0.12"

[[bench]]
Expand Down
4 changes: 4 additions & 0 deletions benches/json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,10 @@ fn bench_json(c: &mut Criterion) {
move |b| b.iter(|| black_box(pest::parse(json).unwrap()))
});

c.bench_function("json_sn", {
move |b| b.iter(|| black_box(sn::Parser::new(black_box(JSON)).parse().unwrap()))
});

c.bench_function("json_chumsky", {
use ::chumsky::prelude::*;
let json = chumsky::json();
Expand Down
3 changes: 1 addition & 2 deletions examples/indent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@ fn parser<'a>() -> impl Parser<'a, &'a str, Vec<Stmt>> {
let expr = just("expr"); // TODO

let block = recursive(|block| {
let indent = any()
.filter(|c: &char| *c == ' ')
let indent = just(' ')
.repeated()
.configure(|cfg, parent_indent| cfg.exactly(*parent_indent));

Expand Down
106 changes: 103 additions & 3 deletions src/zero_copy/input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@
//! ways: from strings, slices, arrays, etc.

use super::*;
use core::cell::Cell;
use hashbrown::HashMap;

/// A trait for types that represent a stream of input tokens. Unlike [`Iterator`], this type
/// supports backtracking and a few other features required by the crate.
// TODO: Remove `Clone` bound
pub trait Input<'a>: 'a + Clone {
pub trait Input<'a>: 'a {
/// The type used to keep track of the current location in the stream
type Offset: Copy + Hash + Ord + Into<usize>;
/// The type of singular items read from the stream
Expand All @@ -30,6 +30,9 @@ pub trait Input<'a>: 'a + Clone {

/// Create a span from a start and end offset
fn span(&self, range: Range<Self::Offset>) -> Self::Span;

// Produces another value of the same input type from a shared reference to it.
// NOTE(review): this appears to stand in for the `Clone` bound that `Input` previously carried (it is called where
// `.clone()` used to be when building a new `InputRef`) — confirm against callers before relying on that.
#[doc(hidden)]
fn reborrow(&self) -> Self;
}

/// A trait for types that represent slice-like streams of input tokens.
Expand Down Expand Up @@ -85,6 +88,10 @@ impl<'a> Input<'a> for &'a str {
// Builds the span by converting the offset range directly with `Into`; `self` is not consulted.
fn span(&self, range: Range<Self::Offset>) -> Self::Span {
range.into()
}

// `Self` is a shared reference (`&'a str`), which is `Copy`, so dereferencing `&Self` simply copies the reference.
fn reborrow(&self) -> Self {
*self
}
}

impl<'a> StrInput<'a, char> for &'a str {}
Expand Down Expand Up @@ -122,6 +129,10 @@ impl<'a, T: Clone> Input<'a> for &'a [T] {
// Builds the span by converting the offset range directly with `Into`; `self` is not consulted.
fn span(&self, range: Range<Self::Offset>) -> Self::Span {
range.into()
}

// `Self` is a shared reference (`&'a [T]`), which is `Copy`, so dereferencing `&Self` simply copies the reference.
fn reborrow(&self) -> Self {
*self
}
}

impl<'a> StrInput<'a, u8> for &'a [u8] {}
Expand Down Expand Up @@ -179,6 +190,10 @@ impl<'a, Ctx: Clone + 'a, I: Input<'a>> Input<'a> for WithContext<Ctx, I> {
// The span is a pair: a clone of the context (field 0) and the span the wrapped input (field 1) produces for `range`.
fn span(&self, range: Range<Self::Offset>) -> Self::Span {
(self.0.clone(), self.1.span(range))
}

// Rebuilds the wrapper from a clone of the context (field 0) and a reborrow of the wrapped input (field 1).
fn reborrow(&self) -> Self {
WithContext(self.0.clone(), self.1.reborrow())
}
}

impl<'a, Ctx: Clone + 'a, I: BorrowInput<'a>> BorrowInput<'a> for WithContext<Ctx, I> {
Expand Down Expand Up @@ -206,6 +221,90 @@ where
{
}

/// An input source that lazily draws its tokens from an [`Iterator`].
///
/// Tokens are fetched from the iterator in batches and kept in an internal buffer, so the iterator does not have to
/// be invoked every time a new token is requested.
// Field layout: `(tokens buffered so far, source iterator)`. The iterator slot is an `Option` so that the whole
// state can be moved out of the `Cell` for a moment, leaving `(Vec::new(), None)` behind.
pub struct Stream<I: Iterator>(Cell<(Vec<I::Item>, Option<I>)>);

impl<I: Iterator> Stream<I> {
    /// Build a stream on top of anything that can be turned into an [`Iterator`].
    ///
    /// The buffer starts out empty; nothing is pulled from the iterator by this call.
    ///
    /// # Example
    ///
    /// ```
    /// # use chumsky::zero_copy::{prelude::*, input::Stream};
    /// let stream = Stream::from_iter((0..10).map(|i| char::from_digit(i, 10).unwrap()));
    ///
    /// let parser = text::digits::<_, _, extra::Err<Simple<_>>>(10).collect::<String>();
    ///
    /// assert_eq!(parser.parse(&stream).into_result().as_deref(), Ok("0123456789"));
    /// ```
    pub fn from_iter<J: IntoIterator<IntoIter = I>>(iter: J) -> Self {
        let source = iter.into_iter();
        Self(Cell::new((Vec::new(), Some(source))))
    }

    /// Erase the iterator type by boxing it, yielding a [`BoxedStream`]. Handy when one parser has to accept input
    /// coming from several different kinds of source.
    ///
    /// Tokens that were already buffered are carried over unchanged.
    ///
    /// # Panics
    ///
    /// Panics with `"no iterator?!"` if the stream's iterator slot is empty.
    pub fn boxed<'a>(self) -> BoxedStream<'a, I::Item>
    where
        I: 'a,
    {
        let (buffered, source) = self.0.into_inner();
        let source: Box<dyn Iterator<Item = I::Item> + 'a> =
            Box::new(source.expect("no iterator?!"));
        Stream(Cell::new((buffered, Some(source))))
    }
}

/// A [`Stream`] whose iterator is a boxed trait object. See [`Stream::boxed`].
pub type BoxedStream<'a, T> = Stream<Box<dyn Iterator<Item = T> + 'a>>;

impl<'a, I: Iterator> Input<'a> for &'a Stream<I>
where
I::Item: Clone,
{
type Offset = usize;
type Token = I::Item;
type Span = SimpleSpan<usize>;

fn start(&self) -> Self::Offset {
0
}

unsafe fn next(&self, offset: Self::Offset) -> (Self::Offset, Option<Self::Token>) {
let mut other = Cell::new((Vec::new(), None));
self.0.swap(&other);

let (vec, iter) = other.get_mut();

// Pull new items into the vector if we need them
if vec.len() <= offset {
vec.extend(iter.as_mut().expect("no iterator?!").take(500));
}

// Get the token at the given offset
let tok = if let Some(tok) = vec.get(offset) {
Some(tok.clone())
} else {
None
};

self.0.swap(&other);

(offset + 1, tok)
}

fn span(&self, range: Range<Self::Offset>) -> Self::Span {
range.into()
}

fn reborrow(&self) -> Self {
*self
}
}

/// Represents the progress of a parser through the input
pub struct Marker<'a, I: Input<'a>> {
pub(crate) offset: I::Offset,
Expand Down Expand Up @@ -259,7 +358,7 @@ impl<'a, 'parse, I: Input<'a>, E: ParserExtra<'a, I>> InputRef<'a, 'parse, I, E>
use core::mem;

let mut new_ctx = InputRef {
input: self.input.clone(),
input: self.input.reborrow(),
offset: self.offset,
state: match &mut self.state {
Ok(state) => Ok(*state),
Expand Down Expand Up @@ -393,6 +492,7 @@ impl<'a, 'parse, I: Input<'a>, E: ParserExtra<'a, I>> InputRef<'a, 'parse, I, E>
}

#[inline]
#[cfg(feature = "regex")]
pub(crate) fn skip_bytes<C>(&mut self, skip: usize)
where
C: Char,
Expand Down
2 changes: 1 addition & 1 deletion src/zero_copy/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -489,7 +489,7 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default> {
{
let mut inp = InputRef::new(input, Ok(state));
let res = self.go::<Check>(&mut inp);
let res = res.and_then(|o| expect_end(&mut inp));
let res = res.and_then(|()| expect_end(&mut inp));
let mut errs = inp.into_errs();
let out = match res {
Ok(_) => Some(()),
Expand Down
4 changes: 2 additions & 2 deletions src/zero_copy/text.rs
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ where
///
/// ```
/// # use chumsky::zero_copy::prelude::*;
/// let digits = text::digits::<'_, _, _, extra::Err<Simple<&str>>>(10).slice();
/// let digits = text::digits::<_, _, extra::Err<Simple<&str>>>(10).slice();
///
/// assert_eq!(digits.parse("0").into_result(), Ok("0"));
/// assert_eq!(digits.parse("1").into_result(), Ok("1"));
Expand All @@ -288,7 +288,7 @@ where
pub fn digits<'a, C, I, E>(radix: u32) -> Repeated<impl Parser<'a, I, C, E> + Copy + Clone, C, I, E>
where
C: Char,
I: StrInput<'a, C>,
I: Input<'a, Token = C>,
E: ParserExtra<'a, I>,
{
any()
Expand Down

0 comments on commit ece643c

Please sign in to comment.