Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Stream input #300

Merged
merged 4 commits into from
Feb 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ serde_json = { version = "1.0", features = ["preserve_order"] }
criterion = "0.4.0"
pest = "2.5"
pest_derive = "2.5"
sn = "0.1"
logos = "0.12"

[[bench]]
Expand Down
4 changes: 4 additions & 0 deletions benches/json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,10 @@ fn bench_json(c: &mut Criterion) {
move |b| b.iter(|| black_box(pest::parse(json).unwrap()))
});

c.bench_function("json_sn", {
move |b| b.iter(|| black_box(sn::Parser::new(black_box(JSON)).parse().unwrap()))
});

c.bench_function("json_chumsky", {
use ::chumsky::prelude::*;
let json = chumsky::json();
Expand Down
3 changes: 1 addition & 2 deletions examples/indent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@ fn parser<'a>() -> impl Parser<'a, &'a str, Vec<Stmt>> {
let expr = just("expr"); // TODO

let block = recursive(|block| {
let indent = any()
.filter(|c: &char| *c == ' ')
let indent = just(' ')
.repeated()
.configure(|cfg, parent_indent| cfg.exactly(*parent_indent));

Expand Down
106 changes: 103 additions & 3 deletions src/zero_copy/input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@
//! ways: from strings, slices, arrays, etc.

use super::*;
use core::cell::Cell;
use hashbrown::HashMap;

/// A trait for types that represent a stream of input tokens. Unlike [`Iterator`], this type
/// supports backtracking and a few other features required by the crate.
// TODO: Remove `Clone` bound
pub trait Input<'a>: 'a + Clone {
zesterer marked this conversation as resolved.
Show resolved Hide resolved
pub trait Input<'a>: 'a {
/// The type used to keep track of the current location in the stream
type Offset: Copy + Hash + Ord + Into<usize>;
/// The type of singular items read from the stream
Expand All @@ -30,6 +30,9 @@ pub trait Input<'a>: 'a + Clone {

/// Create a span from a start and end offset
fn span(&self, range: Range<Self::Offset>) -> Self::Span;

#[doc(hidden)]
fn reborrow(&self) -> Self;
}

/// A trait for types that represent slice-like streams of input tokens.
Expand Down Expand Up @@ -85,6 +88,10 @@ impl<'a> Input<'a> for &'a str {
fn span(&self, range: Range<Self::Offset>) -> Self::Span {
range.into()
}

fn reborrow(&self) -> Self {
*self
}
}

impl<'a> StrInput<'a, char> for &'a str {}
Expand Down Expand Up @@ -122,6 +129,10 @@ impl<'a, T: Clone> Input<'a> for &'a [T] {
fn span(&self, range: Range<Self::Offset>) -> Self::Span {
range.into()
}

fn reborrow(&self) -> Self {
*self
}
}

impl<'a> StrInput<'a, u8> for &'a [u8] {}
Expand Down Expand Up @@ -179,6 +190,10 @@ impl<'a, Ctx: Clone + 'a, I: Input<'a>> Input<'a> for WithContext<Ctx, I> {
fn span(&self, range: Range<Self::Offset>) -> Self::Span {
(self.0.clone(), self.1.span(range))
}

fn reborrow(&self) -> Self {
WithContext(self.0.clone(), self.1.reborrow())
}
}

impl<'a, Ctx: Clone + 'a, I: BorrowInput<'a>> BorrowInput<'a> for WithContext<Ctx, I> {
Expand Down Expand Up @@ -206,6 +221,90 @@ where
{
}

/// A token source backed by an arbitrary [`Iterator`], with tokens pulled on demand.
///
/// Tokens that have already been pulled are kept in an internal buffer next to the iterator itself. New tokens are
/// fetched from the iterator in batches rather than one at a time, so the iterator is not invoked on every single
/// token request.
pub struct Stream<I>(Cell<(Vec<I::Item>, Option<I>)>)
where
    I: Iterator;

impl<I: Iterator> Stream<I> {
    /// Build a stream that reads its tokens from anything convertible into an [`Iterator`].
    ///
    /// The stream starts with an empty token buffer; nothing is pulled from the iterator here.
    ///
    /// # Example
    ///
    /// ```
    /// # use chumsky::zero_copy::{prelude::*, input::Stream};
    /// let stream = Stream::from_iter((0..10).map(|i| char::from_digit(i, 10).unwrap()));
    ///
    /// let parser = text::digits::<_, _, extra::Err<Simple<_>>>(10).collect::<String>();
    ///
    /// assert_eq!(parser.parse(&stream).into_result().as_deref(), Ok("0123456789"));
    /// ```
    pub fn from_iter<J: IntoIterator<IntoIter = I>>(iter: J) -> Self {
        let source = iter.into_iter();
        Self(Cell::new((Vec::new(), Some(source))))
    }

    /// Erase the concrete iterator type by boxing it, producing a [`BoxedStream`].
    ///
    /// This is handy when a parser has to accept input coming from several different kinds of iterator: they can all
    /// be converted to the same [`BoxedStream`] type. Tokens that were already buffered are carried over unchanged.
    ///
    /// # Panics
    ///
    /// Panics with `"no iterator?!"` if the stream does not currently hold its iterator.
    pub fn boxed<'a>(self) -> BoxedStream<'a, I::Item>
    where
        I: 'a,
    {
        let (buffered, source) = self.0.into_inner();
        // Unsize the concrete iterator into a trait object so the result matches `BoxedStream`.
        let erased: Box<dyn Iterator<Item = I::Item> + 'a> =
            Box::new(source.expect("no iterator?!"));
        Stream(Cell::new((buffered, Some(erased))))
    }
}

/// A [`Stream`] whose underlying iterator is a boxed trait object yielding items of type `T`.
///
/// The boxed iterator may borrow data for the lifetime `'a`. See [`Stream::boxed`] for how to obtain one.
pub type BoxedStream<'a, T> = Stream<Box<dyn Iterator<Item = T> + 'a>>;

impl<'a, I: Iterator> Input<'a> for &'a Stream<I>
where
I::Item: Clone,
{
type Offset = usize;
type Token = I::Item;
type Span = SimpleSpan<usize>;

fn start(&self) -> Self::Offset {
0
}

unsafe fn next(&self, offset: Self::Offset) -> (Self::Offset, Option<Self::Token>) {
let mut other = Cell::new((Vec::new(), None));
self.0.swap(&other);

let (vec, iter) = other.get_mut();

// Pull new items into the vector if we need them
if vec.len() <= offset {
vec.extend(iter.as_mut().expect("no iterator?!").take(500));
}

// Get the token at the given offset
let tok = if let Some(tok) = vec.get(offset) {
Some(tok.clone())
} else {
None
};

self.0.swap(&other);

(offset + 1, tok)
}

fn span(&self, range: Range<Self::Offset>) -> Self::Span {
range.into()
}

fn reborrow(&self) -> Self {
*self
}
}

/// Represents the progress of a parser through the input
pub struct Marker<'a, I: Input<'a>> {
pub(crate) offset: I::Offset,
Expand Down Expand Up @@ -259,7 +358,7 @@ impl<'a, 'parse, I: Input<'a>, E: ParserExtra<'a, I>> InputRef<'a, 'parse, I, E>
use core::mem;

let mut new_ctx = InputRef {
input: self.input.clone(),
input: self.input.reborrow(),
offset: self.offset,
state: match &mut self.state {
Ok(state) => Ok(*state),
Expand Down Expand Up @@ -393,6 +492,7 @@ impl<'a, 'parse, I: Input<'a>, E: ParserExtra<'a, I>> InputRef<'a, 'parse, I, E>
}

#[inline]
#[cfg(feature = "regex")]
pub(crate) fn skip_bytes<C>(&mut self, skip: usize)
where
C: Char,
Expand Down
2 changes: 1 addition & 1 deletion src/zero_copy/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -489,7 +489,7 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default> {
{
let mut inp = InputRef::new(input, Ok(state));
let res = self.go::<Check>(&mut inp);
let res = res.and_then(|o| expect_end(&mut inp));
let res = res.and_then(|()| expect_end(&mut inp));
let mut errs = inp.into_errs();
let out = match res {
Ok(_) => Some(()),
Expand Down
4 changes: 2 additions & 2 deletions src/zero_copy/text.rs
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ where
///
/// ```
/// # use chumsky::zero_copy::prelude::*;
/// let digits = text::digits::<'_, _, _, extra::Err<Simple<&str>>>(10).slice();
/// let digits = text::digits::<_, _, extra::Err<Simple<&str>>>(10).slice();
///
/// assert_eq!(digits.parse("0").into_result(), Ok("0"));
/// assert_eq!(digits.parse("1").into_result(), Ok("1"));
Expand All @@ -288,7 +288,7 @@ where
pub fn digits<'a, C, I, E>(radix: u32) -> Repeated<impl Parser<'a, I, C, E> + Copy + Clone, C, I, E>
where
C: Char,
I: StrInput<'a, C>,
I: Input<'a, Token = C>,
E: ParserExtra<'a, I>,
{
any()
Expand Down