My SQL demo parser failed to parse due to ValueInput not derived? #697
-
I am quite new to chumsky and attempted to write a SQL parser demo as how the example nano_rust did. However I found in my third test function, //! # Grammar parser for SQL
//! This library is a grammar parser for SQL. It is used to parse SQL statements and return an AST of the input stream.
pub(crate) use chumsky::input::ValueInput;
pub(crate) use chumsky::prelude::*;
/// Lexical tokens for SQL.
#[derive(Debug, Clone, PartialEq)]
pub enum Token<'src> {
/// Indentifier for a database, table, or column.
///
/// regex bound of `[a-zA-Z_][a-zA-Z0-9_]*` is automatically granted by [chumsky::text::indent](https://docs.rs/chumsky/latest/chumsky/text/fn.ident.html).
Ident(&'src str),
/// DATABASE keyword
Database,
/// CREATE keyword
Create,
/// DROP keyword
Drop,
/// USE keyword
Use,
/// SHOW keyword
Show,
/// TABLES keyword
Tables,
/// INDEXES keyword
Indexes,
/// Punctuation
Ctrl(char),
}
type Span = SimpleSpan;
type Spanned<T> = (T, Span);
fn lexer<'src>(
) -> impl Parser<'src, &'src str, Vec<Spanned<Token<'src>>>, extra::Err<Rich<'src, char, Span>>> {
let ident = text::ascii::ident().map(|ident: &str| match ident {
"DATABASE" => Token::Database,
"CREATE" => Token::Create,
"DROP" => Token::Drop,
"USE" => Token::Use,
"SHOW" => Token::Show,
"TABLES" => Token::Tables,
"INDEXES" => Token::Indexes,
_ => Token::Ident(ident),
});
let ctrl = one_of(";").map(Token::Ctrl);
let token = ident.or(ctrl);
token
.map_with(|token, e| (token, e.span()))
.padded()
.recover_with(skip_then_retry_until(any().ignored(), end()))
.repeated()
.collect()
}
/// SQL Instructions
///
/// Equivalent to [SQL.g4](https://thu-db.github.io/dbs-tutorial/files/)
#[derive(Debug, PartialEq)]
pub enum SqlInstr<'src> {
/// Statement for operating on a database
Database(DatabaseStatement<'src>),
}
fn expr_parser<'src, I>(
) -> impl Parser<'src, I, Vec<Spanned<SqlInstr<'src>>>, extra::Err<Rich<'src, Token<'src>, Span>>> + Clone
where
I: ValueInput<'src, Token = Token<'src>, Span = Span>,
{
let db_statement = {
let create = just([Token::Create, Token::Database]).ignore_then(
select! {Token::Ident(n) = e => DatabaseStatement::Create(n).spanned(e.span())},
);
create.then_ignore(just(Token::Ctrl(';')))
}
.map(|(i, e)| SqlInstr::Database(i).spanned(e));
let statement = db_statement;
statement.repeated().collect()
}
#[derive(Debug, PartialEq)]
pub enum DatabaseStatement<'src> {
Create(&'src str),
Drop(&'src str),
Use(&'src str),
ShowDatabases,
ShowTables,
ShowIndexes,
}
impl<'a> DatabaseStatement<'a> {
fn spanned(self, span: Span) -> (Self, Span) {
(self, span)
}
}
impl<'a> SqlInstr<'a> {
fn spanned(self, span: Span) -> (Self, Span) {
(self, span)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_lexer_keyword() {
let input = "CREATE DATABASE sandspruen;";
let tokens = lexer().parse(input).unwrap();
assert_eq!(tokens[1].0, Token::Database);
assert_eq!(tokens.len(), 4);
}
#[test]
fn test_lexer_ident() {
let input = "sandspuren SandSpuren;sand_spuren";
let tokens = lexer().parse(input).unwrap();
assert_eq!(tokens[0].0, Token::Ident("sandspuren"));
assert_eq!(tokens[3].0, Token::Ident("sand_spuren"));
let input = "41sand_spuren";
let tokens = lexer().parse(input);
assert!(tokens.has_errors());
}
#[test]
fn test_parser_db_create() {
let input = "CREATE DATABASE sandspruen;";
let tokens = lexer().parse(input).unwrap();
let instr = expr_parser()
.map_with(|a, e| (a, e.span()))
.parse(
&tokens
.as_slice()
.map((input.len()..input.len()).into(), |(t, s)| (t, s)), // Error: `&[(Token<'_>, chumsky::span::SimpleSpan)]` is not an iterator
)
.unwrap();
assert_eq!(
instr[0].0,
SqlInstr::Database(DatabaseStatement::Create("sandspruen"))
);
}
} Sorry for my stupid question, but I expect the The version is |
Beta Was this translation helpful? Give feedback.
Replies: 1 comment 1 reply
-
Ah, the |
Beta Was this translation helpful? Give feedback.
Ah, the
Input::map
function was actually only introduced a few days ago and has not yet been included in a crates.io release. You'll need to useInput::spanned
for now, which works in a similar manner for tuples.