Merge pull request #6239 from Xuanwo/refactor-ast
refactor(parser): Move util to root of crate with split of input
BohuTANG authored Jun 28, 2022
2 parents 4297c79 + f805035 commit 8a1b4cc
Showing 10 changed files with 116 additions and 83 deletions.
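For downstream code, the practical effect is that `Input` and the parser helpers become crate-root items instead of living under `parser::util`. A sketch of the import migration implied by the diffs below (the grouping is illustrative; the paths themselves come from the changed files):

// Before this commit
use common_ast::parser::util::Input;

// After this commit
use common_ast::Input;
use common_ast::match_text;  // re-exported so `rule!` can expand to `$crate::match_text`
use common_ast::match_token; // likewise for `$crate::match_token`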
1 change: 0 additions & 1 deletion common/ast/Cargo.toml
@@ -8,7 +8,6 @@ edition = "2021"

[lib]
doctest = false
test = false

[dependencies] # In alphabetical order
# Workspace dependencies
2 changes: 1 addition & 1 deletion common/ast/src/error.rs
@@ -23,8 +23,8 @@ use logos::Span;
use pratt::NoError;
use pratt::PrattError;

use crate::input::Input;
use crate::parser::token::*;
use crate::parser::util::Input;

const MAX_DISPLAY_ERROR_COUNT: usize = 6;

79 changes: 79 additions & 0 deletions common/ast/src/input.rs
@@ -0,0 +1,79 @@
// Copyright 2022 Datafuse Labs.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::ops::Range;
use std::ops::RangeFrom;
use std::ops::RangeFull;
use std::ops::RangeTo;

use crate::parser::token::Token;
use crate::Backtrace;

/// Input tokens slice with a backtrace that records all errors including
/// the optional branch.
#[derive(Debug, Clone, Copy)]
pub struct Input<'a>(pub &'a [Token<'a>], pub &'a Backtrace<'a>);

impl<'a> std::ops::Deref for Input<'a> {
    type Target = [Token<'a>];

    fn deref(&self) -> &Self::Target {
        self.0
    }
}

impl<'a> nom::InputLength for Input<'a> {
    fn input_len(&self) -> usize {
        self.0.input_len()
    }
}

impl<'a> nom::Offset for Input<'a> {
    fn offset(&self, second: &Self) -> usize {
        let fst = self.0.as_ptr();
        let snd = second.0.as_ptr();

        (snd as usize - fst as usize) / std::mem::size_of::<Token>()
    }
}

impl<'a> nom::Slice<Range<usize>> for Input<'a> {
    fn slice(&self, range: Range<usize>) -> Self {
        Input(&self.0[range], self.1)
    }
}

impl<'a> nom::Slice<RangeTo<usize>> for Input<'a> {
    fn slice(&self, range: RangeTo<usize>) -> Self {
        Input(&self.0[range], self.1)
    }
}

impl<'a> nom::Slice<RangeFrom<usize>> for Input<'a> {
    fn slice(&self, range: RangeFrom<usize>) -> Self {
        Input(&self.0[range], self.1)
    }
}

impl<'a> nom::Slice<RangeFull> for Input<'a> {
    fn slice(&self, _: RangeFull) -> Self {
        *self
    }
}

#[derive(Clone, Debug)]
pub struct WithSpan<'a, T> {
    pub(crate) span: Input<'a>,
    pub(crate) elem: T,
}
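Each `nom::Slice` impl above narrows the token window while passing the same `Backtrace` reference along, which is what lets errors recorded in optional branches survive backtracking. A minimal usage sketch in Rust, assuming the `tokenize_sql` helper imported by the test module further down and a `Backtrace::new` constructor (neither call appears in this diff):

use common_ast::parser::tokenize_sql;
use common_ast::Backtrace;
use common_ast::Input;
use nom::Offset;
use nom::Slice;

fn main() {
    // Tokenize a statement, then pair the tokens with a fresh backtrace.
    let tokens = tokenize_sql("SELECT 1").unwrap();
    let backtrace = Backtrace::new();
    let input = Input(&tokens, &backtrace);

    // Slicing drops tokens from the front but keeps the same backtrace.
    let rest = input.slice(1..);

    // `nom::Offset` reports how far `rest` has advanced past `input`.
    assert_eq!(input.offset(&rest), 1);
}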
9 changes: 9 additions & 0 deletions common/ast/src/lib.rs
@@ -12,12 +12,21 @@
// See the License for the specific language governing permissions and
// limitations under the License.

// TODO(xuanwo): Add crate level documents here.

mod error;
pub use error::Backtrace;
pub use error::DisplayError;
pub use error::Error;
pub use error::ErrorKind;

mod input;
pub use input::Input;

mod util;
pub use util::match_text;
pub use util::match_token;

pub mod ast;
pub mod parser;
pub mod udfs;
5 changes: 4 additions & 1 deletion common/ast/src/parser/expr.rs
@@ -25,11 +25,14 @@ use pratt::PrattParser;
use pratt::Precedence;

use crate::ast::*;
use crate::input::Input;
use crate::input::WithSpan;
use crate::match_token;
use crate::parser::query::*;
use crate::parser::token::*;
use crate::parser::unescape::unescape;
use crate::parser::util::*;
use crate::rule;
use crate::util::*;
use crate::Error;
use crate::ErrorKind;

5 changes: 2 additions & 3 deletions common/ast/src/parser/mod.rs
@@ -17,20 +17,19 @@ pub mod query;
pub mod statement;
pub mod token;
pub mod unescape;
pub mod util;

use common_exception::ErrorCode;
use common_exception::Result;

use self::expr::subexpr;
use self::util::comma_separated_list0;
use crate::ast::Expr;
use crate::ast::Statement;
use crate::input::Input;
use crate::parser::statement::statement;
use crate::parser::token::Token;
use crate::parser::token::TokenKind;
use crate::parser::token::Tokenizer;
use crate::parser::util::Input;
use crate::util::comma_separated_list0;
use crate::Backtrace;
use crate::DisplayError;

4 changes: 3 additions & 1 deletion common/ast/src/parser/query.rs
@@ -22,10 +22,12 @@ use pratt::PrattParser;
use pratt::Precedence;

use crate::ast::*;
use crate::input::Input;
use crate::input::WithSpan;
use crate::parser::expr::*;
use crate::parser::token::*;
use crate::parser::util::*;
use crate::rule;
use crate::util::*;

pub fn query(i: Input) -> IResult<Query> {
map(
3 changes: 2 additions & 1 deletion common/ast/src/parser/statement.rs
@@ -25,11 +25,12 @@ use nom::Slice;
use url::Url;

use crate::ast::*;
use crate::input::Input;
use crate::parser::expr::*;
use crate::parser::query::*;
use crate::parser::token::*;
use crate::parser::util::*;
use crate::rule;
use crate::util::*;
use crate::ErrorKind;

pub fn statement(i: Input) -> IResult<Statement> {
89 changes: 15 additions & 74 deletions common/ast/src/parser/util.rs → common/ast/src/util.rs
@@ -12,11 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::ops::Range;
use std::ops::RangeFrom;
use std::ops::RangeFull;
use std::ops::RangeTo;

use nom::branch::alt;
use nom::combinator::map;
use nom::Offset;
@@ -26,17 +21,23 @@ use pratt::PrattParser;
use pratt::Precedence;

use crate::ast::Identifier;
use crate::input::Input;
use crate::input::WithSpan;
use crate::parser::token::*;
use crate::Backtrace;
use crate::rule;
use crate::Error;
use crate::ErrorKind;

pub type IResult<'a, Output> = nom::IResult<Input<'a>, Output, Error<'a>>;

/// Input tokens slice with a backtrace that records all errors including
/// the optional branch.
#[derive(Debug, Clone, Copy)]
pub struct Input<'a>(pub &'a [Token<'a>], pub &'a Backtrace<'a>);
#[macro_export]
macro_rules! rule {
($($tt:tt)*) => { nom_rule::rule!(
$crate::match_text,
$crate::match_token,
$($tt)*)
}
}

pub fn match_text(text: &'static str) -> impl FnMut(Input) -> IResult<&Token> {
move |i| match i.0.get(0).filter(|token| token.text() == text) {
@@ -58,15 +59,6 @@ pub fn match_token(kind: TokenKind) -> impl FnMut(Input) -> IResult<&Token> {
}
}

#[macro_export]
macro_rules! rule {
($($tt:tt)*) => { nom_rule::rule!(
$crate::parser::util::match_text,
$crate::parser::util::match_token,
$($tt)*)
}
}

pub fn ident(i: Input) -> IResult<Identifier> {
non_reserved_identifier(|token| token.is_reserved_ident(false))(i)
}
@@ -79,6 +71,8 @@ pub fn function_name(i: Input) -> IResult<Identifier> {
non_reserved_identifier(|token| token.is_reserved_function_name(false))(i)
}

/// TODO(xuanwo): Do we need to remove this function?
#[allow(dead_code)]
pub fn function_name_after_as(i: Input) -> IResult<Identifier> {
non_reserved_identifier(|token| token.is_reserved_function_name(true))(i)
}
@@ -132,7 +126,7 @@ fn non_reserved_keyword(
}
}

/// Parse one two two idents seperated by a peroid, fulfilling from the right.
/// Parse one two two idents seperated by a period, fulfilling from the right.
///
/// Example: `table.column`
pub fn peroid_separated_idents_1_to_2<'a>(
@@ -149,7 +143,7 @@ pub fn peroid_separated_idents_1_to_2<'a>(
)(i)
}

/// Parse one two three idents seperated by a peroid, fulfilling from the right.
/// Parse one two three idents seperated by a period, fulfilling from the right.
///
/// Example: `db.table.column`
pub fn peroid_separated_idents_1_to_3<'a>(
@@ -312,59 +306,6 @@ where
}
}

impl<'a> std::ops::Deref for Input<'a> {
type Target = [Token<'a>];

fn deref(&self) -> &Self::Target {
self.0
}
}

impl<'a> nom::InputLength for Input<'a> {
fn input_len(&self) -> usize {
self.0.input_len()
}
}

impl<'a> nom::Offset for Input<'a> {
fn offset(&self, second: &Self) -> usize {
let fst = self.0.as_ptr();
let snd = second.0.as_ptr();

(snd as usize - fst as usize) / std::mem::size_of::<Token>()
}
}

impl<'a> nom::Slice<Range<usize>> for Input<'a> {
fn slice(&self, range: Range<usize>) -> Self {
Input(&self.0[range], self.1)
}
}

impl<'a> nom::Slice<RangeTo<usize>> for Input<'a> {
fn slice(&self, range: RangeTo<usize>) -> Self {
Input(&self.0[range], self.1)
}
}

impl<'a> nom::Slice<RangeFrom<usize>> for Input<'a> {
fn slice(&self, range: RangeFrom<usize>) -> Self {
Input(&self.0[range], self.1)
}
}

impl<'a> nom::Slice<RangeFull> for Input<'a> {
fn slice(&self, _: RangeFull) -> Self {
*self
}
}

#[derive(Clone, Debug)]
pub struct WithSpan<'a, T> {
pub(crate) span: Input<'a>,
pub(crate) elem: T,
}

pub fn run_pratt_parser<'a, I, P, E>(
mut parser: P,
iter: &mut I,
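A note on the relocated `rule!` macro: because it is `#[macro_export]`, its body expands at every call site, so the helpers it names must be reachable as `$crate::match_text` and `$crate::match_token`; the re-exports added to `lib.rs` above are what make those shorter paths resolve. For orientation, a hypothetical parser inside the crate would combine the moved helpers the same way as before, only through the new module paths (illustrative sketch, not part of the commit):

use nom::combinator::map;
use nom::sequence::tuple;

use crate::ast::Identifier;
use crate::input::Input;
use crate::util::*;

// Hypothetical: parse an identifier wrapped in parentheses by composing the
// relocated helpers directly instead of going through the `rule!` macro.
fn parenthesized_ident(i: Input) -> IResult<Identifier> {
    map(
        tuple((match_text("("), ident, match_text(")"))),
        |(_, name, _)| name,
    )(i)
}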
2 changes: 1 addition & 1 deletion common/ast/tests/it/parser.rs
@@ -19,10 +19,10 @@ use common_ast::parser::parse_sql;
use common_ast::parser::query::*;
use common_ast::parser::token::*;
use common_ast::parser::tokenize_sql;
use common_ast::parser::util::Input;
use common_ast::rule;
use common_ast::Backtrace;
use common_ast::DisplayError;
use common_ast::Input;
use common_exception::Result;
use goldenfile::Mint;
use nom::Parser;