Skip to content

Commit

Permalink
Add NodeBuilder for building ASTs (#476)
Browse files Browse the repository at this point in the history
  • Loading branch information
jpschorr authored Jul 10, 2024
1 parent 684fff3 commit 1b22fb6
Show file tree
Hide file tree
Showing 8 changed files with 97 additions and 60 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added
- partiql-ast: Pretty-printing of AST via `ToPretty` trait
- partiql-ast: Added `NodeBuilder` to make building ASTs easier

### Fixed
- Minor documentation issues
Expand Down
73 changes: 73 additions & 0 deletions partiql-ast/src/builder.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
use crate::ast;
use crate::ast::{AstNode, NodeId};

/// A provider of 'fresh' [`NodeId`]s.
pub trait IdGenerator {
/// Provides a 'fresh' [`NodeId`].
fn id(&mut self) -> NodeId;
}

/// Auto-incrementing [`IdGenerator`]
pub struct AutoNodeIdGenerator {
next_id: ast::NodeId,
}

impl Default for AutoNodeIdGenerator {
fn default() -> Self {
AutoNodeIdGenerator { next_id: NodeId(1) }
}
}

impl IdGenerator for AutoNodeIdGenerator {
#[inline]
fn id(&mut self) -> NodeId {
let mut next = NodeId(&self.next_id.0 + 1);
std::mem::swap(&mut self.next_id, &mut next);
next
}
}

/// A provider of [`NodeId`]s that are always `0`; Useful for testing
#[derive(Default)]
pub struct NullIdGenerator {}

impl IdGenerator for NullIdGenerator {
fn id(&mut self) -> NodeId {
NodeId(0)
}
}

/// A Builder for [`AstNode`]s that uses a [`IdGenerator`] to assign [`NodeId`]s
pub struct NodeBuilder<Id: IdGenerator> {
/// Generator for 'fresh' [`NodeId`]s
pub id_gen: Id,
}

impl<Id> NodeBuilder<Id>
where
Id: IdGenerator,
{
pub fn new(id_gen: Id) -> Self {
Self { id_gen }
}

pub fn node<T>(&mut self, node: T) -> AstNode<T> {
let id = self.id_gen.id();
AstNode { id, node }
}
}

impl<T> Default for NodeBuilder<T>
where
T: IdGenerator + Default,
{
fn default() -> Self {
Self::new(T::default())
}
}

/// A [`NodeBuilder`] whose 'fresh' [`NodeId`]s are Auto-incrementing.
pub type NodeBuilderWithAutoId = NodeBuilder<AutoNodeIdGenerator>;

/// A [`NodeBuilder`] whose 'fresh' [`NodeId`]s are always `0`; Useful for testing
pub type NodeBuilderWithNullId = NodeBuilder<NullIdGenerator>;
2 changes: 2 additions & 0 deletions partiql-ast/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,6 @@ pub mod ast;

pub mod pretty;

pub mod builder;

pub mod visit;
13 changes: 3 additions & 10 deletions partiql-parser/src/parse/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,11 @@ mod parser_state;
use crate::error::{ParseError, UnexpectedTokenData};
use crate::lexer;
use crate::lexer::CommentSkippingLexer;
use crate::parse::parser_state::{IdGenerator, ParserState};
use crate::parse::parser_state::ParserState;
use crate::preprocessor::{PreprocessingPartiqlLexer, BUILT_INS};
use lalrpop_util as lpop;
use partiql_ast::ast;
use partiql_ast::builder::IdGenerator;
use partiql_source_map::line_offset_tracker::LineOffsetTracker;
use partiql_source_map::location::{ByteOffset, BytePosition, ToLocated};
use partiql_source_map::metadata::LocationMap;
Expand Down Expand Up @@ -536,16 +537,8 @@ mod tests {

mod set_ops {
use super::*;
use partiql_ast::ast::NodeId;

#[derive(Default)]
pub(crate) struct NullIdGenerator {}

impl IdGenerator for NullIdGenerator {
fn id(&mut self) -> NodeId {
NodeId(0)
}
}
use partiql_ast::builder::NullIdGenerator;

impl<'input> ParserState<'input, NullIdGenerator> {
pub(crate) fn new_null_id() -> ParserState<'input, NullIdGenerator> {
Expand Down
3 changes: 2 additions & 1 deletion partiql-parser/src/parse/parse_util.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
use partiql_ast::ast;

use crate::parse::parser_state::{IdGenerator, ParserState};
use crate::parse::parser_state::ParserState;
use bitflags::bitflags;
use partiql_ast::builder::IdGenerator;
use partiql_source_map::location::ByteOffset;

bitflags! {
Expand Down
51 changes: 9 additions & 42 deletions partiql-parser/src/parse/parser_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,12 @@ use crate::error::ParseError;
use crate::parse::lexer;
use std::ops::Range;

use partiql_ast::ast;

use lalrpop_util::ErrorRecovery;
use once_cell::sync::Lazy;
use regex::Regex;

use partiql_ast::ast::{AstNode, NodeId, SymbolPrimitive};
use partiql_ast::ast::{AstNode, SymbolPrimitive};
use partiql_ast::builder::{AutoNodeIdGenerator, IdGenerator, NodeBuilder};

use partiql_source_map::location::{ByteOffset, BytePosition, Location};
use partiql_source_map::metadata::LocationMap;
Expand All @@ -19,39 +18,10 @@ type ParseErrors<'input> = Vec<ParseErrorRecovery<'input>>;

const INIT_LOCATIONS: usize = 100;

/// A provider of 'fresh' [`NodeId`]s.
// NOTE `pub` instead of `pub(crate)` only because LALRPop's generated code uses this in `pub trait __ToTriple`
// which leads to compile time errors.
// However, since this module is included privately, this type doesn't leak outside the crate anyway.
pub trait IdGenerator {
/// Provides a 'fresh' [`NodeId`].
fn id(&mut self) -> NodeId;
}

/// Auto-incrementing [`IdGenerator`]
pub(crate) struct NodeIdGenerator {
next_id: ast::NodeId,
}

impl Default for NodeIdGenerator {
fn default() -> Self {
NodeIdGenerator { next_id: NodeId(1) }
}
}

impl IdGenerator for NodeIdGenerator {
#[inline]
fn id(&mut self) -> NodeId {
let mut next = NodeId(&self.next_id.0 + 1);
std::mem::swap(&mut self.next_id, &mut next);
next
}
}

/// State of the parsing during parse.
pub(crate) struct ParserState<'input, Id: IdGenerator> {
/// Generator for 'fresh' [`NodeId`]s
pub id_gen: Id,
pub node_builder: NodeBuilder<Id>,
/// Maps AST [`NodeId`]s to the location in the source from which each was derived.
pub locations: LocationMap,
/// Any errors accumulated during parse.
Expand All @@ -61,9 +31,9 @@ pub(crate) struct ParserState<'input, Id: IdGenerator> {
aggregates_pat: &'static Regex,
}

impl<'input> Default for ParserState<'input, NodeIdGenerator> {
impl<'input> Default for ParserState<'input, AutoNodeIdGenerator> {
fn default() -> Self {
ParserState::with_id_gen(NodeIdGenerator::default())
ParserState::with_id_gen(AutoNodeIdGenerator::default())
}
}

Expand All @@ -79,7 +49,7 @@ where
{
pub fn with_id_gen(id_gen: I) -> Self {
ParserState {
id_gen,
node_builder: NodeBuilder::new(id_gen),
locations: LocationMap::with_capacity(INIT_LOCATIONS),
errors: ParseErrors::default(),
aggregates_pat: &KNOWN_AGGREGATE_PATTERN,
Expand All @@ -93,12 +63,9 @@ impl<'input, Id: IdGenerator> ParserState<'input, Id> {
where
IntoLoc: Into<Location<BytePosition>>,
{
let location = location.into();
let id = self.id_gen.id();

self.locations.insert(id, location);

AstNode { id, node }
let node = self.node_builder.node(node);
self.locations.insert(node.id, location.into());
node
}

/// Create a new [`AstNode`] from the inner data which it is to hold and a [`ByteOffset`] range.
Expand Down
3 changes: 2 additions & 1 deletion partiql-parser/src/parse/partiql.lalrpop
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ use partiql_ast::ast;
use partiql_source_map::location::{ByteOffset, BytePosition, Location, ToLocated};

use crate::parse::parse_util::{strip_expr, strip_query, strip_query_set, CallSite, Attrs, Synth};
use crate::parse::parser_state::{ParserState, IdGenerator};
use crate::parse::parser_state::ParserState;
use partiql_ast::builder::IdGenerator;

grammar<'input, 'state, Id>(input: &'input str, state: &'state mut ParserState<'input, Id>) where Id: IdGenerator;

Expand Down
11 changes: 5 additions & 6 deletions partiql/src/subquery_tests.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#![deny(rust_2018_idioms)]
#![deny(clippy::all)]
#![warn(clippy::pedantic)]

Expand All @@ -18,14 +17,14 @@ mod tests {
#[inline]
pub(crate) fn evaluate(
catalog: &dyn Catalog,
logical: partiql_logical::LogicalPlan<partiql_logical::BindingsOp>,
logical: &LogicalPlan<partiql_logical::BindingsOp>,
bindings: MapBindings<Value>,
ctx_vals: &[(String, &(dyn Any))],
) -> Result<Value, EvalErr> {
let mut planner =
partiql_eval::plan::EvaluatorPlanner::new(EvaluationMode::Strict, catalog);

let mut plan = planner.compile(&logical).expect("Expect no plan error");
let mut plan = planner.compile(logical).expect("Expect no plan error");

let sys = SystemContext {
now: DateTime::from_system_now_utc(),
Expand All @@ -40,7 +39,7 @@ mod tests {
#[test]
fn locals_in_subqueries() {
// `SELECT VALUE _1 from (SELECT VALUE foo from <<{'a': 'b'}>> AS foo) AS _1;`
let mut sub_query = partiql_logical::LogicalPlan::new();
let mut sub_query = LogicalPlan::new();
let scan_op_id =
sub_query.add_operator(partiql_logical::BindingsOp::Scan(partiql_logical::Scan {
expr: partiql_logical::ValueExpr::Lit(Box::new(Value::Bag(Box::new(Bag::from(
Expand Down Expand Up @@ -86,7 +85,7 @@ mod tests {

let catalog = PartiqlCatalog::default();
let bindings = MapBindings::default();
let res = evaluate(&catalog, plan, bindings, &[]).expect("should eval correctly");
let res = evaluate(&catalog, &plan, bindings, &[]).expect("should eval correctly");
dbg!(&res);
assert!(res != Value::Missing);
assert_eq!(res, Value::from(bag![tuple![("a", "b")]]));
Expand Down Expand Up @@ -147,7 +146,7 @@ mod tests {
"foo",
Value::Bag(Box::new(Bag::from(vec![tuple![("a", "b")].into()]))),
);
let res = evaluate(&catalog, plan, bindings, &[]).expect("should eval correctly");
let res = evaluate(&catalog, &plan, bindings, &[]).expect("should eval correctly");
dbg!(&res);
assert!(res != Value::Missing);
assert_eq!(res, Value::from(bag![tuple![("a", "b")]]));
Expand Down

1 comment on commit 1b22fb6

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PartiQL (rust) Benchmark

Benchmark suite Current: 1b22fb6 Previous: 684fff3 Ratio
arith_agg-avg 775078 ns/iter (± 8705) 771998 ns/iter (± 3660) 1.00
arith_agg-avg_distinct 862652 ns/iter (± 6626) 868193 ns/iter (± 32480) 0.99
arith_agg-count 830846 ns/iter (± 7811) 819127 ns/iter (± 13014) 1.01
arith_agg-count_distinct 855765 ns/iter (± 2496) 850385 ns/iter (± 12627) 1.01
arith_agg-min 843309 ns/iter (± 3129) 835124 ns/iter (± 7322) 1.01
arith_agg-min_distinct 871031 ns/iter (± 11411) 864818 ns/iter (± 5042) 1.01
arith_agg-max 840150 ns/iter (± 3401) 826288 ns/iter (± 9355) 1.02
arith_agg-max_distinct 870457 ns/iter (± 1793) 860400 ns/iter (± 3296) 1.01
arith_agg-sum 833829 ns/iter (± 2930) 822396 ns/iter (± 21825) 1.01
arith_agg-sum_distinct 863680 ns/iter (± 2889) 851443 ns/iter (± 4511) 1.01
arith_agg-avg-count-min-max-sum 971146 ns/iter (± 7271) 967909 ns/iter (± 18508) 1.00
arith_agg-avg-count-min-max-sum-group_by 1228144 ns/iter (± 5068) 1261195 ns/iter (± 11052) 0.97
arith_agg-avg-count-min-max-sum-group_by-group_as 1825750 ns/iter (± 17664) 1872671 ns/iter (± 20081) 0.97
arith_agg-avg_distinct-count_distinct-min_distinct-max_distinct-sum_distinct 1171155 ns/iter (± 7797) 1178677 ns/iter (± 10540) 0.99
arith_agg-avg_distinct-count_distinct-min_distinct-max_distinct-sum_distinct-group_by 1475823 ns/iter (± 17342) 1589490 ns/iter (± 19655) 0.93
arith_agg-avg_distinct-count_distinct-min_distinct-max_distinct-sum_distinct-group_by-group_as 2067865 ns/iter (± 12643) 2165888 ns/iter (± 8946) 0.95
parse-1 4149 ns/iter (± 24) 4269 ns/iter (± 15) 0.97
parse-15 39532 ns/iter (± 211) 39638 ns/iter (± 150) 1.00
parse-30 76008 ns/iter (± 562) 77020 ns/iter (± 352) 0.99
compile-1 4432 ns/iter (± 13) 4263 ns/iter (± 10) 1.04
compile-15 35043 ns/iter (± 104) 33192 ns/iter (± 197) 1.06
compile-30 71338 ns/iter (± 331) 67869 ns/iter (± 314) 1.05
plan-1 69546 ns/iter (± 702) 69565 ns/iter (± 234) 1.00
plan-15 1078664 ns/iter (± 86226) 1082605 ns/iter (± 19293) 1.00
plan-30 2153387 ns/iter (± 13338) 2170474 ns/iter (± 56024) 0.99
eval-1 12766894 ns/iter (± 60043) 12909400 ns/iter (± 104669) 0.99
eval-15 86920998 ns/iter (± 1270499) 87331642 ns/iter (± 228299) 1.00
eval-30 167856739 ns/iter (± 489213) 168169555 ns/iter (± 384409) 1.00
join 9891 ns/iter (± 173) 9942 ns/iter (± 344) 0.99
simple 2474 ns/iter (± 27) 2482 ns/iter (± 13) 1.00
simple-no 427 ns/iter (± 2) 428 ns/iter (± 2) 1.00
numbers 57 ns/iter (± 0) 57 ns/iter (± 0) 1
parse-simple 586 ns/iter (± 3) 556 ns/iter (± 9) 1.05
parse-ion 1762 ns/iter (± 4) 1736 ns/iter (± 29) 1.01
parse-group 5772 ns/iter (± 35) 5734 ns/iter (± 257) 1.01
parse-complex 15496 ns/iter (± 188) 14833 ns/iter (± 122) 1.04
parse-complex-fexpr 22579 ns/iter (± 111) 21781 ns/iter (± 130) 1.04

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.