Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: power operator with right associativity #4125

Merged
merged 9 commits into from
Jun 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions prqlc/prqlc-parser/src/lexer/lr/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ pub enum TokenKind {
Or, // ||
Coalesce, // ??
DivInt, // //
// Pow, // **
Annotate, // @
Pow, // **
Annotate, // @

// Aesthetics only
Comment(String),
Expand Down Expand Up @@ -191,7 +191,7 @@ impl std::fmt::Display for TokenKind {
TokenKind::Or => f.write_str("||"),
TokenKind::Coalesce => f.write_str("??"),
TokenKind::DivInt => f.write_str("//"),
// TokenKind::Pow => f.write_str("**"),
TokenKind::Pow => f.write_str("**"),
TokenKind::Annotate => f.write_str("@{"),

TokenKind::Param(id) => write!(f, "${id}"),
Expand Down
2 changes: 1 addition & 1 deletion prqlc/prqlc-parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ fn lex_token() -> impl Parser<char, Token, Error = Cheap<char>> {
just("||").then_ignore(end_expr()).to(TokenKind::Or),
just("??").to(TokenKind::Coalesce),
just("//").to(TokenKind::DivInt),
// just("**").to(TokenKind::Pow),
just("**").to(TokenKind::Pow),
just("@")
.then(digits(1).not().rewind())
.to(TokenKind::Annotate),
Expand Down
65 changes: 59 additions & 6 deletions prqlc/prqlc-parser/src/parser/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -220,8 +220,7 @@ pub fn expr() -> impl Parser<TokenKind, Expr, Error = PError> + Clone {

// Binary operators
let expr = term;
// TODO: for `operator_pow` we need to do right-associative parsing
// let expr = binary_op_parser_right(expr, operator_pow());
let expr = binary_op_parser_right(expr, operator_pow());
let expr = binary_op_parser(expr, operator_mul());
let expr = binary_op_parser(expr, operator_add());
let expr = binary_op_parser(expr, operator_compare());
Expand Down Expand Up @@ -279,7 +278,7 @@ where
{
let term = term.map_with_span(|e, s| (e, s)).boxed();

(term.clone())
term.clone()
.then(op.then(term).repeated())
.foldl(|left, (op, right)| {
let span = Span {
Expand All @@ -298,6 +297,60 @@ where
.boxed()
}

/// Builds a parser for a right-associative binary operator.
///
/// Parses `term (op term)*` and folds the operands from the right, so that
/// `a op b op c` is grouped as `a op (b op c)`. Every `ExprKind::Binary` node
/// produced gets a span that starts at its left operand and ends at its right
/// operand.
///
/// * `term` - parser for a single operand.
/// * `op` - parser for the operator between two operands.
pub fn binary_op_parser_right<'a, Term, Op>(
    term: Term,
    op: Op,
) -> impl Parser<TokenKind, Expr, Error = PError> + 'a
where
    Term: Parser<TokenKind, Expr, Error = PError> + 'a,
    Op: Parser<TokenKind, BinOp, Error = PError> + 'a,
{
    // Carry the span alongside each operand so the fold below can compute
    // the span of the combined expression.
    let term = term.map_with_span(|e, s| (e, s)).boxed();

    term.clone()
        .then(op.then(term).repeated())
        .map(|(first, others)| {
            // A transformation from this:
            // ```
            // first:  e1
            // others: [(op1 e2) (op2 e3)]
            // ```
            // ... into:
            // ```
            // r:      [(e1 op1) (e2 op2)]
            // free:   e3
            // ```
            // ... so we can use foldr for right associativity.
            //
            // We could use `(term.then(op)).repeated().then(term)` instead,
            // and have the correct structure from the get-go, but that would
            // perform miserably with simple expressions without operators,
            // because it would parse the term twice for each level of
            // precedence we have.

            let mut free = first;
            // One `(operand, op)` pair is produced per entry in `others`.
            let mut r = Vec::with_capacity(others.len());
            for (op, expr) in others {
                r.push((free, op));
                free = expr;
            }
            (r, free)
        })
        .foldr(|(left, op), right| {
            let span = Span {
                start: left.1.start,
                end: right.1.end,
                source_id: left.1.source_id,
            };
            let kind = ExprKind::Binary(BinaryExpr {
                left: Box::new(left.0),
                op,
                right: Box::new(right.0),
            });
            (into_expr(kind, span), span)
        })
        // Drop the span that was only needed during folding.
        .map(|(e, _)| e)
        .boxed()
}

fn func_call<E>(expr: E) -> impl Parser<TokenKind, Expr, Error = PError> + Clone
where
E: Parser<TokenKind, Expr, Error = PError> + Clone,
Expand Down Expand Up @@ -422,9 +475,9 @@ fn operator_unary() -> impl Parser<TokenKind, UnOp, Error = PError> {
.or(ctrl('!').to(UnOp::Not))
.or(just(TokenKind::Eq).to(UnOp::EqSelf))
}
// fn operator_pow() -> impl Parser<TokenKind, BinOp, Error = PError> {
// just(TokenKind::Pow).to(BinOp::Pow)
// }
/// Parser that accepts the `TokenKind::Pow` token and yields `BinOp::Pow`.
fn operator_pow() -> impl Parser<TokenKind, BinOp, Error = PError> {
    let pow_token = just(TokenKind::Pow);
    pow_token.to(BinOp::Pow)
}
fn operator_mul() -> impl Parser<TokenKind, BinOp, Error = PError> {
(just(TokenKind::DivInt).to(BinOp::DivInt))
.or(ctrl('*').to(BinOp::Mul))
Expand Down
50 changes: 50 additions & 0 deletions prqlc/prqlc-parser/src/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1276,6 +1276,56 @@ fn test_func_call() {
"###);
}

/// Snapshot test checking that chained `**` operators nest to the right.
#[test]
fn test_right_assoc() {
// `2 ** 3 ** 4` is expected to parse as `2 ** (3 ** 4)`: the outer node has
// the literal 2 on the left and the inner `3 ** 4` on the right.
assert_yaml_snapshot!(parse_expr(r#"2 ** 3 ** 4"#).unwrap(), @r###"
---
Binary:
left:
Literal:
Integer: 2
op: Pow
right:
Binary:
left:
Literal:
Integer: 3
op: Pow
right:
Literal:
Integer: 4
"###);
// `**` combined with `+` and parentheses: the expected tree is
// `1 + (2 ** ((3 + 4) ** 4))`, i.e. the power chain is the right operand of
// the addition and still nests to the right.
assert_yaml_snapshot!(parse_expr(r#"1 + 2 ** (3 + 4) ** 4"#).unwrap(), @r###"
---
Binary:
left:
Literal:
Integer: 1
op: Add
right:
Binary:
left:
Literal:
Integer: 2
op: Pow
right:
Binary:
left:
Binary:
left:
Literal:
Integer: 3
op: Add
right:
Literal:
Integer: 4
op: Pow
right:
Literal:
Integer: 4
"###);
}

#[test]
fn test_op_precedence() {
assert_yaml_snapshot!(parse_expr(r#"1 + 2 - 3 - 4"#).unwrap(), @r###"
Expand Down
5 changes: 2 additions & 3 deletions prqlc/prqlc/src/codegen/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -617,9 +617,8 @@ mod test {
assert_is_formatted(r#"let a = 5 / 2 / 2"#);
assert_is_formatted(r#"let a = 5 / (2 / 2)"#);

// TODO: parsing for pow operator
// assert_is_formatted(r#"let a = (5 ** 2) ** 2"#);
// assert_is_formatted(r#"let a = 5 ** 2 ** 2"#);
assert_is_formatted(r#"let a = (5 ** 2) ** 2"#);
assert_is_formatted(r#"let a = 5 ** 2 ** 2"#);
}

#[test]
Expand Down
48 changes: 30 additions & 18 deletions prqlc/prqlc/src/semantic/ast_expand.rs
Original file line number Diff line number Diff line change
Expand Up @@ -202,24 +202,36 @@ fn expand_binary(ast::BinaryExpr { op, left, right }: ast::BinaryExpr) -> Result
let left = expand_expr(*left)?;
let right = expand_expr(*right)?;

let func_name = match op {
ast::BinOp::Mul => ["std", "mul"],
ast::BinOp::DivInt => ["std", "div_i"],
ast::BinOp::DivFloat => ["std", "div_f"],
ast::BinOp::Mod => ["std", "mod"],
ast::BinOp::Pow => ["std", "pow"],
ast::BinOp::Add => ["std", "add"],
ast::BinOp::Sub => ["std", "sub"],
ast::BinOp::Eq => ["std", "eq"],
ast::BinOp::Ne => ["std", "ne"],
ast::BinOp::Gt => ["std", "gt"],
ast::BinOp::Lt => ["std", "lt"],
ast::BinOp::Gte => ["std", "gte"],
ast::BinOp::Lte => ["std", "lte"],
ast::BinOp::RegexSearch => ["std", "regex_search"],
ast::BinOp::And => ["std", "and"],
ast::BinOp::Or => ["std", "or"],
ast::BinOp::Coalesce => ["std", "coalesce"],
let func_name: Vec<&str> = match op {
ast::BinOp::Mul => vec!["std", "mul"],
ast::BinOp::DivInt => vec!["std", "div_i"],
ast::BinOp::DivFloat => vec!["std", "div_f"],
ast::BinOp::Mod => vec!["std", "mod"],
ast::BinOp::Pow => vec!["std", "math", "pow"],
ast::BinOp::Add => vec!["std", "add"],
ast::BinOp::Sub => vec!["std", "sub"],
ast::BinOp::Eq => vec!["std", "eq"],
ast::BinOp::Ne => vec!["std", "ne"],
ast::BinOp::Gt => vec!["std", "gt"],
ast::BinOp::Lt => vec!["std", "lt"],
ast::BinOp::Gte => vec!["std", "gte"],
ast::BinOp::Lte => vec!["std", "lte"],
ast::BinOp::RegexSearch => vec!["std", "regex_search"],
ast::BinOp::And => vec!["std", "and"],
ast::BinOp::Or => vec!["std", "or"],
ast::BinOp::Coalesce => vec!["std", "coalesce"],
};

// For the power operator, we need to reverse the order, since `math.pow a
// b` is equivalent to `b ** a`. (but for example `sub a b` is equivalent to
// `a - b`).
//
// (I think this is the most globally consistent approach, since final
// arguments should be the "data", which in the case of `pow` would be the
// base; but it's not perfect, we could change it...)
let (left, right) = match op {
ast::BinOp::Pow => (right, left),
_ => (left, right),
Comment on lines +225 to +234
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, interesting.

We should probably apply this to other non-commutative operators, std.sub for example:

5 - 1 ~> std.sub 1 5

This would be handy in a few places, such as [5, 10, 3] | map (std.sub 1) ~> [4, 9, 2].

It is a bit confusing and a potential source of bugs. The good news is that it would not be noticed in the simple case of using operators, but only when using the functions directly. This would probably be done by more experienced users, who might appreciate the functional composability of the "reverse importance order" of arguments.

So, ATM, I'm leaning in the direction of "swapping the arguments of all bin op functions".

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So, ATM, I'm leaning in direction of "swaping the arguments of all bin op functions".

Weirdly I just saw this, sorry.

I think that makes sense actually! I agree it won't make much difference to most but it is more correct...

};
Ok(new_binop(left, &func_name, right).kind)
}
Expand Down
1 change: 1 addition & 0 deletions prqlc/prqlc/src/sql/std.sql.prql
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ module math {
let asin = column -> s"ASIN({column:0})"
let tan = column -> s"TAN({column:0})"
let atan = column -> s"ATAN({column:0})"
# Note exponent goes first, so `pow 2 3` is 3^2
let pow = exponent column -> s"POW({column:0}, {exponent:0})"
let round = n_digits column -> s"ROUND({column:0}, {n_digits:0})"
}
Expand Down
3 changes: 2 additions & 1 deletion prqlc/prqlc/tests/integration/queries/math_module.prql
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,6 @@ select {
total_sin = math.sin total | math.asin | math.round 2,
total_tan = math.tan total | math.atan | math.round 2,
total_deg = total | math.degrees | math.radians | math.round 2,
total_square = total | math.pow 2| math.round 2,
total_square = total | math.pow 2 | math.round 2,
total_square_op = (total ** 2) | math.round 2,
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
source: prqlc/prqlc/tests/integration/queries.rs
expression: "# mssql:test\n# sqlite:skip (see https://github.com/rusqlite/rusqlite/issues/1211)\nfrom invoices\ntake 5\nselect {\n total_original = total | math.round 2,\n total_x = math.pi - total | math.round 2 | math.abs,\n total_floor = math.floor total,\n total_ceil = math.ceil total,\n total_log10 = math.log10 total | math.round 3,\n total_log2 = math.log 2 total | math.round 3,\n total_sqrt = math.sqrt total | math.round 3,\n total_ln = math.ln total | math.exp | math.round 2,\n total_cos = math.cos total | math.acos | math.round 2,\n total_sin = math.sin total | math.asin | math.round 2,\n total_tan = math.tan total | math.atan | math.round 2,\n total_deg = total | math.degrees | math.radians | math.round 2,\n total_square = total | math.pow 2| math.round 2,\n}\n"
expression: "# mssql:test\n# sqlite:skip (see https://github.com/rusqlite/rusqlite/issues/1211)\nfrom invoices\ntake 5\nselect {\n total_original = total | math.round 2,\n total_x = math.pi - total | math.round 2 | math.abs,\n total_floor = math.floor total,\n total_ceil = math.ceil total,\n total_log10 = math.log10 total | math.round 3,\n total_log2 = math.log 2 total | math.round 3,\n total_sqrt = math.sqrt total | math.round 3,\n total_ln = math.ln total | math.exp | math.round 2,\n total_cos = math.cos total | math.acos | math.round 2,\n total_sin = math.sin total | math.asin | math.round 2,\n total_tan = math.tan total | math.atan | math.round 2,\n total_deg = total | math.degrees | math.radians | math.round 2,\n total_square = total | math.pow 2 | math.round 2,\n total_square_op = (total ** 2) | math.round 2,\n}\n"
input_file: prqlc/prqlc/tests/integration/queries/math_module.prql
---
SELECT
Expand All @@ -16,9 +16,9 @@ SELECT
ROUND(ASIN(SIN(total)), 2) AS total_sin,
ROUND(ATAN(TAN(total)), 2) AS total_tan,
ROUND(RADIANS(DEGREES(total)), 2) AS total_deg,
ROUND(POW(total, 2), 2) AS total_square
ROUND(POW(total, 2), 2) AS total_square,
ROUND(POW(total, 2), 2) AS total_square_op
FROM
invoices
LIMIT
5

Loading
Loading