Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: support ALIGN TO and Interval in Range query #9

Merged
merged 2 commits into from
Nov 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 40 additions & 12 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -659,14 +659,20 @@ impl<'a> Parser<'a> {
Ok(expr)
}

/// Parse Range clause with format `RANGE [ Duration literal | (INTERVAL [interval expr]) ] FILL [ NULL | PREV .....]`
fn parse_range_expr(&mut self, expr: Expr) -> Result<Expr, ParserError> {
let index = self.index;
let range = if self.parse_keyword(Keyword::RANGE) {
// Make sure Range followed by a value, or it will be confused with window function syntax
// Make sure Range followed by a value or interval expr, or it will be confused with window function syntax
// e.g. `COUNT(*) OVER (ORDER BY a RANGE BETWEEN INTERVAL '1 DAY' PRECEDING AND INTERVAL '1 DAY' FOLLOWING)`
if let Ok(value) = self.parse_value() {
if self.consume_token(&Token::LParen) {
self.expect_keyword(Keyword::INTERVAL)?;
let interval = self.parse_interval()?;
self.expect_token(&Token::RParen)?;
interval
} else if let Ok(value) = self.parse_value() {
value.verify_duration()?;
value
Expr::Value(value)
} else {
self.index = index;
return Ok(expr);
Expand Down Expand Up @@ -694,7 +700,7 @@ impl<'a> Parser<'a> {
if matches!(e, Expr::Function(..)) {
let args = vec![
FunctionArg::Unnamed(FunctionArgExpr::Expr(e.clone())),
FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(range.clone()))),
FunctionArg::Unnamed(FunctionArgExpr::Expr(range.clone())),
FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(fill.clone()))),
];
let range_func = Function {
Expand Down Expand Up @@ -5757,8 +5763,8 @@ impl<'a> Parser<'a> {
} else {
vec![]
};

let mut align: Option<(Value, Vec<Expr>)> = None;
// triple means (align duration, to, by)
let mut align: Option<(Expr, Expr, Vec<Expr>)> = None;
let mut fill: Option<String> = None;
for _ in 0..2 {
if self.parse_keyword(Keyword::ALIGN) {
Expand All @@ -5767,8 +5773,27 @@ impl<'a> Parser<'a> {
"Duplicate ALIGN keyword detected in SELECT clause.".into(),
));
}
let value = self.parse_value()?;
value.verify_duration()?;
// The interval must be wrapped in parentheses; otherwise the syntax is ambiguous.
// `INTERVAL '1-1' YEAR TO MONTH` conflicts with
// `ALIGN INTERVAL '1' day TO '1970-01-01T00:00:00+08:00'`
let value = if self.consume_token(&Token::LParen) {
self.expect_keyword(Keyword::INTERVAL)?;
let interval = self.parse_interval()?;
self.expect_token(&Token::RParen)?;
interval
} else {
let value = self.parse_value()?;
value.verify_duration()?;
Expr::Value(value)
};
let to = if self.parse_keyword(Keyword::TO) {
let value = self.next_token().to_string();
Expr::Value(Value::SingleQuotedString(
value.trim_matches(|x| x == '\'' || x == '"').to_string(),
))
} else {
Expr::Value(Value::SingleQuotedString(String::new()))
};
let by = if self.parse_keyword(Keyword::BY) {
self.expect_token(&Token::LParen)?;
if self.consume_token(&Token::RParen) {
Expand All @@ -5793,7 +5818,7 @@ impl<'a> Parser<'a> {
} else {
vec![]
};
align = Some((value, by));
align = Some((value, to, by));
}
if self.parse_keyword(Keyword::FILL) {
if fill.is_some() {
Expand All @@ -5809,7 +5834,7 @@ impl<'a> Parser<'a> {
"ALIGN argument cannot be omitted in the range select query".into(),
));
}
let projection = if let Some((align, by)) = align {
let projection = if let Some((align, to, by)) = align {
let fill = fill.unwrap_or_default();
let by_num = FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(
Value::SingleQuotedString(by.len().to_string()),
Expand All @@ -5822,7 +5847,7 @@ impl<'a> Parser<'a> {
FunctionArg::Unnamed(FunctionArgExpr::Expr(x))
})
.collect::<Vec<_>>();
// range_fn(func, range, fill, byc, [byv], align)
// range_fn(func, range, fill, byc, [byv], align, to)
// byc is the length of the variadic arguments [byv]
let mut rewrite_count = 0;
let mut align_fill_rewrite =
Expand Down Expand Up @@ -5850,7 +5875,10 @@ impl<'a> Parser<'a> {
range_func.args.push(by_num.clone());
range_func.args.extend(by.clone());
range_func.args.push(FunctionArg::Unnamed(
FunctionArgExpr::Expr(Expr::Value(align.clone())),
FunctionArgExpr::Expr(align.clone()),
));
range_func.args.push(FunctionArg::Unnamed(
FunctionArgExpr::Expr(to.clone()),
));
rewrite_count += 1;
return Ok(Some(Expr::Function(range_func)));
Expand Down
3 changes: 1 addition & 2 deletions src/tokenizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -541,8 +541,7 @@ impl<'a> Tokenizer<'a> {
pub fn tokenize(&mut self) -> Result<Vec<Token>, TokenizerError> {
let twl = self.tokenize_with_location()?;

let mut tokens: Vec<Token> = vec![];
tokens.reserve(twl.len());
let mut tokens: Vec<Token> = Vec::with_capacity(twl.len());
for token_with_location in twl {
tokens.push(token_with_location.token);
}
Expand Down
86 changes: 63 additions & 23 deletions tests/sqlparser_common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7406,59 +7406,59 @@ fn assert_sql_err(s: &'static str, result: &'static str) {

#[test]
fn parse_range_select() {
// rewrite format `range_fn(func_name, argc, [argv], range, fill, byc, [byv], align)`
// rewrite format `range_fn(func_name, argc, [argv], range, fill, byc, [byv], align, to)`
// regular without by
assert_sql("SELECT rate(metrics) RANGE '5m', sum(metrics) RANGE '10m' FILL MAX, sum(metrics) RANGE '10m' FROM t ALIGN '1h' FILL NULL;",
"SELECT range_fn(rate(metrics), '5m', 'NULL', '0', '1h'), range_fn(sum(metrics), '10m', 'MAX', '0', '1h'), range_fn(sum(metrics), '10m', 'NULL', '0', '1h') FROM t");
"SELECT range_fn(rate(metrics), '5m', 'NULL', '0', '1h', ''), range_fn(sum(metrics), '10m', 'MAX', '0', '1h', ''), range_fn(sum(metrics), '10m', 'NULL', '0', '1h', '') FROM t");

// regular with by
assert_sql("SELECT rate(metrics) RANGE '5m', sum(metrics) RANGE '10m' FILL MAX, sum(metrics) RANGE '10m' FROM t ALIGN '1h' by ((a+1)/2, b) FILL NULL;",
"SELECT range_fn(rate(metrics), '5m', 'NULL', '2', (a + 1) / 2, b, '1h'), range_fn(sum(metrics), '10m', 'MAX', '2', (a + 1) / 2, b, '1h'), range_fn(sum(metrics), '10m', 'NULL', '2', (a + 1) / 2, b, '1h') FROM t GROUP BY a, b");
"SELECT range_fn(rate(metrics), '5m', 'NULL', '2', (a + 1) / 2, b, '1h', ''), range_fn(sum(metrics), '10m', 'MAX', '2', (a + 1) / 2, b, '1h', ''), range_fn(sum(metrics), '10m', 'NULL', '2', (a + 1) / 2, b, '1h', '') FROM t GROUP BY a, b");

// explicit empty by
assert_sql("SELECT rate(metrics) RANGE '5m', sum(metrics) RANGE '10m' FILL MAX, sum(metrics) RANGE '10m' FROM t ALIGN '1h' by () FILL NULL;",
"SELECT range_fn(rate(metrics), '5m', 'NULL', '1', 1, '1h'), range_fn(sum(metrics), '10m', 'MAX', '1', 1, '1h'), range_fn(sum(metrics), '10m', 'NULL', '1', 1, '1h') FROM t");
"SELECT range_fn(rate(metrics), '5m', 'NULL', '1', 1, '1h', ''), range_fn(sum(metrics), '10m', 'MAX', '1', 1, '1h', ''), range_fn(sum(metrics), '10m', 'NULL', '1', 1, '1h', '') FROM t");

// expression1
assert_sql(
"SELECT avg(a/2 + 1) RANGE '5m' FILL NULL FROM t ALIGN '1h' FILL NULL;",
"SELECT range_fn(avg(a / 2 + 1), '5m', 'NULL', '0', '1h') FROM t",
"SELECT range_fn(avg(a / 2 + 1), '5m', 'NULL', '0', '1h', '') FROM t",
);

// expression2
assert_sql(
"SELECT avg(a) RANGE '5m' FILL NULL + 1 FROM t ALIGN '1h' FILL NULL;",
"SELECT range_fn(avg(a), '5m', 'NULL', '0', '1h') + 1 FROM t",
"SELECT range_fn(avg(a), '5m', 'NULL', '0', '1h', '') + 1 FROM t",
);

// expression3
assert_sql(
"SELECT ((avg(a) + sum(b))/2) RANGE '5m' FILL NULL FROM t ALIGN '1h' FILL NULL;",
"SELECT ((range_fn(avg(a), '5m', 'NULL', '0', '1h') + range_fn(sum(b), '5m', 'NULL', '0', '1h')) / 2) FROM t",
"SELECT ((range_fn(avg(a), '5m', 'NULL', '0', '1h', '') + range_fn(sum(b), '5m', 'NULL', '0', '1h', '')) / 2) FROM t",
);

// expression4
assert_sql(
"SELECT covariance(a, b) RANGE '5m' FILL NULL FROM t ALIGN '1h' FILL NULL;",
"SELECT range_fn(covariance(a, b), '5m', 'NULL', '0', '1h') FROM t",
"SELECT range_fn(covariance(a, b), '5m', 'NULL', '0', '1h', '') FROM t",
);

// expression5
assert_sql(
"SELECT covariance(cos(a), sin(b)) RANGE '5m' FILL NULL FROM t ALIGN '1h' FILL NULL;",
"SELECT range_fn(covariance(cos(a), sin(b)), '5m', 'NULL', '0', '1h') FROM t",
"SELECT range_fn(covariance(cos(a), sin(b)), '5m', 'NULL', '0', '1h', '') FROM t",
);

// expression6
assert_sql(
"SELECT ((covariance(a+1, b/2) + sum(b))/2) RANGE '5m' FILL NULL FROM t ALIGN '1h' FILL NULL;",
"SELECT ((range_fn(covariance(a + 1, b / 2), '5m', 'NULL', '0', '1h') + range_fn(sum(b), '5m', 'NULL', '0', '1h')) / 2) FROM t",
"SELECT ((range_fn(covariance(a + 1, b / 2), '5m', 'NULL', '0', '1h', '') + range_fn(sum(b), '5m', 'NULL', '0', '1h', '')) / 2) FROM t",
);

// FILL... ALIGN...
assert_sql(
"SELECT sum(metrics) RANGE '10m' FROM t FILL NULL ALIGN '1h';",
"SELECT range_fn(sum(metrics), '10m', 'NULL', '0', '1h') FROM t",
"SELECT range_fn(sum(metrics), '10m', 'NULL', '0', '1h', '') FROM t",
);

// FILL ... FILL ...
Expand Down Expand Up @@ -7518,57 +7518,57 @@ fn parse_range_in_expr() {
// use range in expr
assert_sql(
"SELECT rate(a) RANGE '6m' + 1 FROM t ALIGN '1h' FILL NULL;",
"SELECT range_fn(rate(a), '6m', 'NULL', '0', '1h') + 1 FROM t",
"SELECT range_fn(rate(a), '6m', 'NULL', '0', '1h', '') + 1 FROM t",
);

assert_sql(
"SELECT sin(rate(a) RANGE '6m' + 1) FROM t ALIGN '1h' FILL NULL;",
"SELECT sin(range_fn(rate(a), '6m', 'NULL', '0', '1h') + 1) FROM t",
"SELECT sin(range_fn(rate(a), '6m', 'NULL', '0', '1h', '') + 1) FROM t",
);

assert_sql(
"SELECT sin(first_value(a ORDER BY b ASC NULLS LAST) RANGE '6m' + 1) FROM t ALIGN '1h' by (tag0, tag1) FILL NULL;",
"SELECT sin(range_fn(first_value(a ORDER BY b ASC NULLS LAST), '6m', 'NULL', '2', tag0, tag1, '1h') + 1) FROM t GROUP BY tag0, tag1",
"SELECT sin(range_fn(first_value(a ORDER BY b ASC NULLS LAST), '6m', 'NULL', '2', tag0, tag1, '1h', '') + 1) FROM t GROUP BY tag0, tag1",
);

assert_sql(
"SELECT sin(count(distinct a) RANGE '6m' + 1) FROM t ALIGN '1h' by (tag0, tag1) FILL NULL;",
"SELECT sin(range_fn(count(DISTINCT a), '6m', 'NULL', '2', tag0, tag1, '1h') + 1) FROM t GROUP BY tag0, tag1",
"SELECT sin(range_fn(count(DISTINCT a), '6m', 'NULL', '2', tag0, tag1, '1h', '') + 1) FROM t GROUP BY tag0, tag1",
);

assert_sql(
"SELECT sin(rank() OVER (PARTITION BY a ORDER BY b DESC) RANGE '6m' + 1) FROM t ALIGN '1h' by (tag0, tag1) FILL NULL;",
"SELECT sin(range_fn(rank() OVER (PARTITION BY a ORDER BY b DESC), '6m', 'NULL', '2', tag0, tag1, '1h') + 1) FROM t GROUP BY tag0, tag1",
"SELECT sin(range_fn(rank() OVER (PARTITION BY a ORDER BY b DESC), '6m', 'NULL', '2', tag0, tag1, '1h', '') + 1) FROM t GROUP BY tag0, tag1",
);

assert_sql(
"SELECT sin(cos(round(sin(avg(a + b) RANGE '5m' + 1)))) FROM test ALIGN '1h' by (tag_0,tag_1);",
"SELECT sin(cos(round(sin(range_fn(avg(a + b), '5m', '', '2', tag_0, tag_1, '1h') + 1)))) FROM test GROUP BY tag_0, tag_1",
"SELECT sin(cos(round(sin(range_fn(avg(a + b), '5m', '', '2', tag_0, tag_1, '1h', '') + 1)))) FROM test GROUP BY tag_0, tag_1",
);

assert_sql("SELECT rate(a) RANGE '6m' + rate(a) RANGE '5m' FROM t ALIGN '1h' FILL NULL;",
"SELECT range_fn(rate(a), '6m', 'NULL', '0', '1h') + range_fn(rate(a), '5m', 'NULL', '0', '1h') FROM t");
"SELECT range_fn(rate(a), '6m', 'NULL', '0', '1h', '') + range_fn(rate(a), '5m', 'NULL', '0', '1h', '') FROM t");

assert_sql("SELECT (rate(a) RANGE '6m' + rate(a) RANGE '5m')/b + b * rate(a) RANGE '5m' FROM t ALIGN '1h' FILL NULL;",
"SELECT (range_fn(rate(a), '6m', 'NULL', '0', '1h') + range_fn(rate(a), '5m', 'NULL', '0', '1h')) / b + b * range_fn(rate(a), '5m', 'NULL', '0', '1h') FROM t GROUP BY b");
"SELECT (range_fn(rate(a), '6m', 'NULL', '0', '1h', '') + range_fn(rate(a), '5m', 'NULL', '0', '1h', '')) / b + b * range_fn(rate(a), '5m', 'NULL', '0', '1h', '') FROM t GROUP BY b");

assert_sql("SELECT round(max(a+1) Range '5m' FILL NULL), sin((max(a) + 1) Range '5m' FILL NULL) from t ALIGN '1h' by (b) FILL NULL;",
"SELECT round(range_fn(max(a + 1), '5m', 'NULL', '1', b, '1h')), sin((range_fn(max(a), '5m', 'NULL', '1', b, '1h') + 1)) FROM t GROUP BY b");
"SELECT round(range_fn(max(a + 1), '5m', 'NULL', '1', b, '1h', '')), sin((range_fn(max(a), '5m', 'NULL', '1', b, '1h', '') + 1)) FROM t GROUP BY b");

assert_sql(
"SELECT floor(ceil((min(a * 2) + max(a *2)) RANGE '20s' + 1.0)) FROM t ALIGN '1h';",
"SELECT FLOOR(CEIL((range_fn(min(a * 2), '20s', '', '0', '1h') + range_fn(max(a * 2), '20s', '', '0', '1h')) + 1.0)) FROM t",
"SELECT FLOOR(CEIL((range_fn(min(a * 2), '20s', '', '0', '1h', '') + range_fn(max(a * 2), '20s', '', '0', '1h', '')) + 1.0)) FROM t",
);

assert_sql(
"SELECT gcd(CAST(max(a + 1) Range '5m' FILL NULL AS Int64), CAST(b AS Int64)) + round(max(c+1) Range '6m' FILL NULL + 1) + max(d+3) Range '10m' FILL NULL * CAST(e AS Float64) + 1 FROM test ALIGN '1h' by (f, g);",
"SELECT gcd(CAST(range_fn(max(a + 1), '5m', 'NULL', '2', f, g, '1h') AS Int64), CAST(b AS Int64)) + round(range_fn(max(c + 1), '6m', 'NULL', '2', f, g, '1h') + 1) + range_fn(max(d + 3), '10m', 'NULL', '2', f, g, '1h') * CAST(e AS Float64) + 1 FROM test GROUP BY b, e, f, g",
"SELECT gcd(CAST(range_fn(max(a + 1), '5m', 'NULL', '2', f, g, '1h', '') AS Int64), CAST(b AS Int64)) + round(range_fn(max(c + 1), '6m', 'NULL', '2', f, g, '1h', '') + 1) + range_fn(max(d + 3), '10m', 'NULL', '2', f, g, '1h', '') * CAST(e AS Float64) + 1 FROM test GROUP BY b, e, f, g",
);

// Legal syntax but illegal semantics: nested range semantics are problematic, so the semantic problem is left to greptimedb
assert_sql(
"SELECT rate(max(a) RANGE '6m') RANGE '6m' + 1 FROM t ALIGN '1h' FILL NULL;",
"SELECT range_fn(rate(range_fn(max(a), '6m', '')), '6m', 'NULL', '0', '1h') + 1 FROM t",
"SELECT range_fn(rate(range_fn(max(a), '6m', '')), '6m', 'NULL', '0', '1h', '') + 1 FROM t",
);

assert_sql_err(
Expand All @@ -7586,3 +7586,43 @@ fn parse_range_in_expr() {
"sql parser error: Can't use the RANGE keyword in Expr 1 without function",
);
}

#[test]
fn parse_range_interval() {
    // Pairs of (query, expected string) handed to `assert_sql`, in order.
    // Every query wraps its INTERVAL expressions in parentheses.
    const CASES: &[(&str, &str)] = &[
        (
            "SELECT rate(a) RANGE (INTERVAL '1 year 2 hours 3 minutes') FROM t ALIGN (INTERVAL '1 year 2 hours 3 minutes') FILL NULL;",
            "SELECT range_fn(rate(a), INTERVAL '1 year 2 hours 3 minutes', 'NULL', '0', INTERVAL '1 year 2 hours 3 minutes', '') FROM t",
        ),
        (
            "SELECT rate(a) RANGE (INTERVAL '1' YEAR) FROM t ALIGN (INTERVAL '1' YEAR) FILL NULL;",
            "SELECT range_fn(rate(a), INTERVAL '1' YEAR, 'NULL', '0', INTERVAL '1' YEAR, '') FROM t",
        ),
        (
            "SELECT sin(count(distinct a) RANGE (INTERVAL '1 year 2 hours 3 minutes') + 1) FROM t ALIGN (INTERVAL '1 year 2 hours 3 minutes') FILL NULL;",
            "SELECT sin(range_fn(count(DISTINCT a), INTERVAL '1 year 2 hours 3 minutes', 'NULL', '0', INTERVAL '1 year 2 hours 3 minutes', '') + 1) FROM t",
        ),
        (
            "SELECT rate(a) RANGE (INTERVAL '1' YEAR) FROM t ALIGN (INTERVAL '1' YEAR) TO '1970-01-01T00:00:00+08:00' BY (b, c) FILL NULL;",
            "SELECT range_fn(rate(a), INTERVAL '1' YEAR, 'NULL', '2', b, c, INTERVAL '1' YEAR, '1970-01-01T00:00:00+08:00') FROM t GROUP BY b, c",
        ),
    ];
    for &(query, expected) in CASES {
        assert_sql(query, expected);
    }

    // RANGE followed by an INTERVAL without parentheses: checked against the
    // expected parser error message via `assert_sql_err`.
    let unparenthesized =
        "SELECT rate(a) RANGE INTERVAL '1 year 2 hours 3 minutes' FROM t ALIGN '1h' FILL NULL;";
    let expected_error =
        "sql parser error: Expected end of statement, found: RANGE at Line: 1, Column 16";
    assert_sql_err(unparenthesized, expected_error);
}

#[test]
fn parse_range_to() {
    // Pairs of (query, expected string) handed to `assert_sql`, in order.
    // The queries differ only in what follows `TO`: the bare words NOW and
    // CALENDAR, and a single-quoted timestamp string.
    const CASES: &[(&str, &str)] = &[
        (
            "SELECT rate(a) RANGE '6m' FROM t ALIGN '1h' TO NOW FILL NULL;",
            "SELECT range_fn(rate(a), '6m', 'NULL', '0', '1h', 'NOW') FROM t",
        ),
        (
            "SELECT rate(a) RANGE '6m' FROM t ALIGN '1h' TO CALENDAR FILL NULL;",
            "SELECT range_fn(rate(a), '6m', 'NULL', '0', '1h', 'CALENDAR') FROM t",
        ),
        (
            "SELECT rate(a) RANGE '6m' FROM t ALIGN '1h' TO '2021-07-01 00:00:00' FILL NULL;",
            "SELECT range_fn(rate(a), '6m', 'NULL', '0', '1h', '2021-07-01 00:00:00') FROM t",
        ),
    ];
    for &(query, expected) in CASES {
        assert_sql(query, expected);
    }
}
Loading