Skip to content

Commit 6a93567

Browse files
feat: support ALIGN TO and Interval in Range query (#9)
* feat: support TO and Interval in Range query * chore: fix ci
1 parent 0fbae07 commit 6a93567

File tree

3 files changed

+104
-37
lines changed

3 files changed

+104
-37
lines changed

src/parser/mod.rs

Lines changed: 40 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -659,14 +659,20 @@ impl<'a> Parser<'a> {
659659
Ok(expr)
660660
}
661661

662+
/// Parse Range clause with format `RANGE [ Duration literal | (INTERVAL [interval expr]) ] FILL [ NULL | PREV .....]`
662663
fn parse_range_expr(&mut self, expr: Expr) -> Result<Expr, ParserError> {
663664
let index = self.index;
664665
let range = if self.parse_keyword(Keyword::RANGE) {
665-
// Make sure Range followed by a value, or it will be confused with window function syntax
666+
// Make sure RANGE is followed by a value or a parenthesized interval expr; otherwise it would be confused with window function syntax
666667
// e.g. `COUNT(*) OVER (ORDER BY a RANGE BETWEEN INTERVAL '1 DAY' PRECEDING AND INTERVAL '1 DAY' FOLLOWING)`
667-
if let Ok(value) = self.parse_value() {
668+
if self.consume_token(&Token::LParen) {
669+
self.expect_keyword(Keyword::INTERVAL)?;
670+
let interval = self.parse_interval()?;
671+
self.expect_token(&Token::RParen)?;
672+
interval
673+
} else if let Ok(value) = self.parse_value() {
668674
value.verify_duration()?;
669-
value
675+
Expr::Value(value)
670676
} else {
671677
self.index = index;
672678
return Ok(expr);
@@ -694,7 +700,7 @@ impl<'a> Parser<'a> {
694700
if matches!(e, Expr::Function(..)) {
695701
let args = vec![
696702
FunctionArg::Unnamed(FunctionArgExpr::Expr(e.clone())),
697-
FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(range.clone()))),
703+
FunctionArg::Unnamed(FunctionArgExpr::Expr(range.clone())),
698704
FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(fill.clone()))),
699705
];
700706
let range_func = Function {
@@ -5757,8 +5763,8 @@ impl<'a> Parser<'a> {
57575763
} else {
57585764
vec![]
57595765
};
5760-
5761-
let mut align: Option<(Value, Vec<Expr>)> = None;
5766+
// triple means (align duration, to, by)
5767+
let mut align: Option<(Expr, Expr, Vec<Expr>)> = None;
57625768
let mut fill: Option<String> = None;
57635769
for _ in 0..2 {
57645770
if self.parse_keyword(Keyword::ALIGN) {
@@ -5767,8 +5773,27 @@ impl<'a> Parser<'a> {
57675773
"Duplicate ALIGN keyword detected in SELECT clause.".into(),
57685774
));
57695775
}
5770-
let value = self.parse_value()?;
5771-
value.verify_duration()?;
5776+
// The interval must be wrapped in parentheses; otherwise it causes syntax conflicts.
5777+
// `INTERVAL '1-1' YEAR TO MONTH` conflicts with
5778+
// `ALIGN INTERVAL '1' day TO '1970-01-01T00:00:00+08:00'`
5779+
let value = if self.consume_token(&Token::LParen) {
5780+
self.expect_keyword(Keyword::INTERVAL)?;
5781+
let interval = self.parse_interval()?;
5782+
self.expect_token(&Token::RParen)?;
5783+
interval
5784+
} else {
5785+
let value = self.parse_value()?;
5786+
value.verify_duration()?;
5787+
Expr::Value(value)
5788+
};
5789+
let to = if self.parse_keyword(Keyword::TO) {
5790+
let value = self.next_token().to_string();
5791+
Expr::Value(Value::SingleQuotedString(
5792+
value.trim_matches(|x| x == '\'' || x == '"').to_string(),
5793+
))
5794+
} else {
5795+
Expr::Value(Value::SingleQuotedString(String::new()))
5796+
};
57725797
let by = if self.parse_keyword(Keyword::BY) {
57735798
self.expect_token(&Token::LParen)?;
57745799
if self.consume_token(&Token::RParen) {
@@ -5793,7 +5818,7 @@ impl<'a> Parser<'a> {
57935818
} else {
57945819
vec![]
57955820
};
5796-
align = Some((value, by));
5821+
align = Some((value, to, by));
57975822
}
57985823
if self.parse_keyword(Keyword::FILL) {
57995824
if fill.is_some() {
@@ -5809,7 +5834,7 @@ impl<'a> Parser<'a> {
58095834
"ALIGN argument cannot be omitted in the range select query".into(),
58105835
));
58115836
}
5812-
let projection = if let Some((align, by)) = align {
5837+
let projection = if let Some((align, to, by)) = align {
58135838
let fill = fill.unwrap_or_default();
58145839
let by_num = FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(
58155840
Value::SingleQuotedString(by.len().to_string()),
@@ -5822,7 +5847,7 @@ impl<'a> Parser<'a> {
58225847
FunctionArg::Unnamed(FunctionArgExpr::Expr(x))
58235848
})
58245849
.collect::<Vec<_>>();
5825-
// range_fn(func, range, fill, byc, [byv], align)
5850+
// range_fn(func, range, fill, byc, [byv], align, to)
58265851
// byc is the number of variadic arguments in [byv]
58275852
let mut rewrite_count = 0;
58285853
let mut align_fill_rewrite =
@@ -5850,7 +5875,10 @@ impl<'a> Parser<'a> {
58505875
range_func.args.push(by_num.clone());
58515876
range_func.args.extend(by.clone());
58525877
range_func.args.push(FunctionArg::Unnamed(
5853-
FunctionArgExpr::Expr(Expr::Value(align.clone())),
5878+
FunctionArgExpr::Expr(align.clone()),
5879+
));
5880+
range_func.args.push(FunctionArg::Unnamed(
5881+
FunctionArgExpr::Expr(to.clone()),
58545882
));
58555883
rewrite_count += 1;
58565884
return Ok(Some(Expr::Function(range_func)));

src/tokenizer.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -541,8 +541,7 @@ impl<'a> Tokenizer<'a> {
541541
pub fn tokenize(&mut self) -> Result<Vec<Token>, TokenizerError> {
542542
let twl = self.tokenize_with_location()?;
543543

544-
let mut tokens: Vec<Token> = vec![];
545-
tokens.reserve(twl.len());
544+
let mut tokens: Vec<Token> = Vec::with_capacity(twl.len());
546545
for token_with_location in twl {
547546
tokens.push(token_with_location.token);
548547
}

tests/sqlparser_common.rs

Lines changed: 63 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -7406,59 +7406,59 @@ fn assert_sql_err(s: &'static str, result: &'static str) {
74067406

74077407
#[test]
74087408
fn parse_range_select() {
7409-
// rewrite format `range_fn(func_name, argc, [argv], range, fill, byc, [byv], align)`
7409+
// rewrite format `range_fn(func_name, argc, [argv], range, fill, byc, [byv], align, to)`
74107410
// regular without by
74117411
assert_sql("SELECT rate(metrics) RANGE '5m', sum(metrics) RANGE '10m' FILL MAX, sum(metrics) RANGE '10m' FROM t ALIGN '1h' FILL NULL;",
7412-
"SELECT range_fn(rate(metrics), '5m', 'NULL', '0', '1h'), range_fn(sum(metrics), '10m', 'MAX', '0', '1h'), range_fn(sum(metrics), '10m', 'NULL', '0', '1h') FROM t");
7412+
"SELECT range_fn(rate(metrics), '5m', 'NULL', '0', '1h', ''), range_fn(sum(metrics), '10m', 'MAX', '0', '1h', ''), range_fn(sum(metrics), '10m', 'NULL', '0', '1h', '') FROM t");
74137413

74147414
// regular with by
74157415
assert_sql("SELECT rate(metrics) RANGE '5m', sum(metrics) RANGE '10m' FILL MAX, sum(metrics) RANGE '10m' FROM t ALIGN '1h' by ((a+1)/2, b) FILL NULL;",
7416-
"SELECT range_fn(rate(metrics), '5m', 'NULL', '2', (a + 1) / 2, b, '1h'), range_fn(sum(metrics), '10m', 'MAX', '2', (a + 1) / 2, b, '1h'), range_fn(sum(metrics), '10m', 'NULL', '2', (a + 1) / 2, b, '1h') FROM t GROUP BY a, b");
7416+
"SELECT range_fn(rate(metrics), '5m', 'NULL', '2', (a + 1) / 2, b, '1h', ''), range_fn(sum(metrics), '10m', 'MAX', '2', (a + 1) / 2, b, '1h', ''), range_fn(sum(metrics), '10m', 'NULL', '2', (a + 1) / 2, b, '1h', '') FROM t GROUP BY a, b");
74177417

74187418
// explicit empty by
74197419
assert_sql("SELECT rate(metrics) RANGE '5m', sum(metrics) RANGE '10m' FILL MAX, sum(metrics) RANGE '10m' FROM t ALIGN '1h' by () FILL NULL;",
7420-
"SELECT range_fn(rate(metrics), '5m', 'NULL', '1', 1, '1h'), range_fn(sum(metrics), '10m', 'MAX', '1', 1, '1h'), range_fn(sum(metrics), '10m', 'NULL', '1', 1, '1h') FROM t");
7420+
"SELECT range_fn(rate(metrics), '5m', 'NULL', '1', 1, '1h', ''), range_fn(sum(metrics), '10m', 'MAX', '1', 1, '1h', ''), range_fn(sum(metrics), '10m', 'NULL', '1', 1, '1h', '') FROM t");
74217421

74227422
// expression1
74237423
assert_sql(
74247424
"SELECT avg(a/2 + 1) RANGE '5m' FILL NULL FROM t ALIGN '1h' FILL NULL;",
7425-
"SELECT range_fn(avg(a / 2 + 1), '5m', 'NULL', '0', '1h') FROM t",
7425+
"SELECT range_fn(avg(a / 2 + 1), '5m', 'NULL', '0', '1h', '') FROM t",
74267426
);
74277427

74287428
// expression2
74297429
assert_sql(
74307430
"SELECT avg(a) RANGE '5m' FILL NULL + 1 FROM t ALIGN '1h' FILL NULL;",
7431-
"SELECT range_fn(avg(a), '5m', 'NULL', '0', '1h') + 1 FROM t",
7431+
"SELECT range_fn(avg(a), '5m', 'NULL', '0', '1h', '') + 1 FROM t",
74327432
);
74337433

74347434
// expression3
74357435
assert_sql(
74367436
"SELECT ((avg(a) + sum(b))/2) RANGE '5m' FILL NULL FROM t ALIGN '1h' FILL NULL;",
7437-
"SELECT ((range_fn(avg(a), '5m', 'NULL', '0', '1h') + range_fn(sum(b), '5m', 'NULL', '0', '1h')) / 2) FROM t",
7437+
"SELECT ((range_fn(avg(a), '5m', 'NULL', '0', '1h', '') + range_fn(sum(b), '5m', 'NULL', '0', '1h', '')) / 2) FROM t",
74387438
);
74397439

74407440
// expression4
74417441
assert_sql(
74427442
"SELECT covariance(a, b) RANGE '5m' FILL NULL FROM t ALIGN '1h' FILL NULL;",
7443-
"SELECT range_fn(covariance(a, b), '5m', 'NULL', '0', '1h') FROM t",
7443+
"SELECT range_fn(covariance(a, b), '5m', 'NULL', '0', '1h', '') FROM t",
74447444
);
74457445

74467446
// expression5
74477447
assert_sql(
74487448
"SELECT covariance(cos(a), sin(b)) RANGE '5m' FILL NULL FROM t ALIGN '1h' FILL NULL;",
7449-
"SELECT range_fn(covariance(cos(a), sin(b)), '5m', 'NULL', '0', '1h') FROM t",
7449+
"SELECT range_fn(covariance(cos(a), sin(b)), '5m', 'NULL', '0', '1h', '') FROM t",
74507450
);
74517451

74527452
// expression6
74537453
assert_sql(
74547454
"SELECT ((covariance(a+1, b/2) + sum(b))/2) RANGE '5m' FILL NULL FROM t ALIGN '1h' FILL NULL;",
7455-
"SELECT ((range_fn(covariance(a + 1, b / 2), '5m', 'NULL', '0', '1h') + range_fn(sum(b), '5m', 'NULL', '0', '1h')) / 2) FROM t",
7455+
"SELECT ((range_fn(covariance(a + 1, b / 2), '5m', 'NULL', '0', '1h', '') + range_fn(sum(b), '5m', 'NULL', '0', '1h', '')) / 2) FROM t",
74567456
);
74577457

74587458
// FILL... ALIGN...
74597459
assert_sql(
74607460
"SELECT sum(metrics) RANGE '10m' FROM t FILL NULL ALIGN '1h';",
7461-
"SELECT range_fn(sum(metrics), '10m', 'NULL', '0', '1h') FROM t",
7461+
"SELECT range_fn(sum(metrics), '10m', 'NULL', '0', '1h', '') FROM t",
74627462
);
74637463

74647464
// FILL ... FILL ...
@@ -7518,57 +7518,57 @@ fn parse_range_in_expr() {
75187518
// use range in expr
75197519
assert_sql(
75207520
"SELECT rate(a) RANGE '6m' + 1 FROM t ALIGN '1h' FILL NULL;",
7521-
"SELECT range_fn(rate(a), '6m', 'NULL', '0', '1h') + 1 FROM t",
7521+
"SELECT range_fn(rate(a), '6m', 'NULL', '0', '1h', '') + 1 FROM t",
75227522
);
75237523

75247524
assert_sql(
75257525
"SELECT sin(rate(a) RANGE '6m' + 1) FROM t ALIGN '1h' FILL NULL;",
7526-
"SELECT sin(range_fn(rate(a), '6m', 'NULL', '0', '1h') + 1) FROM t",
7526+
"SELECT sin(range_fn(rate(a), '6m', 'NULL', '0', '1h', '') + 1) FROM t",
75277527
);
75287528

75297529
assert_sql(
75307530
"SELECT sin(first_value(a ORDER BY b ASC NULLS LAST) RANGE '6m' + 1) FROM t ALIGN '1h' by (tag0, tag1) FILL NULL;",
7531-
"SELECT sin(range_fn(first_value(a ORDER BY b ASC NULLS LAST), '6m', 'NULL', '2', tag0, tag1, '1h') + 1) FROM t GROUP BY tag0, tag1",
7531+
"SELECT sin(range_fn(first_value(a ORDER BY b ASC NULLS LAST), '6m', 'NULL', '2', tag0, tag1, '1h', '') + 1) FROM t GROUP BY tag0, tag1",
75327532
);
75337533

75347534
assert_sql(
75357535
"SELECT sin(count(distinct a) RANGE '6m' + 1) FROM t ALIGN '1h' by (tag0, tag1) FILL NULL;",
7536-
"SELECT sin(range_fn(count(DISTINCT a), '6m', 'NULL', '2', tag0, tag1, '1h') + 1) FROM t GROUP BY tag0, tag1",
7536+
"SELECT sin(range_fn(count(DISTINCT a), '6m', 'NULL', '2', tag0, tag1, '1h', '') + 1) FROM t GROUP BY tag0, tag1",
75377537
);
75387538

75397539
assert_sql(
75407540
"SELECT sin(rank() OVER (PARTITION BY a ORDER BY b DESC) RANGE '6m' + 1) FROM t ALIGN '1h' by (tag0, tag1) FILL NULL;",
7541-
"SELECT sin(range_fn(rank() OVER (PARTITION BY a ORDER BY b DESC), '6m', 'NULL', '2', tag0, tag1, '1h') + 1) FROM t GROUP BY tag0, tag1",
7541+
"SELECT sin(range_fn(rank() OVER (PARTITION BY a ORDER BY b DESC), '6m', 'NULL', '2', tag0, tag1, '1h', '') + 1) FROM t GROUP BY tag0, tag1",
75427542
);
75437543

75447544
assert_sql(
75457545
"SELECT sin(cos(round(sin(avg(a + b) RANGE '5m' + 1)))) FROM test ALIGN '1h' by (tag_0,tag_1);",
7546-
"SELECT sin(cos(round(sin(range_fn(avg(a + b), '5m', '', '2', tag_0, tag_1, '1h') + 1)))) FROM test GROUP BY tag_0, tag_1",
7546+
"SELECT sin(cos(round(sin(range_fn(avg(a + b), '5m', '', '2', tag_0, tag_1, '1h', '') + 1)))) FROM test GROUP BY tag_0, tag_1",
75477547
);
75487548

75497549
assert_sql("SELECT rate(a) RANGE '6m' + rate(a) RANGE '5m' FROM t ALIGN '1h' FILL NULL;",
7550-
"SELECT range_fn(rate(a), '6m', 'NULL', '0', '1h') + range_fn(rate(a), '5m', 'NULL', '0', '1h') FROM t");
7550+
"SELECT range_fn(rate(a), '6m', 'NULL', '0', '1h', '') + range_fn(rate(a), '5m', 'NULL', '0', '1h', '') FROM t");
75517551

75527552
assert_sql("SELECT (rate(a) RANGE '6m' + rate(a) RANGE '5m')/b + b * rate(a) RANGE '5m' FROM t ALIGN '1h' FILL NULL;",
7553-
"SELECT (range_fn(rate(a), '6m', 'NULL', '0', '1h') + range_fn(rate(a), '5m', 'NULL', '0', '1h')) / b + b * range_fn(rate(a), '5m', 'NULL', '0', '1h') FROM t GROUP BY b");
7553+
"SELECT (range_fn(rate(a), '6m', 'NULL', '0', '1h', '') + range_fn(rate(a), '5m', 'NULL', '0', '1h', '')) / b + b * range_fn(rate(a), '5m', 'NULL', '0', '1h', '') FROM t GROUP BY b");
75547554

75557555
assert_sql("SELECT round(max(a+1) Range '5m' FILL NULL), sin((max(a) + 1) Range '5m' FILL NULL) from t ALIGN '1h' by (b) FILL NULL;",
7556-
"SELECT round(range_fn(max(a + 1), '5m', 'NULL', '1', b, '1h')), sin((range_fn(max(a), '5m', 'NULL', '1', b, '1h') + 1)) FROM t GROUP BY b");
7556+
"SELECT round(range_fn(max(a + 1), '5m', 'NULL', '1', b, '1h', '')), sin((range_fn(max(a), '5m', 'NULL', '1', b, '1h', '') + 1)) FROM t GROUP BY b");
75577557

75587558
assert_sql(
75597559
"SELECT floor(ceil((min(a * 2) + max(a *2)) RANGE '20s' + 1.0)) FROM t ALIGN '1h';",
7560-
"SELECT FLOOR(CEIL((range_fn(min(a * 2), '20s', '', '0', '1h') + range_fn(max(a * 2), '20s', '', '0', '1h')) + 1.0)) FROM t",
7560+
"SELECT FLOOR(CEIL((range_fn(min(a * 2), '20s', '', '0', '1h', '') + range_fn(max(a * 2), '20s', '', '0', '1h', '')) + 1.0)) FROM t",
75617561
);
75627562

75637563
assert_sql(
75647564
"SELECT gcd(CAST(max(a + 1) Range '5m' FILL NULL AS Int64), CAST(b AS Int64)) + round(max(c+1) Range '6m' FILL NULL + 1) + max(d+3) Range '10m' FILL NULL * CAST(e AS Float64) + 1 FROM test ALIGN '1h' by (f, g);",
7565-
"SELECT gcd(CAST(range_fn(max(a + 1), '5m', 'NULL', '2', f, g, '1h') AS Int64), CAST(b AS Int64)) + round(range_fn(max(c + 1), '6m', 'NULL', '2', f, g, '1h') + 1) + range_fn(max(d + 3), '10m', 'NULL', '2', f, g, '1h') * CAST(e AS Float64) + 1 FROM test GROUP BY b, e, f, g",
7565+
"SELECT gcd(CAST(range_fn(max(a + 1), '5m', 'NULL', '2', f, g, '1h', '') AS Int64), CAST(b AS Int64)) + round(range_fn(max(c + 1), '6m', 'NULL', '2', f, g, '1h', '') + 1) + range_fn(max(d + 3), '10m', 'NULL', '2', f, g, '1h', '') * CAST(e AS Float64) + 1 FROM test GROUP BY b, e, f, g",
75667566
);
75677567

75687568
// Legal syntax but illegal semantics: nested ranges are problematic; leave the semantic check to greptimedb
75697569
assert_sql(
75707570
"SELECT rate(max(a) RANGE '6m') RANGE '6m' + 1 FROM t ALIGN '1h' FILL NULL;",
7571-
"SELECT range_fn(rate(range_fn(max(a), '6m', '')), '6m', 'NULL', '0', '1h') + 1 FROM t",
7571+
"SELECT range_fn(rate(range_fn(max(a), '6m', '')), '6m', 'NULL', '0', '1h', '') + 1 FROM t",
75727572
);
75737573

75747574
assert_sql_err(
@@ -7586,3 +7586,43 @@ fn parse_range_in_expr() {
75867586
"sql parser error: Can't use the RANGE keyword in Expr 1 without function",
75877587
);
75887588
}
7589+
7590+
#[test]
7591+
fn parse_range_interval() {
7592+
assert_sql(
7593+
"SELECT rate(a) RANGE (INTERVAL '1 year 2 hours 3 minutes') FROM t ALIGN (INTERVAL '1 year 2 hours 3 minutes') FILL NULL;",
7594+
"SELECT range_fn(rate(a), INTERVAL '1 year 2 hours 3 minutes', 'NULL', '0', INTERVAL '1 year 2 hours 3 minutes', '') FROM t",
7595+
);
7596+
assert_sql(
7597+
"SELECT rate(a) RANGE (INTERVAL '1' YEAR) FROM t ALIGN (INTERVAL '1' YEAR) FILL NULL;",
7598+
"SELECT range_fn(rate(a), INTERVAL '1' YEAR, 'NULL', '0', INTERVAL '1' YEAR, '') FROM t",
7599+
);
7600+
assert_sql(
7601+
"SELECT sin(count(distinct a) RANGE (INTERVAL '1 year 2 hours 3 minutes') + 1) FROM t ALIGN (INTERVAL '1 year 2 hours 3 minutes') FILL NULL;",
7602+
"SELECT sin(range_fn(count(DISTINCT a), INTERVAL '1 year 2 hours 3 minutes', 'NULL', '0', INTERVAL '1 year 2 hours 3 minutes', '') + 1) FROM t",
7603+
);
7604+
assert_sql(
7605+
"SELECT rate(a) RANGE (INTERVAL '1' YEAR) FROM t ALIGN (INTERVAL '1' YEAR) TO '1970-01-01T00:00:00+08:00' BY (b, c) FILL NULL;",
7606+
"SELECT range_fn(rate(a), INTERVAL '1' YEAR, 'NULL', '2', b, c, INTERVAL '1' YEAR, '1970-01-01T00:00:00+08:00') FROM t GROUP BY b, c",
7607+
);
7608+
assert_sql_err(
7609+
"SELECT rate(a) RANGE INTERVAL '1 year 2 hours 3 minutes' FROM t ALIGN '1h' FILL NULL;",
7610+
"sql parser error: Expected end of statement, found: RANGE at Line: 1, Column 16",
7611+
);
7612+
}
7613+
7614+
#[test]
7615+
fn parse_range_to() {
7616+
assert_sql(
7617+
"SELECT rate(a) RANGE '6m' FROM t ALIGN '1h' TO NOW FILL NULL;",
7618+
"SELECT range_fn(rate(a), '6m', 'NULL', '0', '1h', 'NOW') FROM t",
7619+
);
7620+
assert_sql(
7621+
"SELECT rate(a) RANGE '6m' FROM t ALIGN '1h' TO CALENDAR FILL NULL;",
7622+
"SELECT range_fn(rate(a), '6m', 'NULL', '0', '1h', 'CALENDAR') FROM t",
7623+
);
7624+
assert_sql(
7625+
"SELECT rate(a) RANGE '6m' FROM t ALIGN '1h' TO '2021-07-01 00:00:00' FILL NULL;",
7626+
"SELECT range_fn(rate(a), '6m', 'NULL', '0', '1h', '2021-07-01 00:00:00') FROM t",
7627+
);
7628+
}

0 commit comments

Comments
 (0)