Skip to content

Commit

Permalink
feat(query): add range function & aliases in select list (#11621)
Browse files Browse the repository at this point in the history
* feat(query): add range functions

* feat(query): add range functions

* feat(query): add range functions

* feat(query): add range functions

* feat(query): add range functions

* feat(query): add range functions

* feat(query): bind group context

* feat(query): remove allow_ambiguous

* feat(query): remove allow_ambiguous

---------

Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
sundy-li and mergify[bot] authored May 31, 2023

Verified

This commit was signed with the committer’s verified signature.
targos Michaël Zasso
1 parent d800bce commit 538dcd8
Showing 17 changed files with 82 additions and 81 deletions.
1 change: 1 addition & 0 deletions docs/doc/15-sql-functions/04-array-functions/index.md
Original file line number Diff line number Diff line change
@@ -9,6 +9,7 @@ SQL Array Functions and Usage.
|--------------------------------------|----------------------------------------------------------------------------------------------|---------------------------------------|--------------------------|
| **GET(array, index)** | Returns an element from the array by index (1-based) | **GET([1, 2], 2)** | 2 |
| **LENGTH(array)** | Returns the length of the array | **LENGTH([1, 2])** | 2 |
| **RANGE(start, end)** | Returns an array of the integers in the half-open range [start, end) | **RANGE(1, 3)** | [1, 2] |
| **ARRAY_CONCAT(array1, array2)** | Concats two arrays | **ARRAY_CONCAT([1, 2], [3, 4])** | [1,2,3,4] |
| **ARRAY_CONTAINS(array, item)** | Checks if the array contains a specific element | **ARRAY_CONTAINS([1, 2], 1)** | 1 |
| **ARRAY_INDEXOF(array, item)** | Returns the index(1-based) of an element if the array contains the element | **ARRAY_INDEXOF([1, 2, 9], 9)** | 3 |
6 changes: 6 additions & 0 deletions src/query/functions/src/scalars/array.rs
Original file line number Diff line number Diff line change
@@ -164,6 +164,12 @@ pub fn register(registry: &mut FunctionRegistry) {
|_, _| 0u8,
);

registry.register_2_arg::<NumberType<u64>, NumberType<u64>, ArrayType<NumberType<u64>>, _, _>(
"range",
|_, _| FunctionDomain::Full,
|start, end, _| (start..end).collect(),
);

registry.register_1_arg::<ArrayType<GenericType<0>>, NumberType<u64>, _, _>(
"length",
|_| FunctionDomain::Full,
Original file line number Diff line number Diff line change
@@ -2865,6 +2865,8 @@ Functions overloads:
0 rand() :: Float64
1 rand(UInt64) :: Float64
2 rand(UInt64 NULL) :: Float64 NULL
0 range(UInt64, UInt64) :: Array(UInt64)
1 range(UInt64 NULL, UInt64 NULL) :: Array(UInt64) NULL
0 regexp(String, String) :: Boolean
1 regexp(String NULL, String NULL) :: Boolean NULL
0 regexp_instr FACTORY
2 changes: 2 additions & 0 deletions src/query/sql/src/planner/binder/aggregate.rs
Original file line number Diff line number Diff line change
@@ -28,6 +28,7 @@ use common_expression::types::NumberDataType;
use itertools::Itertools;

use super::prune_by_children;
use super::ExprContext;
use crate::binder::scalar::ScalarBinder;
use crate::binder::select::SelectList;
use crate::binder::Binder;
@@ -379,6 +380,7 @@ impl Binder {
}
}

bind_context.set_expr_context(ExprContext::GroupClaue);
match group_by {
GroupBy::Normal(exprs) => {
self.resolve_group_items(
98 changes: 48 additions & 50 deletions src/query/sql/src/planner/binder/bind_context.rs
Original file line number Diff line number Diff line change
@@ -48,6 +48,7 @@ use crate::NameResolutionContext;
pub enum ExprContext {
SelectClause,
WhereClause,
GroupClaue,
HavingClause,
OrderByClause,
LimitClause,
@@ -59,6 +60,12 @@ pub enum ExprContext {
Unknown,
}

impl ExprContext {
pub fn prefer_resolve_alias(&self) -> bool {
!matches!(self, ExprContext::SelectClause | ExprContext::WhereClause)
}
}

#[derive(Clone, Debug, Eq, PartialEq, serde::Deserialize, serde::Serialize)]
pub enum Visibility {
// Default for a column
@@ -279,22 +286,11 @@ impl BindContext {
column: &str,
span: Span,
available_aliases: &[(String, ScalarExpr)],
allow_ambiguous: bool,
) -> Result<NameResolutionResult> {
let mut result = vec![];

let mut bind_context: &BindContext = self;
// Lookup parent context to resolve outer reference.
loop {
if self.expr_context.is_where_clause() {
// In where clause, check bound columns first.
Self::search_bound_columns(bind_context, database, table, column, &mut result);
if !result.is_empty() {
break;
}
}

// TODO(leiysky): use `Identifier` for alias instead of raw string
if self.expr_context.prefer_resolve_alias() {
for (alias, scalar) in available_aliases {
if database.is_none() && table.is_none() && column == alias {
result.push(NameResolutionResult::Alias {
@@ -304,63 +300,65 @@ impl BindContext {
}
}

// We will lookup alias first. If there are matched aliases, we will skip
// looking up `BindContext` to avoid ambiguity.
if !result.is_empty() {
break;
}
self.search_bound_columns_recursively(database, table, column, &mut result);
} else {
self.search_bound_columns_recursively(database, table, column, &mut result);

if !self.expr_context.is_where_clause() {
Self::search_bound_columns(bind_context, database, table, column, &mut result);
if !result.is_empty() {
break;
for (alias, scalar) in available_aliases {
if database.is_none() && table.is_none() && column == alias {
result.push(NameResolutionResult::Alias {
alias: alias.clone(),
scalar: scalar.clone(),
});
}
}

if let Some(ref parent) = bind_context.parent {
bind_context = parent;
} else {
break;
}
}

if result.is_empty() {
Err(ErrorCode::SemanticError(format!("column {column} doesn't exist")).set_span(span))
} else if result.len() > 1 && !allow_ambiguous {
Err(ErrorCode::SemanticError(format!(
"column {column} reference is ambiguous, got {result:?}"
))
.set_span(span))
} else {
Ok(result.remove(0))
}
}

pub fn search_bound_columns(
bind_context: &BindContext,
pub fn search_bound_columns_recursively(
&self,
database: Option<&str>,
table: Option<&str>,
column: &str,
result: &mut Vec<NameResolutionResult>,
) {
for column_binding in bind_context.columns.iter() {
if Self::match_column_binding(database, table, column, column_binding) {
result.push(NameResolutionResult::Column(column_binding.clone()));
let mut bind_context: &BindContext = self;

loop {
for column_binding in bind_context.columns.iter() {
if Self::match_column_binding(database, table, column, column_binding) {
result.push(NameResolutionResult::Column(column_binding.clone()));
}
}
if !result.is_empty() {
return;
}
}
if !result.is_empty() {
return;
}

// look up internal column
if let Some(internal_column) = INTERNAL_COLUMN_FACTORY.get_internal_column(column) {
let column_binding = InternalColumnBinding {
database_name: database.map(|n| n.to_owned()),
table_name: table.map(|n| n.to_owned()),
index: bind_context.columns.len(),
internal_column,
};
result.push(NameResolutionResult::InternalColumn(column_binding));
// look up internal column
if let Some(internal_column) = INTERNAL_COLUMN_FACTORY.get_internal_column(column) {
let column_binding = InternalColumnBinding {
database_name: database.map(|n| n.to_owned()),
table_name: table.map(|n| n.to_owned()),
index: bind_context.columns.len(),
internal_column,
};
result.push(NameResolutionResult::InternalColumn(column_binding));
}
if !result.is_empty() {
return;
}

if let Some(ref parent) = bind_context.parent {
bind_context = parent;
} else {
break;
}
}
}

1 change: 0 additions & 1 deletion src/query/sql/src/planner/binder/binder.rs
Original file line number Diff line number Diff line change
@@ -108,7 +108,6 @@ impl<'a> Binder {
&self.name_resolution_ctx,
self.metadata.clone(),
&[],
false,
);
let mut hint_settings: HashMap<String, String> = HashMap::new();
for hint in &hints.hints_list {
3 changes: 2 additions & 1 deletion src/query/sql/src/planner/binder/copy.rs
Original file line number Diff line number Diff line change
@@ -660,7 +660,8 @@ impl<'a> Binder {
let select_list = self
.normalize_select_list(&mut from_context, select_list)
.await?;
let (scalar_items, projections) = self.analyze_projection(&from_context, &select_list)?;
let (scalar_items, projections) =
self.analyze_projection(&from_context.aggregate_info, &select_list)?;
let s_expr =
self.bind_projection(&mut from_context, &projections, &scalar_items, s_expr)?;
let mut output_context = BindContext::new();
1 change: 1 addition & 0 deletions src/query/sql/src/planner/binder/having.rs
Original file line number Diff line number Diff line change
@@ -39,6 +39,7 @@ impl Binder {
aliases: &[(String, ScalarExpr)],
having: &Expr,
) -> Result<(ScalarExpr, Span)> {
bind_context.set_expr_context(ExprContext::HavingClause);
let mut scalar_binder = ScalarBinder::new(
bind_context,
self.ctx.clone(),
10 changes: 7 additions & 3 deletions src/query/sql/src/planner/binder/project.rs
Original file line number Diff line number Diff line change
@@ -24,6 +24,7 @@ use common_exception::ErrorCode;
use common_exception::Result;
use common_exception::Span;

use super::AggregateInfo;
use crate::binder::select::SelectItem;
use crate::binder::select::SelectList;
use crate::binder::ExprContext;
@@ -48,12 +49,11 @@ use crate::WindowChecker;
impl Binder {
pub(super) fn analyze_projection(
&mut self,
bind_context: &BindContext,
agg_info: &AggregateInfo,
select_list: &SelectList,
) -> Result<(HashMap<IndexType, ScalarItem>, Vec<ColumnBinding>)> {
let mut columns = Vec::with_capacity(select_list.items.len());
let mut scalars = HashMap::new();
let agg_info = &bind_context.aggregate_info;
for item in select_list.items.iter() {
// This item is a grouping sets item, its data type should be nullable.
let is_grouping_sets_item = agg_info.grouping_id_column.is_some()
@@ -182,6 +182,7 @@ impl Binder {
input_context.set_expr_context(ExprContext::SelectClause);

let mut output = SelectList::default();
let mut prev_aliases = Vec::new();
for select_target in select_list {
match select_target {
SelectTarget::QualifiedName {
@@ -234,7 +235,7 @@ impl Binder {
self.ctx.clone(),
&self.name_resolution_ctx,
self.metadata.clone(),
&[],
&prev_aliases,
);
let (bound_expr, _) = scalar_binder.bind(expr).await?;

@@ -244,6 +245,9 @@ impl Binder {
None => format!("{:#}", expr).to_lowercase(),
};

if alias.is_some() {
prev_aliases.push((expr_name.clone(), bound_expr.clone()));
}
output.items.push(SelectItem {
select_target,
scalar: bound_expr,
7 changes: 0 additions & 7 deletions src/query/sql/src/planner/binder/scalar.rs
Original file line number Diff line number Diff line change
@@ -33,7 +33,6 @@ pub struct ScalarBinder<'a> {
name_resolution_ctx: &'a NameResolutionContext,
metadata: MetadataRef,
aliases: &'a [(String, ScalarExpr)],
allow_ambiguous: bool,
}

impl<'a> ScalarBinder<'a> {
@@ -50,14 +49,9 @@ impl<'a> ScalarBinder<'a> {
name_resolution_ctx,
metadata,
aliases,
allow_ambiguous: false,
}
}

pub fn allow_ambiguity(&mut self) {
self.allow_ambiguous = true;
}

#[async_backtrace::framed]
pub async fn bind(&mut self, expr: &Expr) -> Result<(ScalarExpr, DataType)> {
let mut type_checker = TypeChecker::new(
@@ -66,7 +60,6 @@ impl<'a> ScalarBinder<'a> {
self.name_resolution_ctx,
self.metadata.clone(),
self.aliases,
self.allow_ambiguous,
);
Ok(*type_checker.resolve(expr).await?)
}
2 changes: 1 addition & 1 deletion src/query/sql/src/planner/binder/select.rs
Original file line number Diff line number Diff line change
@@ -173,7 +173,7 @@ impl Binder {

// `analyze_projection` must run after `analyze_aggregate_select` because `analyze_aggregate_select` will rewrite `grouping`.
let (mut scalar_items, projections) =
self.analyze_projection(&from_context, &select_list)?;
self.analyze_projection(&from_context.aggregate_info, &select_list)?;

let having = if let Some(having) = &stmt.having {
Some(
1 change: 0 additions & 1 deletion src/query/sql/src/planner/binder/setting.rs
Original file line number Diff line number Diff line change
@@ -46,7 +46,6 @@ impl Binder {
&self.name_resolution_ctx,
self.metadata.clone(),
&[],
false,
);
let variable = variable.name.clone();

3 changes: 2 additions & 1 deletion src/query/sql/src/planner/binder/sort.rs
Original file line number Diff line number Diff line change
@@ -22,6 +22,7 @@ use common_ast::ast::OrderByExpr;
use common_exception::ErrorCode;
use common_exception::Result;

use super::ExprContext;
use crate::binder::scalar::ScalarBinder;
use crate::binder::select::SelectList;
use crate::binder::window::WindowRewriter;
@@ -66,6 +67,7 @@ impl Binder {
order_by: &[OrderByExpr],
distinct: bool,
) -> Result<OrderItems> {
bind_context.set_expr_context(ExprContext::OrderByClause);
// null is the largest value in databend, smallest in hive
// TODO: rewrite after https://github.com/jorgecarleitao/arrow2/pull/1286 is merged
let default_nulls_first = !self
@@ -110,7 +112,6 @@ impl Binder {
self.metadata.clone(),
aliases,
);
scalar_binder.allow_ambiguity();
let (bound_expr, _) = scalar_binder.bind(&order.expr).await?;

if let Some((idx, (alias, _))) = aliases
1 change: 0 additions & 1 deletion src/query/sql/src/planner/binder/table.rs
Original file line number Diff line number Diff line change
@@ -813,7 +813,6 @@ impl Binder {
&self.name_resolution_ctx,
self.metadata.clone(),
&[],
false,
);
let box (scalar, _) = type_checker.resolve(expr).await?;
let scalar_expr = scalar.as_expr()?;
10 changes: 2 additions & 8 deletions src/query/sql/src/planner/expression_parser.rs
Original file line number Diff line number Diff line change
@@ -93,14 +93,8 @@ pub fn parse_exprs(
}

let name_resolution_ctx = NameResolutionContext::try_from(settings.as_ref())?;
let mut type_checker = TypeChecker::new(
&mut bind_context,
ctx,
&name_resolution_ctx,
metadata,
&[],
false,
);
let mut type_checker =
TypeChecker::new(&mut bind_context, ctx, &name_resolution_ctx, metadata, &[]);

let sql_dialect = Dialect::MySQL;
let tokens = tokenize_sql(sql)?;
Loading

1 comment on commit 538dcd8

@vercel
Copy link

@vercel vercel bot commented on 538dcd8 May 31, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Successfully deployed to the following URLs:

databend – ./

databend-databend.vercel.app
databend-git-main-databend.vercel.app
databend.vercel.app
databend.rs

Please sign in to comment.