Skip to content

Commit

Permalink
Indexed field access for List (#1006)
Browse files Browse the repository at this point in the history
* enable GetIndexedField for Array and Dictionary

* fix GetIndexedField which should index slices not values

* Compat with latest sqlparser

* Add two tests for indexed_field access, level one and level two nesting

* fix compilation issues

* try fixing dictionary lookup

* address clippy warnings

* fix test

* Revert dictionary lookup for indexed fields

* Reject negative ints when accessing list values in get indexed field

* Fix doc in get_indexed_field

* use GetIndexedFieldExpr directly

* return the data type in unavailable field indexation error message

* Add unit tests for the physical plan of get_indexed_field

* Fix missing clause for const evaluator
  • Loading branch information
Igosuki authored Oct 29, 2021
1 parent 6fd85a7 commit 22a7d74
Show file tree
Hide file tree
Showing 11 changed files with 458 additions and 1 deletion.
50 changes: 50 additions & 0 deletions datafusion/src/field_util.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

//! Utility functions for complex field access
use arrow::datatypes::{DataType, Field};

use crate::error::{DataFusionError, Result};
use crate::scalar::ScalarValue;

/// Returns the field access indexed by `key` from a [`DataType::List`]
/// # Error
/// Errors if
/// * the `data_type` is not a Struct or,
/// * there is no field key is not of the required index type
pub fn get_indexed_field(data_type: &DataType, key: &ScalarValue) -> Result<Field> {
match (data_type, key) {
(DataType::List(lt), ScalarValue::Int64(Some(i))) => {
if *i < 0 {
Err(DataFusionError::Plan(format!(
"List based indexed access requires a positive int, was {0}",
i
)))
} else {
Ok(Field::new(&i.to_string(), lt.data_type().clone(), false))
}
}
(DataType::List(_), _) => Err(DataFusionError::Plan(
"Only ints are valid as an indexed field in a list".to_string(),
)),
_ => Err(DataFusionError::Plan(
"The expression to get an indexed field is only valid for `List` types"
.to_string(),
)),
}
}
1 change: 1 addition & 0 deletions datafusion/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,7 @@ pub mod variable;
pub use arrow;
pub use parquet;

pub(crate) mod field_util;
#[cfg(test)]
pub mod test;
pub mod test_util;
Expand Down
29 changes: 29 additions & 0 deletions datafusion/src/logical_plan/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
pub use super::Operator;
use crate::error::{DataFusionError, Result};
use crate::field_util::get_indexed_field;
use crate::logical_plan::{window_frames, DFField, DFSchema, LogicalPlan};
use crate::physical_plan::functions::Volatility;
use crate::physical_plan::{
Expand Down Expand Up @@ -245,6 +246,13 @@ pub enum Expr {
IsNull(Box<Expr>),
/// arithmetic negation of an expression, the operand must be of a signed numeric data type
Negative(Box<Expr>),
/// Returns the field of a [`ListArray`] by key
GetIndexedField {
/// the expression to take the field from
expr: Box<Expr>,
/// The name of the field to take
key: ScalarValue,
},
/// Whether an expression is between a given range.
Between {
/// The value to compare
Expand Down Expand Up @@ -433,6 +441,11 @@ impl Expr {
Expr::Wildcard => Err(DataFusionError::Internal(
"Wildcard expressions are not valid in a logical query plan".to_owned(),
)),
Expr::GetIndexedField { ref expr, key } => {
let data_type = expr.get_type(schema)?;

get_indexed_field(&data_type, key).map(|x| x.data_type().clone())
}
}
}

Expand Down Expand Up @@ -488,6 +501,10 @@ impl Expr {
Expr::Wildcard => Err(DataFusionError::Internal(
"Wildcard expressions are not valid in a logical query plan".to_owned(),
)),
Expr::GetIndexedField { ref expr, key } => {
let data_type = expr.get_type(input_schema)?;
get_indexed_field(&data_type, key).map(|x| x.is_nullable())
}
}
}

Expand Down Expand Up @@ -763,6 +780,7 @@ impl Expr {
.try_fold(visitor, |visitor, arg| arg.accept(visitor))
}
Expr::Wildcard => Ok(visitor),
Expr::GetIndexedField { ref expr, .. } => expr.accept(visitor),
}?;

visitor.post_visit(self)
Expand Down Expand Up @@ -923,6 +941,10 @@ impl Expr {
negated,
},
Expr::Wildcard => Expr::Wildcard,
Expr::GetIndexedField { expr, key } => Expr::GetIndexedField {
expr: rewrite_boxed(expr, rewriter)?,
key,
},
};

// now rewrite this expression itself
Expand Down Expand Up @@ -1799,6 +1821,9 @@ impl fmt::Debug for Expr {
}
}
Expr::Wildcard => write!(f, "*"),
Expr::GetIndexedField { ref expr, key } => {
write!(f, "({:?})[{}]", expr, key)
}
}
}
}
Expand Down Expand Up @@ -1879,6 +1904,10 @@ fn create_name(e: &Expr, input_schema: &DFSchema) -> Result<String> {
let expr = create_name(expr, input_schema)?;
Ok(format!("{} IS NOT NULL", expr))
}
Expr::GetIndexedField { expr, key } => {
let expr = create_name(expr, input_schema)?;
Ok(format!("{}[{}]", expr, key))
}
Expr::ScalarFunction { fun, args, .. } => {
create_function_name(&fun.to_string(), false, args, input_schema)
}
Expand Down
4 changes: 4 additions & 0 deletions datafusion/src/optimizer/common_subexpr_eliminate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,10 @@ impl ExprIdentifierVisitor<'_> {
Expr::Wildcard => {
desc.push_str("Wildcard-");
}
Expr::GetIndexedField { key, .. } => {
desc.push_str("GetIndexedField-");
desc.push_str(&key.to_string());
}
}

desc
Expand Down
7 changes: 7 additions & 0 deletions datafusion/src/optimizer/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ impl ExpressionVisitor for ColumnNameVisitor<'_> {
Expr::AggregateUDF { .. } => {}
Expr::InList { .. } => {}
Expr::Wildcard => {}
Expr::GetIndexedField { .. } => {}
}
Ok(Recursion::Continue(self))
}
Expand Down Expand Up @@ -337,6 +338,7 @@ pub fn expr_sub_expressions(expr: &Expr) -> Result<Vec<Expr>> {
Expr::Wildcard { .. } => Err(DataFusionError::Internal(
"Wildcard expressions are not valid in a logical query plan".to_owned(),
)),
Expr::GetIndexedField { expr, .. } => Ok(vec![expr.as_ref().to_owned()]),
}
}

Expand Down Expand Up @@ -496,6 +498,10 @@ pub fn rewrite_expression(expr: &Expr, expressions: &[Expr]) -> Result<Expr> {
Expr::Wildcard { .. } => Err(DataFusionError::Internal(
"Wildcard expressions are not valid in a logical query plan".to_owned(),
)),
Expr::GetIndexedField { expr: _, key } => Ok(Expr::GetIndexedField {
expr: Box::new(expressions[0].clone()),
key: key.clone(),
}),
}
}

Expand Down Expand Up @@ -650,6 +656,7 @@ impl ConstEvaluator {
Expr::Cast { .. } => true,
Expr::TryCast { .. } => true,
Expr::InList { .. } => true,
Expr::GetIndexedField { .. } => true,
}
}

Expand Down
Loading

0 comments on commit 22a7d74

Please sign in to comment.