diff --git a/datafusion/common/src/dfschema.rs b/datafusion/common/src/dfschema.rs index 804e14bf72fb..020916debd70 100644 --- a/datafusion/common/src/dfschema.rs +++ b/datafusion/common/src/dfschema.rs @@ -206,6 +206,25 @@ impl DFSchema { Ok(dfschema) } + /// Return the same schema, where all fields have a given qualifier. + pub fn with_field_specific_qualified_schema( + &self, + qualifiers: Vec>, + ) -> Result { + if qualifiers.len() != self.fields().len() { + return _plan_err!( + "Number of qualifiers must match number of fields. Expected {}, got {}", + self.fields().len(), + qualifiers.len() + ); + } + Ok(DFSchema { + inner: Arc::clone(&self.inner), + field_qualifiers: qualifiers, + functional_dependencies: self.functional_dependencies.clone(), + }) + } + /// Check if the schema have some fields with the same name pub fn check_names(&self) -> Result<()> { let mut qualified_names = BTreeSet::new(); diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index c65fcb4c4c93..6c2a10a54ea7 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -89,6 +89,8 @@ use datafusion_physical_optimizer::PhysicalOptimizerRule; use datafusion_physical_plan::execution_plan::InvariantLevel; use datafusion_physical_plan::placeholder_row::PlaceholderRowExec; use datafusion_physical_plan::unnest::ListUnnest; +use datafusion_sql::TableReference; +use sqlparser::ast::NullTreatment; use crate::schema_equivalence::schema_satisfied_by; use async_trait::async_trait; @@ -96,7 +98,6 @@ use datafusion_datasource::file_groups::FileGroup; use futures::{StreamExt, TryStreamExt}; use itertools::{multiunzip, Itertools}; use log::{debug, trace}; -use sqlparser::ast::NullTreatment; use tokio::sync::Mutex; /// Physical query planner that converts a `LogicalPlan` to an @@ -890,8 +891,8 @@ impl DefaultPhysicalPlanner { // 2 Children LogicalPlan::Join(Join { - left, - right, + left: original_left, + right: original_right, on: keys, filter, join_type, @@ -916,23 +917,25 @@ impl DefaultPhysicalPlanner { let (left, left_col_keys, left_projected) = wrap_projection_for_join_if_necessary( &left_keys, - left.as_ref().clone(), + original_left.as_ref().clone(), )?; let (right, right_col_keys, right_projected) = wrap_projection_for_join_if_necessary( &right_keys, - right.as_ref().clone(), + original_right.as_ref().clone(), )?; let column_on = (left_col_keys, right_col_keys); let left = Arc::new(left); let right = Arc::new(right); - let new_join = LogicalPlan::Join(Join::try_new_with_project_input( + let (new_join, requalified) = Join::try_new_with_project_input( node, Arc::clone(&left), Arc::clone(&right), column_on, - )?); + )?; + + let new_join = LogicalPlan::Join(new_join); // If inputs were projected then create ExecutionPlan for these new // LogicalPlan nodes. @@ -965,8 +968,24 @@ impl DefaultPhysicalPlanner { // Remove temporary projected columns if left_projected || right_projected { - let final_join_result = - join_schema.iter().map(Expr::from).collect::>(); + // Re-qualify the join schema only if the inputs were previously requalified in + // `try_new_with_project_input`. This ensures that when building the Projection + // it can correctly resolve field nullability and data types + // by disambiguating fields from the left and right sides of the join. + let qualified_join_schema = if requalified { + Arc::new(qualify_join_schema_sides( + join_schema, + original_left, + original_right, + )?) + } else { + Arc::clone(join_schema) + }; + + let final_join_result = qualified_join_schema + .iter() + .map(Expr::from) + .collect::>(); let projection = LogicalPlan::Projection(Projection::try_new( final_join_result, Arc::new(new_join), @@ -1463,6 +1482,64 @@ fn get_null_physical_expr_pair( Ok((Arc::new(null_value), physical_name)) } +/// Qualifies the fields in a join schema with "left" and "right" qualifiers +/// without mutating the original schema. This function should only be used when +/// the join inputs have already been requalified earlier in `try_new_with_project_input`. +/// +/// The purpose is to avoid ambiguity errors later in planning (e.g., in nullability or data type resolution) +/// when converting expressions to fields. +fn qualify_join_schema_sides( + join_schema: &DFSchema, + left: &LogicalPlan, + right: &LogicalPlan, +) -> Result { + let left_fields = left.schema().fields(); + let right_fields = right.schema().fields(); + let join_fields = join_schema.fields(); + + // Validate lengths + if join_fields.len() != left_fields.len() + right_fields.len() { + return internal_err!( + "Join schema field count must match left and right field count." + ); + } + + // Validate field names match + for (i, (field, expected)) in join_fields + .iter() + .zip(left_fields.iter().chain(right_fields.iter())) + .enumerate() + { + if field.name() != expected.name() { + return internal_err!( + "Field name mismatch at index {}: expected '{}', found '{}'", + i, + expected.name(), + field.name() + ); + } + } + + // qualify sides + let qualifiers = join_fields + .iter() + .enumerate() + .map(|(i, _)| { + if i < left_fields.len() { + Some(TableReference::Bare { + table: Arc::from("left"), + }) + } else { + Some(TableReference::Bare { + table: Arc::from("right"), + }) + } + }) + .collect(); + + join_schema.with_field_specific_qualified_schema(qualifiers) +} + fn get_physical_expr_pair( expr: &Expr, input_dfschema: &DFSchema, diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index 533e81e64f29..7baff601e0b8 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -1646,6 +1646,38 @@ pub fn build_join_schema( dfschema.with_functional_dependencies(func_dependencies) } +/// (Re)qualify the sides of a join if needed, i.e. if the columns from one side would otherwise +/// conflict with the columns from the other. +/// This is especially useful for queries that come as Substrait, since Substrait doesn't currently allow specifying +/// aliases, neither for columns nor for tables. DataFusion requires columns to be uniquely identifiable, in some +/// places (see e.g. DFSchema::check_names). +/// The function returns: +/// - The requalified or original left logical plan +/// - The requalified or original right logical plan +/// - If a requalification was needed or not +pub fn requalify_sides_if_needed( + left: LogicalPlanBuilder, + right: LogicalPlanBuilder, +) -> Result<(LogicalPlanBuilder, LogicalPlanBuilder, bool)> { + let left_cols = left.schema().columns(); + let right_cols = right.schema().columns(); + if left_cols.iter().any(|l| { + right_cols.iter().any(|r| { + l == r || (l.name == r.name && (l.relation.is_none() || r.relation.is_none())) + }) + }) { + // These names have no connection to the original plan, but they'll make the columns + // (mostly) unique. + Ok(( + left.alias(TableReference::bare("left"))?, + right.alias(TableReference::bare("right"))?, + true, + )) + } else { + Ok((left, right, false)) + } +} + /// Add additional "synthetic" group by expressions based on functional /// dependencies. /// diff --git a/datafusion/expr/src/logical_plan/mod.rs b/datafusion/expr/src/logical_plan/mod.rs index a55f4d97b212..444fb50cc5b5 100644 --- a/datafusion/expr/src/logical_plan/mod.rs +++ b/datafusion/expr/src/logical_plan/mod.rs @@ -27,8 +27,9 @@ mod statement; pub mod tree_node; pub use builder::{ - build_join_schema, table_scan, union, wrap_projection_for_join_if_necessary, - LogicalPlanBuilder, LogicalPlanBuilderOptions, LogicalTableSource, UNNAMED_TABLE, + build_join_schema, requalify_sides_if_needed, table_scan, union, + wrap_projection_for_join_if_necessary, LogicalPlanBuilder, LogicalPlanBuilderOptions, + LogicalTableSource, UNNAMED_TABLE, }; pub use ddl::{ CreateCatalog, CreateCatalogSchema, CreateExternalTable, CreateFunction, diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index 5bc07cf6213e..f7052ca25429 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -43,9 +43,10 @@ use crate::utils::{ grouping_set_expr_count, grouping_set_to_exprlist, split_conjunction, }; use crate::{ - build_join_schema, expr_vec_fmt, BinaryExpr, CreateMemoryTable, CreateView, Execute, - Expr, ExprSchemable, LogicalPlanBuilder, Operator, Prepare, - TableProviderFilterPushDown, TableSource, WindowFunctionDefinition, + build_join_schema, expr_vec_fmt, requalify_sides_if_needed, BinaryExpr, + CreateMemoryTable, CreateView, Execute, Expr, ExprSchemable, LogicalPlanBuilder, + Operator, Prepare, TableProviderFilterPushDown, TableSource, + WindowFunctionDefinition, }; use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; @@ -3750,37 +3751,61 @@ impl Join { }) } - /// Create Join with input which wrapped with projection, this method is used to help create physical join. + /// Create Join with input which wrapped with projection, this method is used in physcial planning only to help + /// create the physical join. pub fn try_new_with_project_input( original: &LogicalPlan, left: Arc, right: Arc, column_on: (Vec, Vec), - ) -> Result { + ) -> Result<(Self, bool)> { let original_join = match original { LogicalPlan::Join(join) => join, _ => return plan_err!("Could not create join with project input"), }; + let mut left_sch = LogicalPlanBuilder::from(Arc::clone(&left)); + let mut right_sch = LogicalPlanBuilder::from(Arc::clone(&right)); + + let mut requalified = false; + + // By definition, the resulting schema of an inner/left/right & full join will have first the left side fields and then the right, + // potentially having duplicate field names. Note this will only qualify fields if they have not been qualified before. + if original_join.join_type == JoinType::Inner + || original_join.join_type == JoinType::Left + || original_join.join_type == JoinType::Right + || original_join.join_type == JoinType::Full + { + (left_sch, right_sch, requalified) = + requalify_sides_if_needed(left_sch.clone(), right_sch.clone())?; + } + let on: Vec<(Expr, Expr)> = column_on .0 .into_iter() .zip(column_on.1) .map(|(l, r)| (Expr::Column(l), Expr::Column(r))) .collect(); - let join_schema = - build_join_schema(left.schema(), right.schema(), &original_join.join_type)?; - Ok(Join { - left, - right, - on, - filter: original_join.filter.clone(), - join_type: original_join.join_type, - join_constraint: original_join.join_constraint, - schema: Arc::new(join_schema), - null_equals_null: original_join.null_equals_null, - }) + let join_schema = build_join_schema( + left_sch.schema(), + right_sch.schema(), + &original_join.join_type, + )?; + + Ok(( + Join { + left, + right, + on, + filter: original_join.filter.clone(), + join_type: original_join.join_type, + join_constraint: original_join.join_constraint, + schema: Arc::new(join_schema), + null_equals_null: original_join.null_equals_null, + }, + requalified, + )) } } diff --git a/datafusion/substrait/src/logical_plan/consumer/rel/cross_rel.rs b/datafusion/substrait/src/logical_plan/consumer/rel/cross_rel.rs index a91366e47742..25c66a8e2297 100644 --- a/datafusion/substrait/src/logical_plan/consumer/rel/cross_rel.rs +++ b/datafusion/substrait/src/logical_plan/consumer/rel/cross_rel.rs @@ -15,9 +15,11 @@ // specific language governing permissions and limitations // under the License. -use crate::logical_plan::consumer::utils::requalify_sides_if_needed; use crate::logical_plan::consumer::SubstraitConsumer; use datafusion::logical_expr::{LogicalPlan, LogicalPlanBuilder}; + +use datafusion::logical_expr::requalify_sides_if_needed; + use substrait::proto::CrossRel; pub async fn from_cross_rel( @@ -30,6 +32,6 @@ pub async fn from_cross_rel( let right = LogicalPlanBuilder::from( consumer.consume_rel(cross.right.as_ref().unwrap()).await?, ); - let (left, right) = requalify_sides_if_needed(left, right)?; + let (left, right, _requalified) = requalify_sides_if_needed(left, right)?; left.cross_join(right.build()?)?.build() } diff --git a/datafusion/substrait/src/logical_plan/consumer/rel/join_rel.rs b/datafusion/substrait/src/logical_plan/consumer/rel/join_rel.rs index 881157dcfa66..3a7407fccdd0 100644 --- a/datafusion/substrait/src/logical_plan/consumer/rel/join_rel.rs +++ b/datafusion/substrait/src/logical_plan/consumer/rel/join_rel.rs @@ -15,13 +15,14 @@ // specific language governing permissions and limitations // under the License. -use crate::logical_plan::consumer::utils::requalify_sides_if_needed; use crate::logical_plan::consumer::SubstraitConsumer; use datafusion::common::{not_impl_err, plan_err, Column, JoinType}; +use datafusion::logical_expr::requalify_sides_if_needed; use datafusion::logical_expr::utils::split_conjunction; use datafusion::logical_expr::{ BinaryExpr, Expr, LogicalPlan, LogicalPlanBuilder, Operator, }; + use substrait::proto::{join_rel, JoinRel}; pub async fn from_join_rel( @@ -38,7 +39,7 @@ pub async fn from_join_rel( let right = LogicalPlanBuilder::from( consumer.consume_rel(join.right.as_ref().unwrap()).await?, ); - let (left, right) = requalify_sides_if_needed(left, right)?; + let (left, right, _requalified) = requalify_sides_if_needed(left, right)?; let join_type = from_substrait_jointype(join.r#type)?; // The join condition expression needs full input schema and not the output schema from join since we lose columns from diff --git a/datafusion/substrait/src/logical_plan/consumer/utils.rs b/datafusion/substrait/src/logical_plan/consumer/utils.rs index a267971ff8d3..3dc0fd55c46e 100644 --- a/datafusion/substrait/src/logical_plan/consumer/utils.rs +++ b/datafusion/substrait/src/logical_plan/consumer/utils.rs @@ -19,10 +19,9 @@ use crate::logical_plan::consumer::SubstraitConsumer; use datafusion::arrow::datatypes::{DataType, Field, FieldRef, Fields, Schema}; use datafusion::common::{ not_impl_err, substrait_datafusion_err, substrait_err, DFSchema, DFSchemaRef, - TableReference, }; use datafusion::logical_expr::expr::Sort; -use datafusion::logical_expr::{Cast, Expr, ExprSchemable, LogicalPlanBuilder}; +use datafusion::logical_expr::{Cast, Expr, ExprSchemable}; use std::collections::HashSet; use std::sync::Arc; use substrait::proto::sort_field::SortDirection; @@ -36,33 +35,6 @@ use substrait::proto::SortField; // https://github.com/apache/arrow-rs/blob/ee5694078c86c8201549654246900a4232d531a9/arrow-cast/src/cast/mod.rs#L1749). pub(super) const DEFAULT_TIMEZONE: &str = "UTC"; -/// (Re)qualify the sides of a join if needed, i.e. if the columns from one side would otherwise -/// conflict with the columns from the other. -/// Substrait doesn't currently allow specifying aliases, neither for columns nor for tables. For -/// Substrait the names don't matter since it only refers to columns by indices, however DataFusion -/// requires columns to be uniquely identifiable, in some places (see e.g. DFSchema::check_names). -pub(super) fn requalify_sides_if_needed( - left: LogicalPlanBuilder, - right: LogicalPlanBuilder, -) -> datafusion::common::Result<(LogicalPlanBuilder, LogicalPlanBuilder)> { - let left_cols = left.schema().columns(); - let right_cols = right.schema().columns(); - if left_cols.iter().any(|l| { - right_cols.iter().any(|r| { - l == r || (l.name == r.name && (l.relation.is_none() || r.relation.is_none())) - }) - }) { - // These names have no connection to the original plan, but they'll make the columns - // (mostly) unique. - Ok(( - left.alias(TableReference::bare("left"))?, - right.alias(TableReference::bare("right"))?, - )) - } else { - Ok((left, right)) - } -} - pub(super) fn next_struct_field_name( column_idx: usize, dfs_names: &[String], diff --git a/datafusion/substrait/tests/cases/consumer_integration.rs b/datafusion/substrait/tests/cases/consumer_integration.rs index 4a121e41d27e..48bba45655c4 100644 --- a/datafusion/substrait/tests/cases/consumer_integration.rs +++ b/datafusion/substrait/tests/cases/consumer_integration.rs @@ -584,4 +584,33 @@ mod tests { Ok(()) } + + #[tokio::test] + async fn test_join_with_expression_key() -> Result<()> { + let plan_str = test_plan_to_string("join_with_expression_key.json").await?; + assert_snapshot!( + plan_str, + @r#" + Projection: left.index_name AS index, right.upper(host) AS host, left.max(size_bytes) AS idx_size, right.max(total_bytes) AS db_size, CAST(left.max(size_bytes) AS Float64) / CAST(right.max(total_bytes) AS Float64) * Float64(100) AS pct_of_db + Inner Join: left.upper(host) = right.upper(host) + SubqueryAlias: left + Aggregate: groupBy=[[index_name, upper(host)]], aggr=[[max(size_bytes)]] + Projection: size_bytes, index_name, upper(host) + Filter: index_name = Utf8("aaa") + Values: (Utf8("aaa"), Utf8("host-a"), Int64(128)), (Utf8("bbb"), Utf8("host-b"), Int64(256)) + SubqueryAlias: right + Aggregate: groupBy=[[upper(host)]], aggr=[[max(total_bytes)]] + Projection: total_bytes, upper(host) + Inner Join: Filter: upper(host) = upper(host) + Values: (Utf8("host-a"), Int64(107)), (Utf8("host-b"), Int64(214)) + Projection: upper(host) + Aggregate: groupBy=[[index_name, upper(host)]], aggr=[[max(size_bytes)]] + Projection: size_bytes, index_name, upper(host) + Filter: index_name = Utf8("aaa") + Values: (Utf8("aaa"), Utf8("host-a"), Int64(128)), (Utf8("bbb"), Utf8("host-b"), Int64(256)) + "# + ); + + Ok(()) + } } diff --git a/datafusion/substrait/tests/testdata/test_plans/join_with_expression_key.json b/datafusion/substrait/tests/testdata/test_plans/join_with_expression_key.json new file mode 100644 index 000000000000..73fa06eea5f0 --- /dev/null +++ b/datafusion/substrait/tests/testdata/test_plans/join_with_expression_key.json @@ -0,0 +1,814 @@ +{ + "extensionUris": [{ + "extensionUriAnchor": 3, + "uri": "/functions_arithmetic.yaml" + }, { + "extensionUriAnchor": 2, + "uri": "/functions_string.yaml" + }, { + "extensionUriAnchor": 1, + "uri": "/functions_comparison.yaml" + }], + "extensions": [{ + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 0, + "name": "equal:any_any" + } + }, { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 1, + "name": "upper:str" + } + }, { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 2, + "name": "max:i64" + } + }, { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 3, + "name": "multiply:fp64_fp64" + } + }, { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 4, + "name": "divide:fp64_fp64" + } + }], + "relations": [{ + "root": { + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [5, 6, 7, 8, 9] + } + }, + "input": { + "join": { + "common": { + "direct": { + } + }, + "left": { + "aggregate": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [3, 4, 5] + } + }, + "input": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["index_name", "host", "size_bytes"], + "struct": { + "types": [{ + "string": { + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "string": { + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i64": { + "nullability": "NULLABILITY_NULLABLE" + } + }], + "nullability": "NULLABILITY_REQUIRED" + } + }, + "virtualTable": { + "values": [{ + "fields": [{ + "string": "aaa", + "nullable": true + }, { + "string": "host-a", + "nullable": true + }, { + "i64": "128", + "nullable": true + }] + }, { + "fields": [{ + "string": "bbb", + "nullable": true + }, { + "string": "host-b", + "nullable": true + }, { + "i64": "256", + "nullable": true + }] + }] + } + } + }, + "condition": { + "scalarFunction": { + "functionReference": 0, + "outputType": { + "bool": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "string": "aaa" + } + } + }] + } + } + } + }, + "expressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, { + "scalarFunction": { + "functionReference": 1, + "outputType": { + "string": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + } + }] + } + }] + } + }, + "groupings": [{ + "groupingExpressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + }] + }], + "measures": [{ + "measure": { + "functionReference": 2, + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "i64": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + }] + } + }] + } + }, + "right": { + "aggregate": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [3, 4] + } + }, + "input": { + "join": { + "common": { + "direct": { + } + }, + "left": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["host", "total_bytes"], + "struct": { + "types": [{ + "string": { + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i64": { + "nullability": "NULLABILITY_NULLABLE" + } + }], + "nullability": "NULLABILITY_REQUIRED" + } + }, + "virtualTable": { + "values": [{ + "fields": [{ + "string": "host-a", + "nullable": true + }, { + "i64": "107", + "nullable": true + }] + }, { + "fields": [{ + "string": "host-b", + "nullable": true + }, { + "i64": "214", + "nullable": true + }] + }] + } + } + }, + "right": { + "project": { + "common": { + "emit": { + "outputMapping": [3] + } + }, + "input": { + "aggregate": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [3, 4, 5] + } + }, + "input": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["index_name", "host", "size_bytes"], + "struct": { + "types": [{ + "string": { + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "string": { + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i64": { + "nullability": "NULLABILITY_NULLABLE" + } + }], + "nullability": "NULLABILITY_REQUIRED" + } + }, + "virtualTable": { + "values": [{ + "fields": [{ + "string": "aaa", + "nullable": true + }, { + "string": "host-a", + "nullable": true + }, { + "i64": "128", + "nullable": true + }] + }, { + "fields": [{ + "string": "bbb", + "nullable": true + }, { + "string": "host-b", + "nullable": true + }, { + "i64": "256", + "nullable": true + }] + }] + } + } + }, + "condition": { + "scalarFunction": { + "functionReference": 0, + "outputType": { + "bool": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "string": "aaa" + } + } + }] + } + } + } + }, + "expressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, { + "scalarFunction": { + "functionReference": 1, + "outputType": { + "string": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + } + }] + } + }] + } + }, + "groupings": [{ + "groupingExpressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + }] + }], + "measures": [{ + "measure": { + "functionReference": 2, + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "i64": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + }] + } + }] + } + }, + "expressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }] + } + }, + "expression": { + "scalarFunction": { + "functionReference": 0, + "outputType": { + "bool": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 1, + "outputType": { + "string": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + }] + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + } + }] + } + }, + "type": "JOIN_TYPE_INNER" + } + }, + "expressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }, { + "scalarFunction": { + "functionReference": 1, + "outputType": { + "string": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + }] + } + }] + } + }, + "groupings": [{ + "groupingExpressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }] + }], + "measures": [{ + "measure": { + "functionReference": 2, + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "i64": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + }] + } + }] + } + }, + "expression": { + "scalarFunction": { + "functionReference": 0, + "outputType": { + "bool": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 3 + } + }, + "rootReference": { + } + } + } + }] + } + }, + "type": "JOIN_TYPE_INNER" + } + }, + "expressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 3 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + }, { + "scalarFunction": { + "functionReference": 3, + "outputType": { + "fp64": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 4, + "outputType": { + "fp64": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "cast": { + "type": { + "fp64": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_THROW_EXCEPTION" + } + } + }, { + "value": { + "cast": { + "type": { + "fp64": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_THROW_EXCEPTION" + } + } + }] + } + } + }, { + "value": { + "literal": { + "fp64": 100.0 + } + } + }] + } + }] + } + }, + "names": ["index", "host", "idx_size", "db_size", "pct_of_db"] + } + }] +} \ No newline at end of file