Skip to content

Failed optimizations with Int64 type #15291

@aectaan

Description

@aectaan

Describe the bug

Datafusion optimizer produce different behaviour with different types of arguments in request. Also behaviour is dependent on positions of arguments.

To Reproduce

Add this test to optimiser_integrations.rs

#[test]
fn test_prepared_request() {
    let successful = r#"
        PREPARE req(BIGINT) AS
        WITH aggregations_group AS (
            SELECT
                count(col_utf8) FILTER (WHERE $1 - 1 <= col_int32) as foo,
                count(col_utf8) FILTER (WHERE $1 - 2 <= col_int32 AND col_uint32 >= 0) as bar,
                count(col_utf8) FILTER (WHERE $1 - 2 <= col_int32 AND col_uint32 >= 0 AND col_uint32 >= 0) as baz
            FROM test
        )
        SELECT * FROM aggregations_group
        "#;

    test_pgsql(successful).unwrap();

    let failed = r#"
            PREPARE req(BIGINT) AS
            WITH aggregations_group AS (
                SELECT
                    count(col_utf8) FILTER (WHERE $1 - 1 <= col_int64) as foo,
                    count(col_utf8) FILTER (WHERE $1 - 2 <= col_int64 AND col_uint32 >= 0) as bar,
                    count(col_utf8) FILTER (WHERE $1 - 2 <= col_int64 AND col_uint32 >= 0 AND col_uint32 >= 0) as baz
                FROM test
            )
            SELECT * FROM aggregations_group
            "#;

    test_pgsql(failed).unwrap();
}

fn test_pgsql(sql: &str) -> Result<LogicalPlan> {
    // parse the SQL
    let dialect = PostgreSqlDialect {}; // or AnsiDialect, or your own dialect ...
    let ast: Vec<Statement> = Parser::parse_sql(&dialect, sql).unwrap();
    let statement = &ast[0];
    let context_provider = MyContextProvider::default()
        .with_udaf(sum_udaf())
        .with_udaf(count_udaf())
        .with_udaf(avg_udaf())
        .with_expr_planners(vec![
            Arc::new(AggregateFunctionPlanner),
            Arc::new(WindowFunctionPlanner),
        ]);

    let sql_to_rel = SqlToRel::new(&context_provider);
    let plan = sql_to_rel.sql_statement_to_plan(statement.clone())?;

    let config = OptimizerContext::new().with_skip_failing_rules(false);
    let analyzer = Analyzer::new();
    let optimizer = Optimizer::new();
    // analyze and optimize the logical plan
    let plan = analyzer.execute_and_check(plan, config.options(), |_, _| {})?;
    optimizer.optimize(plan, &config, observe)
}

Also add a Field::new("col_int64", DataType::Int64, true) column to test table.

Failed request will execute succesfuly with any of this changes:

  • remove redundant condition
let failed = r#"
            PREPARE req(BIGINT) AS
            WITH aggregations_group AS (
                SELECT
                    count(col_utf8) FILTER (WHERE $1 - 1 <= col_int64) as foo,
                    count(col_utf8) FILTER (WHERE $1 - 2 <= col_int64 AND col_uint32 >= 0) as bar,
                    count(col_utf8) FILTER (WHERE $1 - 2 <= col_int64 AND col_uint32 >= 0) as baz
                FROM test
            )
            SELECT * FROM aggregations_group
            "#;
  • swap order of expressions
let failed = r#"
            PREPARE req(BIGINT) AS
            WITH aggregations_group AS (
                SELECT
                    count(col_utf8) FILTER (WHERE $1 - 1 <= col_int64) as foo,
                    count(col_utf8) FILTER (WHERE $1 - 2 <= col_int64 AND col_uint32 >= 0) as bar,
                    count(col_utf8) FILTER (WHERE col_uint32 >= 0 AND $1 - 2 <= col_int64 AND col_uint32 >= 0) as baz
                FROM test
            )
            SELECT * FROM aggregations_group
            "#;

Expected behavior

Expected to get successful result not dependent on types or expression order

Additional context

No response

Metadata

Metadata

Assignees

No one assigned

    Labels

    bugSomething isn't working

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions