diff --git a/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/paimon/run01.sql b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/paimon/run01.sql index 7aa4170eab0985..5cc0a0ea685e37 100644 --- a/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/paimon/run01.sql +++ b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/paimon/run01.sql @@ -22,4 +22,37 @@ insert into test_tb_mix_format values (1,1,'b'),(2,1,'b'),(3,1,'b'),(4,1,'b'),(5 -- update some data, these splits will be readed by jni insert into test_tb_mix_format values (1,2,'b'),(2,2,'b'),(3,2,'b'),(4,2,'b'),(5,2,'b'); -- delete foramt in table properties, doris should get format by file name -alter table test_tb_mix_format unset TBLPROPERTIES ('file.format'); \ No newline at end of file +alter table test_tb_mix_format unset TBLPROPERTIES ('file.format'); + +drop table if exists two_partition; +CREATE TABLE two_partition ( + id BIGINT, + create_date STRING, + region STRING +) PARTITIONED BY (create_date,region) TBLPROPERTIES ( + 'primary-key' = 'create_date,region,id', + 'bucket'=10, + 'file.format'='orc' +); + +insert into two_partition values(1,'2020-01-01','bj'); +insert into two_partition values(2,'2020-01-01','sh'); +insert into two_partition values(3,'2038-01-01','bj'); +insert into two_partition values(4,'2038-01-01','sh'); +insert into two_partition values(5,'2038-01-02','bj'); + +drop table if exists null_partition; +CREATE TABLE null_partition ( + id BIGINT, + region STRING +) PARTITIONED BY (region) TBLPROPERTIES ( + 'primary-key' = 'region,id', + 'bucket'=10, + 'file.format'='orc' +); +-- null NULL "null" all will be in partition [null] +insert into null_partition values(1,'bj'); +insert into null_partition values(2,null); +insert into null_partition values(3,NULL); +insert into null_partition values(4,'null'); +insert into null_partition values(5,'NULL'); \ No newline at end of file diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/MTMVPartitionDefinition.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/MTMVPartitionDefinition.java index c2e9abd2f0f97c..8624bed9ceefae 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/MTMVPartitionDefinition.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/MTMVPartitionDefinition.java @@ -25,8 +25,8 @@ import org.apache.doris.analysis.FunctionParams; import org.apache.doris.analysis.SlotRef; import org.apache.doris.analysis.StringLiteral; +import org.apache.doris.catalog.PartitionType; import org.apache.doris.common.DdlException; -import org.apache.doris.datasource.hive.HMSExternalTable; import org.apache.doris.mtmv.MTMVPartitionExprFactory; import org.apache.doris.mtmv.MTMVPartitionInfo; import org.apache.doris.mtmv.MTMVPartitionInfo.MTMVPartitionType; @@ -136,9 +136,9 @@ private RelatedTableInfo getRelatedTableInfo(NereidsPlanner planner, String part if (!partitionColumnNames.contains(relatedTableInfo.getColumn())) { throw new AnalysisException("error related column: " + relatedTableInfo.getColumn()); } - if (!(mtmvBaseRelatedTable instanceof HMSExternalTable) + if (!(mtmvBaseRelatedTable.getPartitionType(Optional.empty()).equals(PartitionType.LIST)) && partitionColumnNames.size() != 1) { - throw new AnalysisException("only hms table support multi column partition."); + throw new AnalysisException("only List PartitionType support multi column partition."); } return relatedTableInfo; } diff --git a/regression-test/data/mtmv_p0/test_paimon_mtmv.out b/regression-test/data/mtmv_p0/test_paimon_mtmv.out index c28b7cb7baca22..ba6fc06c1d2491 100644 --- a/regression-test/data/mtmv_p0/test_paimon_mtmv.out +++ b/regression-test/data/mtmv_p0/test_paimon_mtmv.out @@ -111,3 +111,32 @@ false -- !not_partition_after -- true +-- !join_one_partition -- +1 2 a 1 2 +10 1 a \N \N +2 2 a \N \N +3 2 a \N \N +4 2 a \N \N +5 2 a \N \N +6 1 a \N \N +7 1 a \N \N +8 1 a \N \N +9 1 a \N \N + +-- !two_partition -- +1 2020-01-01 bj +2 2020-01-01 sh +3 2038-01-01 bj +4 2038-01-01 sh +5 2038-01-02 bj + +-- !limit_partition -- +3 2038-01-01 bj +4 2038-01-01 sh +5 2038-01-02 bj + +-- !null_partition -- +1 bj +4 null +5 NULL + diff --git a/regression-test/data/mtmv_p0/test_paimon_olap_rewrite_mtmv.out b/regression-test/data/mtmv_p0/test_paimon_olap_rewrite_mtmv.out new file mode 100644 index 00000000000000..09d23b7736e1d8 --- /dev/null +++ b/regression-test/data/mtmv_p0/test_paimon_olap_rewrite_mtmv.out @@ -0,0 +1,79 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !refresh_one_partition -- +1 2 a 1 2 +10 1 a \N \N +2 2 a \N \N +3 2 a \N \N +4 2 a \N \N +5 2 a \N \N +6 1 a \N \N +7 1 a \N \N +8 1 a \N \N +9 1 a \N \N + +-- !refresh_one_partition_rewrite -- +1 2 a 1 2 +1 2 b 1 2 +10 1 a \N \N +10 1 b \N \N +2 2 a \N \N +2 2 b \N \N +3 2 a \N \N +3 2 b \N \N +4 2 a \N \N +4 2 b \N \N +5 2 a \N \N +5 2 b \N \N +6 1 a \N \N +6 1 b \N \N +7 1 a \N \N +7 1 b \N \N +8 1 a \N \N +8 1 b \N \N +9 1 a \N \N +9 1 b \N \N + +-- !refresh_auto -- +1 2 a 1 2 +1 2 b 1 2 +10 1 a \N \N +10 1 b \N \N +2 2 a \N \N +2 2 b \N \N +3 2 a \N \N +3 2 b \N \N +4 2 a \N \N +4 2 b \N \N +5 2 a \N \N +5 2 b \N \N +6 1 a \N \N +6 1 b \N \N +7 1 a \N \N +7 1 b \N \N +8 1 a \N \N +8 1 b \N \N +9 1 a \N \N +9 1 b \N \N + +-- !refresh_all_partition_rewrite -- +1 2 a 1 2 +1 2 b 1 2 +10 1 a \N \N +10 1 b \N \N +2 2 a \N \N +2 2 b \N \N +3 2 a \N \N +3 2 b \N \N +4 2 a \N \N +4 2 b \N \N +5 2 a \N \N +5 2 b \N \N +6 1 a \N \N +6 1 b \N \N +7 1 a \N \N +7 1 b \N \N +8 1 a \N \N +8 1 b \N \N +9 1 a \N \N +9 1 b \N \N + diff --git a/regression-test/suites/mtmv_p0/test_paimon_mtmv.groovy b/regression-test/suites/mtmv_p0/test_paimon_mtmv.groovy index f2989edbf6dfd6..48d63e03ec3db5 100644 --- a/regression-test/suites/mtmv_p0/test_paimon_mtmv.groovy +++ b/regression-test/suites/mtmv_p0/test_paimon_mtmv.groovy @@ -25,6 +25,24 @@ suite("test_paimon_mtmv", "p0,external,mtmv,external_docker,external_docker_dori String catalogName = "${suiteName}_catalog" String mvName = "${suiteName}_mv" String dbName = context.config.getDbNameByFile(context.file) + String otherDbName = "${suiteName}_otherdb" + String tableName = "${suiteName}_table" + + sql """drop database if exists ${otherDbName}""" + sql """create database ${otherDbName}""" + sql """ + CREATE TABLE ${otherDbName}.${tableName} ( + `user_id` INT, + `num` INT + ) ENGINE=OLAP + DUPLICATE KEY(`user_id`) + DISTRIBUTED BY HASH(`user_id`) BUCKETS 2 + PROPERTIES ('replication_num' = '1') ; + """ + + sql """ + insert into ${otherDbName}.${tableName} values(1,2); + """ String minio_port = context.config.otherConfigs.get("iceberg_minio_port") String externalEnvIp = context.config.otherConfigs.get("externalEnvIp") @@ -99,8 +117,10 @@ suite("test_paimon_mtmv", "p0,external,mtmv,external_docker,external_docker_dori sql """ CREATE MATERIALIZED VIEW ${mvName} BUILD DEFERRED REFRESH AUTO ON MANUAL - DISTRIBUTED BY RANDOM BUCKETS 2 - PROPERTIES ('replication_num' = '1') + KEY(`id`) + COMMENT "comment1" + DISTRIBUTED BY HASH(`id`) BUCKETS 2 + PROPERTIES ('replication_num' = '1',"grace_period"="333") AS SELECT * FROM ${catalogName}.`test_paimon_spark`.test_tb_mix_format; """ @@ -113,6 +133,137 @@ suite("test_paimon_mtmv", "p0,external,mtmv,external_docker,external_docker_dori order_qt_not_partition "SELECT * FROM ${mvName} " order_qt_not_partition_after "select SyncWithBaseTables from mv_infos('database'='${dbName}') where Name='${mvName}'" sql """drop materialized view if exists ${mvName};""" + + // refresh on schedule + sql """ + CREATE MATERIALIZED VIEW ${mvName} + BUILD IMMEDIATE REFRESH COMPLETE ON SCHEDULE EVERY 10 SECOND STARTS "9999-12-13 21:07:09" + KEY(`id`) + COMMENT "comment1" + DISTRIBUTED BY HASH(`id`) BUCKETS 2 + PROPERTIES ('replication_num' = '1',"grace_period"="333") + AS + SELECT * FROM ${catalogName}.`test_paimon_spark`.test_tb_mix_format; + """ + waitingMTMVTaskFinishedByMvName(mvName) + sql """drop materialized view if exists ${mvName};""" + + // refresh on schedule + sql """ + CREATE MATERIALIZED VIEW ${mvName} + BUILD IMMEDIATE REFRESH AUTO ON commit + KEY(`id`) + COMMENT "comment1" + DISTRIBUTED BY HASH(`id`) BUCKETS 2 + PROPERTIES ('replication_num' = '1',"grace_period"="333") + AS + SELECT * FROM ${catalogName}.`test_paimon_spark`.test_tb_mix_format; + """ + waitingMTMVTaskFinishedByMvName(mvName) + sql """drop materialized view if exists ${mvName};""" + + // cross db and join internal table + sql """ + CREATE MATERIALIZED VIEW ${mvName} + BUILD DEFERRED REFRESH AUTO ON MANUAL + partition by(`par`) + DISTRIBUTED BY RANDOM BUCKETS 2 + PROPERTIES ('replication_num' = '1') + AS + SELECT * FROM ${catalogName}.`test_paimon_spark`.test_tb_mix_format a left join internal.${otherDbName}.${tableName} b on a.id=b.user_id; + """ + def showJoinPartitionsResult = sql """show partitions from ${mvName}""" + logger.info("showJoinPartitionsResult: " + showJoinPartitionsResult.toString()) + assertTrue(showJoinPartitionsResult.toString().contains("p_a")) + assertTrue(showJoinPartitionsResult.toString().contains("p_b")) + + sql """ + REFRESH MATERIALIZED VIEW ${mvName} partitions(p_a); + """ + waitingMTMVTaskFinishedByMvName(mvName) + order_qt_join_one_partition "SELECT * FROM ${mvName} " + sql """drop materialized view if exists ${mvName};""" + + sql """ + CREATE MATERIALIZED VIEW ${mvName} + BUILD DEFERRED REFRESH AUTO ON MANUAL + partition by(`create_date`) + DISTRIBUTED BY RANDOM BUCKETS 2 + PROPERTIES ('replication_num' = '1') + AS + SELECT * FROM ${catalogName}.`test_paimon_spark`.two_partition; + """ + def showTwoPartitionsResult = sql """show partitions from ${mvName}""" + logger.info("showTwoPartitionsResult: " + showTwoPartitionsResult.toString()) + assertTrue(showTwoPartitionsResult.toString().contains("p_20200101")) + assertTrue(showTwoPartitionsResult.toString().contains("p_20380101")) + assertTrue(showTwoPartitionsResult.toString().contains("p_20380102")) + sql """ + REFRESH MATERIALIZED VIEW ${mvName} auto; + """ + waitingMTMVTaskFinishedByMvName(mvName) + order_qt_two_partition "SELECT * FROM ${mvName} " + sql """drop materialized view if exists ${mvName};""" + + sql """ + CREATE MATERIALIZED VIEW ${mvName} + BUILD DEFERRED REFRESH AUTO ON MANUAL + partition by(`create_date`) + DISTRIBUTED BY RANDOM BUCKETS 2 + PROPERTIES ('replication_num' = '1','partition_sync_limit'='2','partition_date_format'='%Y-%m-%d', + 'partition_sync_time_unit'='MONTH') + AS + SELECT * FROM ${catalogName}.`test_paimon_spark`.two_partition; + """ + def showLimitPartitionsResult = sql """show partitions from ${mvName}""" + logger.info("showLimitPartitionsResult: " + showLimitPartitionsResult.toString()) + assertFalse(showLimitPartitionsResult.toString().contains("p_20200101")) + assertTrue(showLimitPartitionsResult.toString().contains("p_20380101")) + assertTrue(showLimitPartitionsResult.toString().contains("p_20380102")) + sql """ + REFRESH MATERIALIZED VIEW ${mvName} auto; + """ + waitingMTMVTaskFinishedByMvName(mvName) + order_qt_limit_partition "SELECT * FROM ${mvName} " + sql """drop materialized view if exists ${mvName};""" + + // not allow date trunc + test { + sql """ + CREATE MATERIALIZED VIEW ${mvName} + BUILD DEFERRED REFRESH AUTO ON MANUAL + partition by (date_trunc(`create_date`,'month')) + DISTRIBUTED BY RANDOM BUCKETS 2 + PROPERTIES ('replication_num' = '1','partition_sync_limit'='2','partition_date_format'='%Y-%m-%d', + 'partition_sync_time_unit'='MONTH') + AS + SELECT * FROM ${catalogName}.`test_paimon_spark`.two_partition; + """ + exception "only support" + } + + sql """ + CREATE MATERIALIZED VIEW ${mvName} + BUILD DEFERRED REFRESH AUTO ON MANUAL + partition by(`region`) + DISTRIBUTED BY RANDOM BUCKETS 2 + PROPERTIES ('replication_num' = '1') + AS + SELECT * FROM ${catalogName}.`test_paimon_spark`.null_partition; + """ + def showNullPartitionsResult = sql """show partitions from ${mvName}""" + logger.info("showNullPartitionsResult: " + showNullPartitionsResult.toString()) + assertTrue(showNullPartitionsResult.toString().contains("p_null")) + assertTrue(showNullPartitionsResult.toString().contains("p_NULL")) + assertTrue(showNullPartitionsResult.toString().contains("p_bj")) + sql """ + REFRESH MATERIALIZED VIEW ${mvName} auto; + """ + waitingMTMVTaskFinishedByMvName(mvName) + // Will lose null data + order_qt_null_partition "SELECT * FROM ${mvName} " + sql """drop materialized view if exists ${mvName};""" + sql """drop catalog if exists ${catalogName}""" } diff --git a/regression-test/suites/mtmv_p0/test_paimon_olap_rewrite_mtmv.groovy b/regression-test/suites/mtmv_p0/test_paimon_olap_rewrite_mtmv.groovy new file mode 100644 index 00000000000000..a3ac1c048d30da --- /dev/null +++ b/regression-test/suites/mtmv_p0/test_paimon_olap_rewrite_mtmv.groovy @@ -0,0 +1,115 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_paimon_olap_rewrite_mtmv", "p0,external,mtmv,external_docker,external_docker_doris") { + String enabled = context.config.otherConfigs.get("enablePaimonTest") + if (enabled == null || !enabled.equalsIgnoreCase("true")) { + logger.info("disabled paimon test") + return + } + String suiteName = "test_paimon_olap_rewrite_mtmv" + String catalogName = "${suiteName}_catalog" + String mvName = "${suiteName}_mv" + String dbName = context.config.getDbNameByFile(context.file) + String tableName = "${suiteName}_table" + sql """drop table if exists ${tableName}""" + sql """ + CREATE TABLE ${tableName} ( + `user_id` INT, + `num` INT + ) ENGINE=OLAP + DUPLICATE KEY(`user_id`) + DISTRIBUTED BY HASH(`user_id`) BUCKETS 2 + PROPERTIES ('replication_num' = '1') ; + """ + sql """ + insert into ${tableName} values(1,2); + """ + + sql """analyze table internal.`${dbName}`. ${tableName} with sync""" + sql """alter table internal.`${dbName}`. ${tableName} modify column user_id set stats ('row_count'='1');""" + + String minio_port = context.config.otherConfigs.get("iceberg_minio_port") + String externalEnvIp = context.config.otherConfigs.get("externalEnvIp") + + sql """set materialized_view_rewrite_enable_contain_external_table=true;""" + String mvSql = "SELECT * FROM ${catalogName}.`test_paimon_spark`.test_tb_mix_format a left join ${tableName} b on a.id=b.user_id;"; + + sql """drop catalog if exists ${catalogName}""" + sql """CREATE CATALOG ${catalogName} PROPERTIES ( + 'type'='paimon', + 'warehouse' = 's3://warehouse/wh/', + "s3.access_key" = "admin", + "s3.secret_key" = "password", + "s3.endpoint" = "http://${externalEnvIp}:${minio_port}", + "s3.region" = "us-east-1" + );""" + + sql """analyze table ${catalogName}.`test_paimon_spark`.test_tb_mix_format with sync""" + sql """alter table ${catalogName}.`test_paimon_spark`.test_tb_mix_format modify column par set stats ('row_count'='20');""" + + sql """drop materialized view if exists ${mvName};""" + + sql """ + CREATE MATERIALIZED VIEW ${mvName} + BUILD DEFERRED REFRESH AUTO ON MANUAL + partition by(`par`) + DISTRIBUTED BY RANDOM BUCKETS 2 + PROPERTIES ('replication_num' = '1') + AS + ${mvSql} + """ + def showPartitionsResult = sql """show partitions from ${mvName}""" + logger.info("showPartitionsResult: " + showPartitionsResult.toString()) + assertTrue(showPartitionsResult.toString().contains("p_a")) + assertTrue(showPartitionsResult.toString().contains("p_b")) + + // refresh one partitions + sql """ + REFRESH MATERIALIZED VIEW ${mvName} partitions(p_a); + """ + waitingMTMVTaskFinishedByMvName(mvName) + order_qt_refresh_one_partition "SELECT * FROM ${mvName} " + + def explainOnePartition = sql """ explain ${mvSql} """ + logger.info("explainOnePartition: " + explainOnePartition.toString()) + assertTrue(explainOnePartition.toString().contains("VUNION")) + order_qt_refresh_one_partition_rewrite "${mvSql}" + + mv_rewrite_success("${mvSql}", "${mvName}") + + // select p_b should not rewrite + mv_rewrite_fail("SELECT * FROM ${catalogName}.`test_paimon_spark`.test_tb_mix_format a left join ${tableName} b on a.id=b.user_id where a.par='b';", "${mvName}") + + //refresh auto + sql """ + REFRESH MATERIALIZED VIEW ${mvName} auto + """ + waitingMTMVTaskFinishedByMvName(mvName) + order_qt_refresh_auto "SELECT * FROM ${mvName} " + + def explainAllPartition = sql """ explain ${mvSql}; """ + logger.info("explainAllPartition: " + explainAllPartition.toString()) + assertTrue(explainAllPartition.toString().contains("VOlapScanNode")) + order_qt_refresh_all_partition_rewrite "${mvSql}" + + mv_rewrite_success("${mvSql}", "${mvName}") + + sql """drop materialized view if exists ${mvName};""" + sql """drop catalog if exists ${catalogName}""" +} + diff --git a/regression-test/suites/mtmv_p0/test_paimon_rewrite_mtmv.groovy b/regression-test/suites/mtmv_p0/test_paimon_rewrite_mtmv.groovy index 985443875c7b26..22a94d46635169 100644 --- a/regression-test/suites/mtmv_p0/test_paimon_rewrite_mtmv.groovy +++ b/regression-test/suites/mtmv_p0/test_paimon_rewrite_mtmv.groovy @@ -75,6 +75,9 @@ suite("test_paimon_rewrite_mtmv", "p0,external,mtmv,external_docker,external_doc mv_rewrite_success("${mvSql}", "${mvName}") + // select p_b should not rewrite + mv_rewrite_fail("SELECT par,count(*) as num FROM ${catalogName}.`test_paimon_spark`.test_tb_mix_format where par='b' group by par;", "${mvName}") + //refresh auto sql """ REFRESH MATERIALIZED VIEW ${mvName} auto diff --git a/regression-test/suites/mtmv_p0/test_partition_refresh_mtmv.groovy b/regression-test/suites/mtmv_p0/test_partition_refresh_mtmv.groovy index 8e084091f4d15a..21296fc5878874 100644 --- a/regression-test/suites/mtmv_p0/test_partition_refresh_mtmv.groovy +++ b/regression-test/suites/mtmv_p0/test_partition_refresh_mtmv.groovy @@ -113,20 +113,17 @@ suite("test_partition_refresh_mtmv") { PROPERTIES ('replication_num' = '1') ; """ - try { - sql """ - CREATE MATERIALIZED VIEW ${mvName} - BUILD DEFERRED REFRESH AUTO ON MANUAL - partition by(`date`) - DISTRIBUTED BY RANDOM BUCKETS 2 - PROPERTIES ('replication_num' = '1') - AS - SELECT * FROM ${tableNameNum}; - """ - Assert.fail(); - } catch (Exception e) { - log.info(e.getMessage()) - } + + sql """ + CREATE MATERIALIZED VIEW ${mvName} + BUILD DEFERRED REFRESH AUTO ON MANUAL + partition by(`date`) + DISTRIBUTED BY RANDOM BUCKETS 2 + PROPERTIES ('replication_num' = '1') + AS + SELECT * FROM ${tableNameNum}; + """ + sql """drop table if exists `${tableNameNum}`""" sql """drop materialized view if exists ${mvName};"""