From 0cba9ba8ba904aa1c69b5efe97b5493797175c2a Mon Sep 17 00:00:00 2001 From: zy-kkk Date: Wed, 14 Aug 2024 21:14:40 +0800 Subject: [PATCH 01/13] [improvement](jdbc catalog) Compatible with ojdbc6 by adding version check (#39341) In previous versions, we used a method based on JDBC 4.2 to read data, so it was equivalent to abandoning support for ojdbc6. However, we recently found that a large number of users still use Oracle version 11g, which will have some unexpected compatibility issues when using ojdbc8 to connect. Therefore, I use version verification to make it compatible with both ojdbc6 and ojdbc8, so that good compatibility can be obtained through ojdbc6, and better reading efficiency can be obtained through ojdbc8. --- fe/be-java-extensions/jdbc-scanner/pom.xml | 4 + .../apache/doris/jdbc/BaseJdbcExecutor.java | 30 ++++- .../apache/doris/jdbc/OracleJdbcExecutor.java | 115 ++++++++++++------ fe/pom.xml | 6 + .../jdbc/test_oracle_jdbc_catalog.out | 4 + .../jdbc/test_oracle_jdbc_catalog.groovy | 16 +++ 6 files changed, 137 insertions(+), 38 deletions(-) diff --git a/fe/be-java-extensions/jdbc-scanner/pom.xml b/fe/be-java-extensions/jdbc-scanner/pom.xml index bebf1c4ffc48ba..54da3601cc4ba0 100644 --- a/fe/be-java-extensions/jdbc-scanner/pom.xml +++ b/fe/be-java-extensions/jdbc-scanner/pom.xml @@ -45,6 +45,10 @@ under the License. 
HikariCP provided + + org.semver4j + semver4j + diff --git a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/BaseJdbcExecutor.java b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/BaseJdbcExecutor.java index 9e731f051a76d8..b25294021ee8f0 100644 --- a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/BaseJdbcExecutor.java +++ b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/BaseJdbcExecutor.java @@ -33,11 +33,13 @@ import org.apache.thrift.TDeserializer; import org.apache.thrift.TException; import org.apache.thrift.protocol.TBinaryProtocol; +import org.semver4j.Semver; import java.io.FileNotFoundException; import java.lang.reflect.Array; import java.net.MalformedURLException; import java.sql.Connection; +import java.sql.DatabaseMetaData; import java.sql.Date; import java.sql.PreparedStatement; import java.sql.ResultSet; @@ -66,6 +68,7 @@ public abstract class BaseJdbcExecutor implements JdbcExecutor { protected VectorTable outputTable = null; protected int batchSizeNum = 0; protected int curBlockRows = 0; + protected String jdbcDriverVersion; public BaseJdbcExecutor(byte[] thriftParams) throws Exception { TJdbcExecutorCtorParams request = new TJdbcExecutorCtorParams(); @@ -92,11 +95,12 @@ public BaseJdbcExecutor(byte[] thriftParams) throws Exception { .setConnectionPoolKeepAlive(request.connection_pool_keep_alive); JdbcDataSource.getDataSource().setCleanupInterval(request.connection_pool_cache_clear_time); init(config, request.statement); + this.jdbcDriverVersion = getJdbcDriverVersion(); } public void close() throws Exception { try { - if (stmt != null) { + if (stmt != null && !stmt.isClosed()) { try { stmt.cancel(); } catch (SQLException e) { @@ -525,6 +529,30 @@ private void insertNullColumn(int parameterIndex, ColumnType.Type dorisType) } } + private String getJdbcDriverVersion() { + try { + if (conn != null) { + DatabaseMetaData metaData = conn.getMetaData(); + return 
metaData.getDriverVersion(); + } else { + return null; + } + } catch (SQLException e) { + LOG.warn("Failed to retrieve JDBC Driver version", e); + return null; + } + } + + protected boolean isJdbcVersionGreaterThanOrEqualTo(String version) { + Semver currentVersion = Semver.coerce(jdbcDriverVersion); + Semver targetVersion = Semver.coerce(version); + if (currentVersion != null && targetVersion != null) { + return currentVersion.isGreaterThanOrEqualTo(targetVersion); + } else { + return false; + } + } + protected String trimSpaces(String str) { int end = str.length() - 1; while (end >= 0 && str.charAt(end) == ' ') { diff --git a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/OracleJdbcExecutor.java b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/OracleJdbcExecutor.java index 662f324eb23de7..6f38895335b986 100644 --- a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/OracleJdbcExecutor.java +++ b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/OracleJdbcExecutor.java @@ -33,7 +33,7 @@ import java.nio.charset.StandardCharsets; import java.sql.Clob; import java.sql.SQLException; -import java.time.LocalDate; +import java.sql.Timestamp; import java.time.LocalDateTime; public class OracleJdbcExecutor extends BaseJdbcExecutor { @@ -65,42 +65,83 @@ protected void initializeBlock(int columnCount, String[] replaceStringList, int @Override protected Object getColumnValue(int columnIndex, ColumnType type, String[] replaceStringList) throws SQLException { - try { - switch (type.getType()) { - case TINYINT: - return resultSet.getObject(columnIndex + 1, Byte.class); - case SMALLINT: - return resultSet.getObject(columnIndex + 1, Short.class); - case INT: - return resultSet.getObject(columnIndex + 1, Integer.class); - case BIGINT: - return resultSet.getObject(columnIndex + 1, Long.class); - case FLOAT: - return resultSet.getObject(columnIndex + 1, Float.class); - case DOUBLE: - return 
resultSet.getObject(columnIndex + 1, Double.class); - case LARGEINT: - case DECIMALV2: - case DECIMAL32: - case DECIMAL64: - case DECIMAL128: - return resultSet.getObject(columnIndex + 1, BigDecimal.class); - case DATE: - case DATEV2: - return resultSet.getObject(columnIndex + 1, LocalDate.class); - case DATETIME: - case DATETIMEV2: - return resultSet.getObject(columnIndex + 1, LocalDateTime.class); - case CHAR: - case VARCHAR: - case STRING: - return resultSet.getObject(columnIndex + 1); - default: - throw new IllegalArgumentException("Unsupported column type: " + type.getType()); - } - } catch (AbstractMethodError e) { - LOG.warn("Detected an outdated ojdbc driver. Please use ojdbc8 or above.", e); - throw new SQLException("Detected an outdated ojdbc driver. Please use ojdbc8 or above."); + if (isJdbcVersionGreaterThanOrEqualTo("12.2.0")) { + return newGetColumnValue(columnIndex, type, replaceStringList); + } else { + return oldGetColumnValue(columnIndex, type, replaceStringList); + } + } + + private Object newGetColumnValue(int columnIndex, ColumnType type, String[] replaceStringList) throws SQLException { + switch (type.getType()) { + case TINYINT: + return resultSet.getObject(columnIndex + 1, Byte.class); + case SMALLINT: + return resultSet.getObject(columnIndex + 1, Short.class); + case INT: + return resultSet.getObject(columnIndex + 1, Integer.class); + case BIGINT: + return resultSet.getObject(columnIndex + 1, Long.class); + case FLOAT: + return resultSet.getObject(columnIndex + 1, Float.class); + case DOUBLE: + return resultSet.getObject(columnIndex + 1, Double.class); + case LARGEINT: + case DECIMALV2: + case DECIMAL32: + case DECIMAL64: + case DECIMAL128: + return resultSet.getObject(columnIndex + 1, BigDecimal.class); + case DATETIME: + case DATETIMEV2: + return resultSet.getObject(columnIndex + 1, LocalDateTime.class); + case CHAR: + case VARCHAR: + case STRING: + return resultSet.getObject(columnIndex + 1); + default: + throw new 
IllegalArgumentException("Unsupported column type: " + type.getType()); + } + } + + private Object oldGetColumnValue(int columnIndex, ColumnType type, String[] replaceStringList) throws SQLException { + switch (type.getType()) { + case TINYINT: + byte tinyIntVal = resultSet.getByte(columnIndex + 1); + return resultSet.wasNull() ? null : tinyIntVal; + case SMALLINT: + short smallIntVal = resultSet.getShort(columnIndex + 1); + return resultSet.wasNull() ? null : smallIntVal; + case INT: + int intVal = resultSet.getInt(columnIndex + 1); + return resultSet.wasNull() ? null : intVal; + case BIGINT: + long bigIntVal = resultSet.getLong(columnIndex + 1); + return resultSet.wasNull() ? null : bigIntVal; + case FLOAT: + float floatVal = resultSet.getFloat(columnIndex + 1); + return resultSet.wasNull() ? null : floatVal; + case DOUBLE: + double doubleVal = resultSet.getDouble(columnIndex + 1); + return resultSet.wasNull() ? null : doubleVal; + case LARGEINT: + case DECIMALV2: + case DECIMAL32: + case DECIMAL64: + case DECIMAL128: + BigDecimal decimalVal = resultSet.getBigDecimal(columnIndex + 1); + return resultSet.wasNull() ? null : decimalVal; + case DATETIME: + case DATETIMEV2: + Timestamp timestampVal = resultSet.getTimestamp(columnIndex + 1); + return resultSet.wasNull() ? null : timestampVal.toLocalDateTime(); + case CHAR: + case VARCHAR: + case STRING: + Object stringVal = resultSet.getObject(columnIndex + 1); + return resultSet.wasNull() ? null : stringVal; + default: + throw new IllegalArgumentException("Unsupported column type: " + type.getType()); } } diff --git a/fe/pom.xml b/fe/pom.xml index 96c152b7dc1854..c023d353d88600 100644 --- a/fe/pom.xml +++ b/fe/pom.xml @@ -370,6 +370,7 @@ under the License. 202 1.2.24 12.22.0 + 5.3.0 @@ -1754,6 +1755,11 @@ under the License. 
pom import + + org.semver4j + semver4j + ${semver4j.version} + diff --git a/regression-test/data/external_table_p0/jdbc/test_oracle_jdbc_catalog.out b/regression-test/data/external_table_p0/jdbc/test_oracle_jdbc_catalog.out index 9fea31242a29b9..82afecb61bd2b3 100644 --- a/regression-test/data/external_table_p0/jdbc/test_oracle_jdbc_catalog.out +++ b/regression-test/data/external_table_p0/jdbc/test_oracle_jdbc_catalog.out @@ -265,3 +265,7 @@ Doris -- !query_lower_3 -- doris +-- !query_ojdbc6_all_types -- +1 111 123 7456123.89 573 34 673.43 34.1264 60.0 23.231 99 9999 999999999 999999999999999999 999 99999 9999999999 9999999999999999999 1 china beijing alice abcdefghrjkmnopq 123.45 12300 0.0012345 2022-01-21T05:23:01 2019-11-12T20:33:57.999 2019-11-12T20:33:57.999998 2019-11-12T20:33:57.999996 2019-11-12T20:33:57.999997 223-9 12 10:23:1.123457000 +2 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N + diff --git a/regression-test/suites/external_table_p0/jdbc/test_oracle_jdbc_catalog.groovy b/regression-test/suites/external_table_p0/jdbc/test_oracle_jdbc_catalog.groovy index 057723a808e04d..571dda0e5d8085 100644 --- a/regression-test/suites/external_table_p0/jdbc/test_oracle_jdbc_catalog.groovy +++ b/regression-test/suites/external_table_p0/jdbc/test_oracle_jdbc_catalog.groovy @@ -21,6 +21,7 @@ suite("test_oracle_jdbc_catalog", "p0,external,oracle,external_docker,external_d String s3_endpoint = getS3Endpoint() String bucket = getS3BucketName() String driver_url = "https://${bucket}.${s3_endpoint}/regression/jdbc_driver/ojdbc8.jar" + String driver6_url = "https://${bucket}.${s3_endpoint}/regression/jdbc_driver/ojdbc6.jar" if (enabled != null && enabled.equalsIgnoreCase("true")) { String catalog_name = "oracle_catalog"; String internal_db_name = "regression_test_jdbc_catalog_p0"; @@ -281,5 +282,20 @@ suite("test_oracle_jdbc_catalog", "p0,external,oracle,external_docker,external_d qt_query_lower_3 """ select doris_3 from 
doris_test.lower_test; """ sql """drop catalog if exists ${catalog_name} """ + + // test for ojdbc6 + sql """drop catalog if exists oracle_ojdbc6; """ + sql """create catalog if not exists oracle_ojdbc6 properties( + "type"="jdbc", + "user"="doris_test", + "password"="123456", + "jdbc_url" = "jdbc:oracle:thin:@${externalEnvIp}:${oracle_port}:${SID}", + "driver_url" = "${driver6_url}", + "driver_class" = "oracle.jdbc.OracleDriver" + );""" + sql """ use oracle_ojdbc6.DORIS_TEST; """ + qt_query_ojdbc6_all_types """ select * from oracle_ojdbc6.DORIS_TEST.TEST_ALL_TYPES order by 1; """ + + sql """drop catalog if exists oracle_ojdbc6; """ } } From cd39d6b4f17b14576ec081ae365f0f48a2b04f45 Mon Sep 17 00:00:00 2001 From: zy-kkk Date: Wed, 14 Aug 2024 21:53:56 +0800 Subject: [PATCH 02/13] [test](jdbc catalog) add oceanbase ce jdbc catalog test (#34972) --- .../oceanbase/init/01-drop-db.sql | 18 +++++++ .../oceanbase/init/02-create-db.sql | 18 +++++++ .../oceanbase/init/03-create-table.sql | 51 +++++++++++++++++++ .../oceanbase/init/04-insert.sql | 30 +++++++++++ .../docker-compose/oceanbase/oceanbase.env | 19 +++++++ .../oceanbase/oceanbase.yaml.tpl | 51 +++++++++++++++++++ .../thirdparties/run-thirdparties-docker.sh | 21 ++++++-- regression-test/conf/regression-conf.groovy | 1 + .../jdbc/test_oceanbase_jdbc_catalog.out | 7 +++ .../external/conf/regression-conf.groovy | 1 + .../jdbc/test_oceanbase_jdbc_catalog.groovy | 45 ++++++++++++++++ 11 files changed, 259 insertions(+), 3 deletions(-) create mode 100644 docker/thirdparties/docker-compose/oceanbase/init/01-drop-db.sql create mode 100644 docker/thirdparties/docker-compose/oceanbase/init/02-create-db.sql create mode 100644 docker/thirdparties/docker-compose/oceanbase/init/03-create-table.sql create mode 100644 docker/thirdparties/docker-compose/oceanbase/init/04-insert.sql create mode 100644 docker/thirdparties/docker-compose/oceanbase/oceanbase.env create mode 100644 
docker/thirdparties/docker-compose/oceanbase/oceanbase.yaml.tpl create mode 100644 regression-test/data/external_table_p0/jdbc/test_oceanbase_jdbc_catalog.out create mode 100644 regression-test/suites/external_table_p0/jdbc/test_oceanbase_jdbc_catalog.groovy diff --git a/docker/thirdparties/docker-compose/oceanbase/init/01-drop-db.sql b/docker/thirdparties/docker-compose/oceanbase/init/01-drop-db.sql new file mode 100644 index 00000000000000..d0db4a7e1ac925 --- /dev/null +++ b/docker/thirdparties/docker-compose/oceanbase/init/01-drop-db.sql @@ -0,0 +1,18 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +drop database if exists doris_test; diff --git a/docker/thirdparties/docker-compose/oceanbase/init/02-create-db.sql b/docker/thirdparties/docker-compose/oceanbase/init/02-create-db.sql new file mode 100644 index 00000000000000..f87b6922d9f003 --- /dev/null +++ b/docker/thirdparties/docker-compose/oceanbase/init/02-create-db.sql @@ -0,0 +1,18 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. 
The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +create database doris_test; diff --git a/docker/thirdparties/docker-compose/oceanbase/init/03-create-table.sql b/docker/thirdparties/docker-compose/oceanbase/init/03-create-table.sql new file mode 100644 index 00000000000000..6753f93ec33761 --- /dev/null +++ b/docker/thirdparties/docker-compose/oceanbase/init/03-create-table.sql @@ -0,0 +1,51 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. 
+ +create table doris_test.all_types ( + `tinyint_u` tinyint unsigned, + `smallint_u` smallint unsigned, + `mediumint_u` mediumint unsigned, + `int_u` int unsigned, + `bigint_u` bigint unsigned, + `decimal_u` decimal(18, 5) unsigned, + `double_u` double unsigned, + `float_u` float unsigned, + `boolean` boolean, + `tinyint` tinyint, + `smallint` smallint, + `year` year, + `mediumint` mediumint, + `int` int, + `bigint` bigint, + `date` date, + `timestamp` timestamp(4) null, + `datetime` datetime, + `float` float, + `double` double, + `decimal` decimal(12, 4), + `char` char(5), + `varchar` varchar(10), + `time` time(4), + `text` text, + `blob` blob, + `json` json, + `set` set('Option1', 'Option2', 'Option3'), + `bit` bit(6), + `binary` binary(12), + `varbinary` varbinary(12), + `enum` enum('Value1', 'Value2', 'Value3') +) engine=innodb charset=utf8; \ No newline at end of file diff --git a/docker/thirdparties/docker-compose/oceanbase/init/04-insert.sql b/docker/thirdparties/docker-compose/oceanbase/init/04-insert.sql new file mode 100644 index 00000000000000..5dcbec9bcde941 --- /dev/null +++ b/docker/thirdparties/docker-compose/oceanbase/init/04-insert.sql @@ -0,0 +1,30 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. 
+ +INSERT INTO doris_test.all_types VALUES +(201, 301, 401, 501, 601, 3.14159, 4.1415926, 5.141592, true, -123, -301, 2012, -401, -501, -601, '2012-10-30', '2012-10-25 12:05:36.3456712', '2012-10-25 08:08:08.3456712', + -4.14145001, -5.1400000001, -6.140000001, 'row1', 'line1', '09:09:09.56782346', 'text1', X'48656C6C6F20576F726C64', '{"name": "Alice", "age": 30, "city": "London"}', + 'Option1,Option3', b'101010', X'48656C6C6F', X'48656C6C6F', 'Value2'), +(202, 302, 402, 502, 602, 4.14159, 5.1415926, 6.141592, false, -124, -302, 2013, -402, -502, -602, '2012-11-01', '2012-10-26 02:08:39.3456712', '2013-10-26 08:09:18.3456712', + -5.14145001, -6.1400000001, -7.140000001, 'row2', 'line2', '09:11:09.56782346', 'text2', X'E86F6C6C6F20576F726C67', '{"name": "Gaoxin", "age": 18, "city": "ChongQing"}', + 'Option1,Option2', b'101111', X'58676C6C6F', X'88656C6C9F', 'Value3'), +(null, 302, null, 502, 602, 4.14159, null, 6.141592, null, -124, -302, 2013, -402, -502, -602, null, '2012-10-26 02:08:39.3456712', '2013-10-26 08:09:18.3456712', + -5.14145001, null, -7.140000001, 'row2', null, '09:11:09.56782346', 'text2', X'E86F6C6C6F20576F726C67', null, + null, b'101111', null, X'88656C6C9F', 'Value3'), +(203, 303, 403, 503, 603, 7.14159, 8.1415926, 9.141592, false, null, -402, 2017, -602, -902, -1102, '2012-11-02', null, '2013-10-27 08:11:18.3456712', + -5.14145000001, -6.1400000000001, -7.140000000001, 'row3', 'line3', '09:11:09.56782346', 'text3', X'E86F6C6C6F20576F726C67', '{"name": "ChenQi", "age": 24, "city": "ChongQing"}', + 'Option2', b'101111', X'58676C6C6F', null, 'Value1'); diff --git a/docker/thirdparties/docker-compose/oceanbase/oceanbase.env b/docker/thirdparties/docker-compose/oceanbase/oceanbase.env new file mode 100644 index 00000000000000..5a8998e7cd5cff --- /dev/null +++ b/docker/thirdparties/docker-compose/oceanbase/oceanbase.env @@ -0,0 +1,19 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license 
agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +DOCKER_OCEANBASE_EXTERNAL_PORT=2881 diff --git a/docker/thirdparties/docker-compose/oceanbase/oceanbase.yaml.tpl b/docker/thirdparties/docker-compose/oceanbase/oceanbase.yaml.tpl new file mode 100644 index 00000000000000..6dc507bcec6c8c --- /dev/null +++ b/docker/thirdparties/docker-compose/oceanbase/oceanbase.yaml.tpl @@ -0,0 +1,51 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +version: "2.1" + +services: + doris--oceanbase: + image: oceanbase/oceanbase-ce:4.2.1 + restart: always + environment: + MODE: slim + OB_MEMORY_LIMIT: 5G + TZ: Asia/Shanghai + ports: + - ${DOCKER_OCEANBASE_EXTERNAL_PORT}:2881 + healthcheck: + test: ["CMD-SHELL", "obclient -h127.1 -uroot@sys -P2881 -e 'SELECT 1'"] + interval: 5s + timeout: 60s + retries: 120 + volumes: + - ./init:/root/boot/init.d + networks: + - doris--oceanbase + doris--oceanbase-hello-world: + image: hello-world + depends_on: + doris--oceanbase: + condition: service_healthy + networks: + - doris--oceanbase +networks: + doris--oceanbase: + ipam: + driver: default + config: + - subnet: 168.32.0.0/24 diff --git a/docker/thirdparties/run-thirdparties-docker.sh b/docker/thirdparties/run-thirdparties-docker.sh index 0da1f251ed4cdb..dd31689b3bf4f2 100755 --- a/docker/thirdparties/run-thirdparties-docker.sh +++ b/docker/thirdparties/run-thirdparties-docker.sh @@ -37,7 +37,7 @@ Usage: $0 --stop stop the specified components All valid components: - mysql,pg,oracle,sqlserver,clickhouse,es,hive2,hive3,iceberg,hudi,trino,kafka,mariadb,db2,lakesoul,kerberos + mysql,pg,oracle,sqlserver,clickhouse,es,hive2,hive3,iceberg,hudi,trino,kafka,mariadb,db2,oceanbase,lakesoul,kerberos " exit 1 } @@ -59,7 +59,7 @@ eval set -- "${OPTS}" if [[ "$#" == 1 ]]; then # default - COMPONENTS="mysql,es,hive2,hive3,pg,oracle,sqlserver,clickhouse,mariadb,iceberg,db2,kerberos" + COMPONENTS="mysql,es,hive2,hive3,pg,oracle,sqlserver,clickhouse,mariadb,iceberg,db2,oceanbase,kerberos" else while true; do case "$1" in @@ -91,7 +91,7 @@ else done if [[ "${COMPONENTS}"x == ""x ]]; then if [[ "${STOP}" -eq 1 ]]; then - COMPONENTS="mysql,es,pg,oracle,sqlserver,clickhouse,hive2,hive3,iceberg,hudi,trino,kafka,mariadb,db2,kerberos,lakesoul" + COMPONENTS="mysql,es,pg,oracle,sqlserver,clickhouse,hive2,hive3,iceberg,hudi,trino,kafka,mariadb,db2,oceanbase,kerberos,lakesoul" fi fi fi @@ -135,6 +135,7 @@ RUN_KAFKA=0 RUN_SPARK=0 RUN_MARIADB=0 
RUN_DB2=0 +RUN_OCEANBASE=0 RUN_LAKESOUL=0 RUN_KERBEROS=0 @@ -169,6 +170,8 @@ for element in "${COMPONENTS_ARR[@]}"; do RUN_MARIADB=1 elif [[ "${element}"x == "db2"x ]]; then RUN_DB2=1 + elif [[ "${element}"x == "oceanbase"x ]];then + RUN_OCEANBASE=1 elif [[ "${element}"x == "lakesoul"x ]]; then RUN_LAKESOUL=1 elif [[ "${element}"x == "kerberos"x ]]; then @@ -252,6 +255,18 @@ if [[ "${RUN_DB2}" -eq 1 ]]; then fi fi +if [[ "${RUN_OCEANBASE}" -eq 1 ]]; then + # oceanbase + cp "${ROOT}"/docker-compose/oceanbase/oceanbase.yaml.tpl "${ROOT}"/docker-compose/oceanbase/oceanbase.yaml + sed -i "s/doris--/${CONTAINER_UID}/g" "${ROOT}"/docker-compose/oceanbase/oceanbase.yaml + sudo docker compose -f "${ROOT}"/docker-compose/oceanbase/oceanbase.yaml --env-file "${ROOT}"/docker-compose/oceanbase/oceanbase.env down + if [[ "${STOP}" -ne 1 ]]; then + sudo rm "${ROOT}"/docker-compose/oceanbase/data/* -rf + sudo mkdir -p "${ROOT}"/docker-compose/oceanbase/data/ + sudo docker compose -f "${ROOT}"/docker-compose/oceanbase/oceanbase.yaml --env-file "${ROOT}"/docker-compose/oceanbase/oceanbase.env up -d + fi +fi + if [[ "${RUN_SQLSERVER}" -eq 1 ]]; then # sqlserver cp "${ROOT}"/docker-compose/sqlserver/sqlserver.yaml.tpl "${ROOT}"/docker-compose/sqlserver/sqlserver.yaml diff --git a/regression-test/conf/regression-conf.groovy b/regression-test/conf/regression-conf.groovy index 29b90f037239b9..ac66e01f90626c 100644 --- a/regression-test/conf/regression-conf.groovy +++ b/regression-test/conf/regression-conf.groovy @@ -127,6 +127,7 @@ clickhouse_22_port=8123 doris_port=9030 mariadb_10_port=3326 db2_11_port=50000 +oceanbase_port=2881 // hive catalog test config // To enable hive/paimon test, you need first start hive container.
diff --git a/regression-test/data/external_table_p0/jdbc/test_oceanbase_jdbc_catalog.out b/regression-test/data/external_table_p0/jdbc/test_oceanbase_jdbc_catalog.out new file mode 100644 index 00000000000000..fdf887af2a2a4e --- /dev/null +++ b/regression-test/data/external_table_p0/jdbc/test_oceanbase_jdbc_catalog.out @@ -0,0 +1,7 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !query -- +\N 302 \N 502 602 4.14159 \N 6.141592 \N -124 -302 2013 -402 -502 -602 \N 2012-10-26T02:08:39.345700 2013-10-26T08:09:18 -5.14145 \N -7.1400 row2 \N 09:11:09.567 text2 0xE86F6C6C6F20576F726C67 \N \N 0x2F \N 0x88656C6C9F Value3 +201 301 401 501 601 3.14159 4.1415926 5.141592 1 -123 -301 2012 -401 -501 -601 2012-10-30 2012-10-25T12:05:36.345700 2012-10-25T08:08:08 -4.14145 -5.1400000001 -6.1400 row1 line1 09:09:09.567 text1 0x48656C6C6F20576F726C64 {"age": 30, "city": "London", "name": "Alice"} Option1,Option3 0x2A 0x48656C6C6F00000000000000 0x48656C6C6F Value2 +202 302 402 502 602 4.14159 5.1415926 6.141592 0 -124 -302 2013 -402 -502 -602 2012-11-01 2012-10-26T02:08:39.345700 2013-10-26T08:09:18 -5.14145 -6.1400000001 -7.1400 row2 line2 09:11:09.567 text2 0xE86F6C6C6F20576F726C67 {"age": 18, "city": "ChongQing", "name": "Gaoxin"} Option1,Option2 0x2F 0x58676C6C6F00000000000000 0x88656C6C9F Value3 +203 303 403 503 603 7.14159 8.1415926 9.141592 0 \N -402 2017 -602 -902 -1102 2012-11-02 \N 2013-10-27T08:11:18 -5.14145 -6.1400000000001 -7.1400 row3 line3 09:11:09.567 text3 0xE86F6C6C6F20576F726C67 {"age": 24, "city": "ChongQing", "name": "ChenQi"} Option2 0x2F 0x58676C6C6F00000000000000 \N Value1 + diff --git a/regression-test/pipeline/external/conf/regression-conf.groovy b/regression-test/pipeline/external/conf/regression-conf.groovy index 395d3a99e63ccf..b836666f65d5a8 100644 --- a/regression-test/pipeline/external/conf/regression-conf.groovy +++ b/regression-test/pipeline/external/conf/regression-conf.groovy @@ -156,6 +156,7 
@@ hdfs_port=8020 oracle_11_port=1521 sqlserver_2022_port=1433 clickhouse_22_port=8123 +oceanbase_port=2881 db2_11_port=50000 // trino-connector catalog test config diff --git a/regression-test/suites/external_table_p0/jdbc/test_oceanbase_jdbc_catalog.groovy b/regression-test/suites/external_table_p0/jdbc/test_oceanbase_jdbc_catalog.groovy new file mode 100644 index 00000000000000..3d07434b561ac8 --- /dev/null +++ b/regression-test/suites/external_table_p0/jdbc/test_oceanbase_jdbc_catalog.groovy @@ -0,0 +1,45 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_oceanbase_jdbc_catalog", "p0,external,oceanbase,external_docker,external_docker_oceanbase") { + String enabled = context.config.otherConfigs.get("enableJdbcTest"); + String externalEnvIp = context.config.otherConfigs.get("externalEnvIp") + String s3_endpoint = getS3Endpoint() + String bucket = getS3BucketName() + String driver_url = "https://${bucket}.${s3_endpoint}/regression/jdbc_driver/oceanbase-client-2.4.8.jar" + if (enabled != null && enabled.equalsIgnoreCase("true")) { + String catalog_name = "oceanbase_catalog"; + String ex_db_name = "doris_test"; + String oceanbase_port = context.config.otherConfigs.get("oceanbase_port"); + + + sql """ drop catalog if exists ${catalog_name} """ + + sql """ create catalog if not exists ${catalog_name} properties( + "type"="jdbc", + "user"="root@test", + "password"="", + "jdbc_url" = "jdbc:oceanbase://${externalEnvIp}:${oceanbase_port}/doris_test", + "driver_url" = "${driver_url}", + "driver_class" = "com.oceanbase.jdbc.Driver" + );""" + + order_qt_query """ select * from ${catalog_name}.doris_test.all_types order by 1; """ + + sql """ drop catalog if exists ${catalog_name} """ + } +} From 0cc438c2af353da003f7b2636753edbc0d8700a8 Mon Sep 17 00:00:00 2001 From: Pxl Date: Wed, 14 Aug 2024 22:13:19 +0800 Subject: [PATCH 03/13] [Bug](runtime-filter) set inited to true on BloomFilterFuncBase::assign (#39335) ## Proposed changes set inited to true on BloomFilterFuncBase::assign ```cpp *** SIGABRT unknown detail explain (@0x1ba2) received by PID 7074 (TID 9018 OR 0x7f4adf5df640) from PID 7074; stack trace: *** 0# doris::signal::(anonymous namespace)::FailureSignalHandler(int, siginfo_t*, void*) at /home/zcp/repo_center/doris_master/doris/be/src/common/signal_handler.h:421 1# 0x00007F525A7F6520 in /lib/x86_64-linux-gnu/libc.so.6 2# pthread_kill at ./nptl/pthread_kill.c:89 3# raise at ../sysdeps/posix/raise.c:27 4# abort at ./stdlib/abort.c:81 5# 0x0000560BDC6CE8DD in /mnt/hdd01/ci/master-deploy/be/lib/doris_be 6# 
0x0000560BDC6C0F1A in /mnt/hdd01/ci/master-deploy/be/lib/doris_be 7# google::LogMessage::SendToLog() in /mnt/hdd01/ci/master-deploy/be/lib/doris_be 8# google::LogMessage::Flush() in /mnt/hdd01/ci/master-deploy/be/lib/doris_be 9# google::LogMessageFatal::~LogMessageFatal() in /mnt/hdd01/ci/master-deploy/be/lib/doris_be 10# doris::BloomFilterFuncBase::merge(doris::BloomFilterFuncBase*) at /home/zcp/repo_center/doris_master/doris/be/src/exprs/bloom_filter_func.h:159 11# doris::RuntimePredicateWrapper::merge(doris::RuntimePredicateWrapper const*) at /home/zcp/repo_center/doris_master/doris/be/src/exprs/runtime_filter.cpp:539 12# doris::IRuntimeFilter::merge_from(doris::RuntimePredicateWrapper const*) in /mnt/hdd01/ci/master-deploy/be/lib/doris_be 13# doris::RuntimeFilterMergeControllerEntity::merge(doris::PMergeFilterRequest const*, butil::IOBufAsZeroCopyInputStream*) at /home/zcp/repo_center/doris_master/doris/be/src/runtime/runtime_filter_mgr.cpp:399 14# doris::FragmentMgr::merge_filter(doris::PMergeFilterRequest const*, butil::IOBufAsZeroCopyInputStream*) at /home/zcp/repo_center/doris_master/doris/be/src/runtime/fragment_mgr.cpp:1170 15# std::_Function_handler::_M_invoke(std::_Any_data const&) at /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/std_function.h:291 16# doris::WorkThreadPool::work_thread(int) at /home/zcp/repo_center/doris_master/doris/be/src/util/work_thread_pool.hpp:159 17# execute_native_thread_routine at ../../../../../libstdc++-v3/src/c++11/thread.cc:84 18# start_thread at ./nptl/pthread_create.c:442 19# 0x00007F525A8DA850 at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:83 172.20.50.47 last coredump sql: ``` --- be/src/exprs/bloom_filter_func.h | 1 + 1 file changed, 1 insertion(+) diff --git a/be/src/exprs/bloom_filter_func.h b/be/src/exprs/bloom_filter_func.h index de093dfa5285df..6d452bbe9922dc 100644 --- a/be/src/exprs/bloom_filter_func.h +++ b/be/src/exprs/bloom_filter_func.h @@ -188,6 +188,7 @@ class 
BloomFilterFuncBase : public RuntimeFilterFuncBase { } _bloom_filter_alloced = data_size; + _inited = true; return _bloom_filter->init(data, data_size); } From e70e91bb51bcba60c42613363e2e119c956f044b Mon Sep 17 00:00:00 2001 From: wuwenchi Date: Wed, 14 Aug 2024 22:43:17 +0800 Subject: [PATCH 04/13] [improvement](external catalog)Optimize the process of refreshing catalog (#39205) ## Proposed changes When the catalog attributes have not changed, refreshing the catalog only requires processing the cache, without rebuilding the entire catalog. --- .../java/org/apache/doris/catalog/RefreshManager.java | 2 +- .../org/apache/doris/datasource/ExternalCatalog.java | 9 +++++++++ .../apache/doris/datasource/hive/HiveMetadataOps.java | 4 ++-- .../doris/datasource/iceberg/IcebergMetadataOps.java | 4 ++-- .../doris/datasource/jdbc/JdbcExternalCatalog.java | 5 +++++ .../apache/doris/datasource/RefreshCatalogTest.java | 11 ----------- 6 files changed, 19 insertions(+), 16 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/RefreshManager.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/RefreshManager.java index d017ba7829fd1a..52694e5a5bdffb 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/RefreshManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/RefreshManager.java @@ -76,7 +76,7 @@ public void replayRefreshCatalog(CatalogLog log) { private void refreshCatalogInternal(CatalogIf catalog, boolean invalidCache) { String catalogName = catalog.getName(); if (!catalogName.equals(InternalCatalog.INTERNAL_CATALOG_NAME)) { - ((ExternalCatalog) catalog).onRefresh(invalidCache); + ((ExternalCatalog) catalog).onRefreshCache(invalidCache); LOG.info("refresh catalog {} with invalidCache {}", catalogName, invalidCache); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java index 2dfcec1d8aef4d..d99ac76c7b9f13 100644 
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java @@ -390,6 +390,15 @@ public void onRefresh(boolean invalidCache) { synchronized (this.propLock) { this.convertedProperties = null; } + + refreshOnlyCatalogCache(invalidCache); + } + + public void onRefreshCache(boolean invalidCache) { + refreshOnlyCatalogCache(invalidCache); + } + + private void refreshOnlyCatalogCache(boolean invalidCache) { if (useMetaCache.isPresent()) { if (useMetaCache.get() && metaCache != null) { metaCache.invalidateAll(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetadataOps.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetadataOps.java index dcfc6d1ad33f90..e855affc31a9de 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetadataOps.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetadataOps.java @@ -126,7 +126,7 @@ public void createDb(CreateDbStmt stmt) throws DdlException { catalogDatabase.setProperties(properties); catalogDatabase.setComment(properties.getOrDefault("comment", "")); client.createDatabase(catalogDatabase); - catalog.onRefresh(true); + catalog.onRefreshCache(true); } catch (Exception e) { throw new RuntimeException(e.getMessage(), e); } @@ -146,7 +146,7 @@ public void dropDb(DropDbStmt stmt) throws DdlException { } try { client.dropDatabase(dbName); - catalog.onRefresh(true); + catalog.onRefreshCache(true); } catch (Exception e) { throw new RuntimeException(e.getMessage(), e); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergMetadataOps.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergMetadataOps.java index 3236367afc003e..edb83ac5bc8970 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergMetadataOps.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergMetadataOps.java @@ -108,7 +108,7 @@ public void createDb(CreateDbStmt stmt) throws DdlException { } } nsCatalog.createNamespace(Namespace.of(dbName), properties); - dorisCatalog.onRefresh(true); + dorisCatalog.onRefreshCache(true); } @Override @@ -124,7 +124,7 @@ public void dropDb(DropDbStmt stmt) throws DdlException { } SupportsNamespaces nsCatalog = (SupportsNamespaces) catalog; nsCatalog.dropNamespace(Namespace.of(dbName)); - dorisCatalog.onRefresh(true); + dorisCatalog.onRefreshCache(true); } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/JdbcExternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/JdbcExternalCatalog.java index afa341fa879ac0..fb26265d19fe93 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/JdbcExternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/JdbcExternalCatalog.java @@ -122,6 +122,11 @@ public void onRefresh(boolean invalidCache) { } } + @Override + public void onRefreshCache(boolean invalidCache) { + onRefresh(invalidCache); + } + @Override public void onClose() { super.onClose(); diff --git a/fe/fe-core/src/test/java/org/apache/doris/datasource/RefreshCatalogTest.java b/fe/fe-core/src/test/java/org/apache/doris/datasource/RefreshCatalogTest.java index 34177f49382744..89994c36142c96 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/datasource/RefreshCatalogTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/datasource/RefreshCatalogTest.java @@ -156,18 +156,7 @@ public void testRefreshCatalogLastUpdateTime() throws Exception { } catch (Exception e) { // Do nothing } - Assertions.assertFalse(((ExternalCatalog) test2).isInitialized()); - table.makeSureInitialized(); Assertions.assertTrue(((ExternalCatalog) test2).isInitialized()); - // table.makeSureInitialized() triggered init method - long l4 = test2.getLastUpdateTime(); - Assertions.assertTrue(l4 
> l3); - try { - DdlExecutor.execute(Env.getCurrentEnv(), refreshCatalogStmt); - } catch (Exception e) { - // Do nothing - } - Assertions.assertFalse(((ExternalCatalog) test2).isInitialized()); } public static class RefreshCatalogProvider implements TestExternalCatalog.TestCatalogProvider { From aaf7a96ee6fc6c57791f21832f76cb00e51ee188 Mon Sep 17 00:00:00 2001 From: morrySnow <101034200+morrySnow@users.noreply.github.com> Date: Wed, 14 Aug 2024 23:12:04 +0800 Subject: [PATCH 05/13] [fix](nullif) revert nullif signature change by #38497 (#39342) #38497 let second arg follow first arg's type. but the correct way is let second arg convert to a new type get from processing like 1st = 2nd. and then compare this two arg with the 2nd arg type when execution. this pr revert nullif signature. and will let nullif work as expected in future PR. --- .../expressions/functions/scalar/NullIf.java | 39 +++++++++++++++++-- .../nereids_function_p0/type_coercion.out | 3 ++ .../nereids_function_p0/type_coercion.groovy | 1 + 3 files changed, 40 insertions(+), 3 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/NullIf.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/NullIf.java index d8985519a1bd0c..447e60a752fc64 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/NullIf.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/NullIf.java @@ -23,8 +23,22 @@ import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; -import org.apache.doris.nereids.types.coercion.AnyDataType; -import org.apache.doris.nereids.types.coercion.FollowToAnyDataType; +import org.apache.doris.nereids.types.BigIntType; +import 
org.apache.doris.nereids.types.BooleanType; +import org.apache.doris.nereids.types.DateTimeType; +import org.apache.doris.nereids.types.DateTimeV2Type; +import org.apache.doris.nereids.types.DateType; +import org.apache.doris.nereids.types.DateV2Type; +import org.apache.doris.nereids.types.DecimalV2Type; +import org.apache.doris.nereids.types.DecimalV3Type; +import org.apache.doris.nereids.types.DoubleType; +import org.apache.doris.nereids.types.FloatType; +import org.apache.doris.nereids.types.IntegerType; +import org.apache.doris.nereids.types.LargeIntType; +import org.apache.doris.nereids.types.SmallIntType; +import org.apache.doris.nereids.types.StringType; +import org.apache.doris.nereids.types.TinyIntType; +import org.apache.doris.nereids.types.VarcharType; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; @@ -38,7 +52,26 @@ public class NullIf extends ScalarFunction implements BinaryExpression, ExplicitlyCastableSignature, AlwaysNullable { public static final List SIGNATURES = ImmutableList.of( - FunctionSignature.retArgType(0).args(new AnyDataType(0), new FollowToAnyDataType(0)) + FunctionSignature.ret(BooleanType.INSTANCE).args(BooleanType.INSTANCE, BooleanType.INSTANCE), + FunctionSignature.ret(TinyIntType.INSTANCE).args(TinyIntType.INSTANCE, TinyIntType.INSTANCE), + FunctionSignature.ret(SmallIntType.INSTANCE).args(SmallIntType.INSTANCE, SmallIntType.INSTANCE), + FunctionSignature.ret(IntegerType.INSTANCE).args(IntegerType.INSTANCE, IntegerType.INSTANCE), + FunctionSignature.ret(BigIntType.INSTANCE).args(BigIntType.INSTANCE, BigIntType.INSTANCE), + FunctionSignature.ret(LargeIntType.INSTANCE).args(LargeIntType.INSTANCE, LargeIntType.INSTANCE), + FunctionSignature.ret(FloatType.INSTANCE).args(FloatType.INSTANCE, FloatType.INSTANCE), + FunctionSignature.ret(DoubleType.INSTANCE).args(DoubleType.INSTANCE, DoubleType.INSTANCE), + FunctionSignature.ret(DateTimeType.INSTANCE).args(DateTimeType.INSTANCE, 
DateTimeType.INSTANCE), + FunctionSignature.ret(DateType.INSTANCE).args(DateType.INSTANCE, DateType.INSTANCE), + FunctionSignature.ret(DateTimeV2Type.SYSTEM_DEFAULT) + .args(DateTimeV2Type.SYSTEM_DEFAULT, DateTimeV2Type.SYSTEM_DEFAULT), + FunctionSignature.ret(DateV2Type.INSTANCE).args(DateV2Type.INSTANCE, DateV2Type.INSTANCE), + FunctionSignature.ret(DecimalV2Type.SYSTEM_DEFAULT) + .args(DecimalV2Type.SYSTEM_DEFAULT, DecimalV2Type.SYSTEM_DEFAULT), + FunctionSignature.ret(DecimalV3Type.WILDCARD) + .args(DecimalV3Type.WILDCARD, DecimalV3Type.WILDCARD), + FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT) + .args(VarcharType.SYSTEM_DEFAULT, VarcharType.SYSTEM_DEFAULT), + FunctionSignature.ret(StringType.INSTANCE).args(StringType.INSTANCE, StringType.INSTANCE) ); /** diff --git a/regression-test/data/nereids_function_p0/type_coercion.out b/regression-test/data/nereids_function_p0/type_coercion.out index 54fddd1e5b682a..b600040e8f6dc6 100644 --- a/regression-test/data/nereids_function_p0/type_coercion.out +++ b/regression-test/data/nereids_function_p0/type_coercion.out @@ -32,3 +32,6 @@ -- !pmod -- 0.0 +-- !nullif -- +13 + diff --git a/regression-test/suites/nereids_function_p0/type_coercion.groovy b/regression-test/suites/nereids_function_p0/type_coercion.groovy index 8c5dcb8134e49a..57b63030adb48e 100644 --- a/regression-test/suites/nereids_function_p0/type_coercion.groovy +++ b/regression-test/suites/nereids_function_p0/type_coercion.groovy @@ -27,4 +27,5 @@ suite("function_type_coercion") { qt_array_enumerate_uniq """select array_enumerate_uniq([1,1,1],['1','1','1.0'])""" qt_array_cum_sum """select array_cum_sum(array('1', '2', '3000'))""" qt_pmod """select pmod(2, '1.0')""" + qt_nullif """SELECT nullif(13, -4851)""" } From 0dca9eff22ac911e523eceefce6ac94ad9535320 Mon Sep 17 00:00:00 2001 From: lw112 <131352377+felixwluo@users.noreply.github.com> Date: Wed, 14 Aug 2024 23:20:41 +0800 Subject: [PATCH 06/13] [Enhancement](alter) support add partitions in batch 
(#37114) ## Proposed changes Issue Number: close #32524 --- fe/fe-core/src/main/cup/sql_parser.cup | 8 ++ .../java/org/apache/doris/alter/Alter.java | 10 +- .../analysis/AlterMultiPartitionClause.java | 73 ++++++++++++ .../java/org/apache/doris/catalog/Env.java | 6 + .../doris/datasource/InternalCatalog.java | 20 ++++ .../test_alter_add_multi_partition.groovy | 110 ++++++++++++++++++ 6 files changed, 226 insertions(+), 1 deletion(-) create mode 100644 fe/fe-core/src/main/java/org/apache/doris/analysis/AlterMultiPartitionClause.java create mode 100644 regression-test/suites/alter_p0/test_alter_add_multi_partition.groovy diff --git a/fe/fe-core/src/main/cup/sql_parser.cup b/fe/fe-core/src/main/cup/sql_parser.cup index 619f13e62781f0..891a72645cdfe4 100644 --- a/fe/fe-core/src/main/cup/sql_parser.cup +++ b/fe/fe-core/src/main/cup/sql_parser.cup @@ -1746,6 +1746,14 @@ alter_table_clause ::= {: RESULT = new ModifyEngineClause(engine, properties); :} + | KW_ADD opt_tmp:isTempPartition KW_PARTITIONS KW_FROM LPAREN partition_key_list:lower RPAREN KW_TO LPAREN partition_key_list:upper RPAREN KW_INTERVAL INTEGER_LITERAL:time_interval ident:time_unit opt_properties:properties + {: + RESULT = new AlterMultiPartitionClause(PartitionKeyDesc.createMultiFixed(lower, upper, time_interval, time_unit), properties, isTempPartition); + :} + | KW_ADD opt_tmp:isTempPartition KW_PARTITIONS KW_FROM LPAREN partition_key_list:lower RPAREN KW_TO LPAREN partition_key_list:upper RPAREN KW_INTERVAL INTEGER_LITERAL:num_interval opt_properties:properties + {: + RESULT = new AlterMultiPartitionClause(PartitionKeyDesc.createMultiFixed(lower, upper, num_interval), properties, isTempPartition); + :} ; opt_enable_feature_properties ::= diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/Alter.java b/fe/fe-core/src/main/java/org/apache/doris/alter/Alter.java index 1fcb4fe65c38a3..ced14ad4b8dc8a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/Alter.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/alter/Alter.java @@ -20,6 +20,7 @@ import org.apache.doris.analysis.AddPartitionClause; import org.apache.doris.analysis.AddPartitionLikeClause; import org.apache.doris.analysis.AlterClause; +import org.apache.doris.analysis.AlterMultiPartitionClause; import org.apache.doris.analysis.AlterSystemStmt; import org.apache.doris.analysis.AlterTableStmt; import org.apache.doris.analysis.AlterViewStmt; @@ -252,7 +253,8 @@ private boolean processAlterOlapTable(AlterTableStmt stmt, OlapTable olapTable, } else if (alterClause instanceof DropPartitionFromIndexClause) { // do nothing } else if (alterClause instanceof AddPartitionClause - || alterClause instanceof AddPartitionLikeClause) { + || alterClause instanceof AddPartitionLikeClause + || alterClause instanceof AlterMultiPartitionClause) { needProcessOutsideTableLock = true; } else { throw new DdlException("Invalid alter operation: " + alterClause.getOpType()); @@ -508,6 +510,12 @@ public void processAlterTable(AlterTableStmt stmt) throws UserException { } else if (alterClause instanceof ModifyTablePropertiesClause) { Map properties = alterClause.getProperties(); ((SchemaChangeHandler) schemaChangeHandler).updateTableProperties(db, tableName, properties); + } else if (alterClause instanceof AlterMultiPartitionClause) { + if (!((AlterMultiPartitionClause) alterClause).isTempPartition()) { + DynamicPartitionUtil.checkAlterAllowed( + (OlapTable) db.getTableOrMetaException(tableName, TableType.OLAP)); + } + Env.getCurrentEnv().addMultiPartitions(db, tableName, (AlterMultiPartitionClause) alterClause); } else { throw new DdlException("Invalid alter operation: " + alterClause.getOpType()); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterMultiPartitionClause.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterMultiPartitionClause.java new file mode 100644 index 00000000000000..f7231e6da3e60f --- /dev/null +++ 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterMultiPartitionClause.java @@ -0,0 +1,73 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.analysis; + +import org.apache.doris.alter.AlterOpType; +import org.apache.doris.common.AnalysisException; + +import java.util.Map; + +public class AlterMultiPartitionClause extends AlterTableClause { + private PartitionKeyDesc partitionKeyDesc; + private Map properties; + private boolean isTempPartition; + + public AlterMultiPartitionClause(PartitionKeyDesc partitionKeyDesc, Map properties, + boolean isTempPartition) { + super(AlterOpType.ADD_PARTITION); + this.partitionKeyDesc = partitionKeyDesc; + this.properties = properties; + this.isTempPartition = isTempPartition; + } + + @Override + public void analyze(Analyzer analyzer) throws AnalysisException { + } + + @Override + public String toSql() { + return String.format("ADD PARTITIONS %s", partitionKeyDesc.toSql()); + } + + @Override + public String toString() { + return toSql(); + } + + @Override + public boolean allowOpMTMV() { + return false; + } + + @Override + public boolean needChangeMTMVState() { + return false; + } + + public PartitionKeyDesc getPartitionKeyDesc() { + return 
partitionKeyDesc; + } + + public Map getProperties() { + return properties; + } + + public boolean isTempPartition() { + return isTempPartition; + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java index 907f25739208a0..121228f569af23 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java @@ -38,6 +38,7 @@ import org.apache.doris.analysis.AlterDatabaseQuotaStmt; import org.apache.doris.analysis.AlterDatabaseQuotaStmt.QuotaType; import org.apache.doris.analysis.AlterDatabaseRename; +import org.apache.doris.analysis.AlterMultiPartitionClause; import org.apache.doris.analysis.AlterSystemStmt; import org.apache.doris.analysis.AlterTableStmt; import org.apache.doris.analysis.AlterViewStmt; @@ -3278,6 +3279,11 @@ public PartitionPersistInfo addPartition(Database db, String tableName, AddParti isCreateTable, generatedPartitionId, writeEditLog); } + public void addMultiPartitions(Database db, String tableName, AlterMultiPartitionClause multiPartitionClause) + throws DdlException { + getInternalCatalog().addMultiPartitions(db, tableName, multiPartitionClause); + } + public void addPartitionLike(Database db, String tableName, AddPartitionLikeClause addPartitionLikeClause) throws DdlException { getInternalCatalog().addPartitionLike(db, tableName, addPartitionLikeClause); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java index 12093bb6696202..234119b992b860 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java @@ -25,6 +25,7 @@ import org.apache.doris.analysis.AlterDatabaseQuotaStmt; import org.apache.doris.analysis.AlterDatabaseQuotaStmt.QuotaType; import 
org.apache.doris.analysis.AlterDatabaseRename; +import org.apache.doris.analysis.AlterMultiPartitionClause; import org.apache.doris.analysis.Analyzer; import org.apache.doris.analysis.ColumnDef; import org.apache.doris.analysis.ColumnDef.DefaultValue; @@ -42,6 +43,7 @@ import org.apache.doris.analysis.HashDistributionDesc; import org.apache.doris.analysis.KeysDesc; import org.apache.doris.analysis.LiteralExpr; +import org.apache.doris.analysis.MultiPartitionDesc; import org.apache.doris.analysis.PartitionDesc; import org.apache.doris.analysis.PartitionKeyDesc; import org.apache.doris.analysis.PartitionNames; @@ -1867,6 +1869,24 @@ public PartitionPersistInfo addPartition(Database db, String tableName, AddParti } } + public void addMultiPartitions(Database db, String tableName, AlterMultiPartitionClause multiPartitionClause) + throws DdlException { + List singlePartitionDescs; + try { + MultiPartitionDesc multiPartitionDesc = new MultiPartitionDesc(multiPartitionClause.getPartitionKeyDesc(), + multiPartitionClause.getProperties()); + singlePartitionDescs = multiPartitionDesc.getSinglePartitionDescList(); + } catch (AnalysisException e) { + throw new DdlException("Failed to analyze multi partition clause: " + e.getMessage()); + } + + for (SinglePartitionDesc singlePartitionDesc : singlePartitionDescs) { + AddPartitionClause addPartitionClause = new AddPartitionClause(singlePartitionDesc, null, + multiPartitionClause.getProperties(), false); + addPartition(db, tableName, addPartitionClause, false, 0, true); + } + } + public void replayAddPartition(PartitionPersistInfo info) throws MetaNotFoundException { Database db = (Database) getDbOrMetaException(info.getDbId()); OlapTable olapTable = (OlapTable) db.getTableOrMetaException(info.getTableId(), diff --git a/regression-test/suites/alter_p0/test_alter_add_multi_partition.groovy b/regression-test/suites/alter_p0/test_alter_add_multi_partition.groovy new file mode 100644 index 00000000000000..a5a55d94beac90 --- /dev/null 
+++ b/regression-test/suites/alter_p0/test_alter_add_multi_partition.groovy @@ -0,0 +1,110 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_alter_add_multi_partition") { + sql "DROP TABLE IF EXISTS add_multi_partition FORCE" + + // Check if you can create a partition for a leap month + sql """ + CREATE TABLE IF NOT EXISTS add_multi_partition + ( + `k1` LARGEINT NOT NULL, + `date` DATE NOT NULL, + `k2` VARCHAR(20) + ) + ENGINE=OLAP + UNIQUE KEY(`k1`, `date`) + PARTITION BY RANGE(`date`) + ( + PARTITION `p_20000201` VALUES [("2000-02-01"), ("2000-02-05")) + ) + DISTRIBUTED BY HASH(`k1`) BUCKETS 1 + PROPERTIES + ( + "replication_num" = "1" + ); + """ + List> result1 = sql "show partitions from add_multi_partition;" + assertEquals(result1.size(), 1) + sql "ALTER TABLE add_multi_partition ADD PARTITIONS FROM ('2000-02-05') TO ('2000-03-01') INTERVAL 4 DAY;" + List> result2 = sql "show partitions from add_multi_partition;" + assertEquals(result2.size(), 8) + def partitionName = sql "show partitions from add_multi_partition where PartitionName = 'p_20000229';" + for (pn in partitionName) { + assertTrue(pn[1] == "p_20000229") + } + sql "DROP TABLE IF EXISTS add_multi_partition FORCE" + + + sql """ + CREATE 
TABLE `add_multi_partition` ( + `k1` LARGEINT NOT NULL, + `date` DATE NOT NULL, + `k2` VARCHAR(20) NULL + ) ENGINE=OLAP + UNIQUE KEY(`k1`, `date`) + PARTITION BY RANGE(`date`) + (PARTITION p_2024_01 VALUES [('2024-01-01'), ('2024-01-08')), + PARTITION p_2024_02 VALUES [('2024-01-08'), ('2024-01-15')), + PARTITION p_2024_03 VALUES [('2024-01-15'), ('2024-01-22')), + PARTITION p_2024_04 VALUES [('2024-01-22'), ('2024-01-29')), + PARTITION p_2024_05 VALUES [('2024-01-29'), ('2024-02-05')), + PARTITION p_2024_06 VALUES [('2024-02-05'), ('2024-02-12')), + PARTITION p_2024_07 VALUES [('2024-02-12'), ('2024-02-19')), + PARTITION p_2024_08 VALUES [('2024-02-19'), ('2024-02-26')), + PARTITION p_2024_09 VALUES [('2024-02-26'), ('2024-03-01'))) + DISTRIBUTED BY HASH(`k1`) BUCKETS 1 + PROPERTIES + ( + "replication_num" = "1" + ); + """ + List> result3 = sql "show partitions from add_multi_partition;" + assertEquals(result3.size(), 9) + sql "ALTER TABLE add_multi_partition ADD PARTITIONS FROM ('2024-04-01') TO ('2025-01-01') INTERVAL 1 WEEK;" + List> result4 = sql "show partitions from add_multi_partition;" + assertEquals(result4.size(), 49) + sql "DROP TABLE IF EXISTS add_multi_partition FORCE" + + + sql """ + CREATE TABLE IF NOT EXISTS add_multi_partition + ( + `k1` LARGEINT NOT NULL, + `age` SMALLINT, + `k2` VARCHAR(20) + ) + ENGINE=OLAP + UNIQUE KEY(`k1`, `age`) + PARTITION BY RANGE(`age`) + ( + FROM (1) TO (100) INTERVAL 10 + ) + DISTRIBUTED BY HASH(`k1`) BUCKETS 1 + PROPERTIES + ( + "replication_num" = "1" + ); + """ + List> result7 = sql "show partitions from add_multi_partition;" + assertEquals(result7.size(), 10) + sql "ALTER TABLE add_multi_partition ADD PARTITIONS FROM (100) TO (200) INTERVAL 10;" + List> result8 = sql "show partitions from add_multi_partition;" + assertEquals(result8.size(), 20) + sql "DROP TABLE IF EXISTS add_multi_partition FORCE" + +} \ No newline at end of file From 51d0dfd58e93a3fec69f8e834c03065f9a66abfc Mon Sep 17 00:00:00 2001 From: walter 
Date: Wed, 14 Aug 2024 23:22:30 +0800 Subject: [PATCH 07/13] [chore](recycler) log num deleted objects (#39353) --- cloud/src/recycler/obj_storage_client.cpp | 14 +++++++++++++- cloud/src/recycler/recycler.cpp | 12 ++++-------- cloud/src/recycler/sync_executor.h | 6 ++---- 3 files changed, 19 insertions(+), 13 deletions(-) diff --git a/cloud/src/recycler/obj_storage_client.cpp b/cloud/src/recycler/obj_storage_client.cpp index 855fa110a4ce92..3402bb334680bf 100644 --- a/cloud/src/recycler/obj_storage_client.cpp +++ b/cloud/src/recycler/obj_storage_client.cpp @@ -17,9 +17,12 @@ #include "recycler/obj_storage_client.h" +#include + #include "cpp/sync_point.h" #include "recycler/sync_executor.h" -#include "recycler/util.h" + +using namespace std::chrono; namespace doris::cloud { @@ -28,6 +31,9 @@ ObjectStorageResponse ObjStorageClient::delete_objects_recursively_(ObjectStorag int64_t expired_time, size_t batch_size) { TEST_SYNC_POINT_CALLBACK("ObjStorageClient::delete_objects_recursively_", &batch_size); + size_t num_deleted_objects = 0; + auto start_time = steady_clock::now(); + auto list_iter = list_objects(path); ObjectStorageResponse ret; @@ -42,6 +48,7 @@ ObjectStorageResponse ObjStorageClient::delete_objects_recursively_(ObjectStorag continue; } + num_deleted_objects++; keys.emplace_back(std::move(obj->key)); if (keys.size() < batch_size) { continue; @@ -70,6 +77,11 @@ ObjectStorageResponse ObjStorageClient::delete_objects_recursively_(ObjectStorag } } + auto elapsed = duration_cast(steady_clock::now() - start_time).count(); + LOG(INFO) << "delete objects under " << path.bucket << "/" << path.key + << " finished, ret=" << ret.ret << ", finished=" << finished + << ", num_deleted_objects=" << num_deleted_objects << ", cost=" << elapsed << " ms"; + ret = finished ? 
ret : -1; return ret; diff --git a/cloud/src/recycler/recycler.cpp b/cloud/src/recycler/recycler.cpp index 0b2267e601dbe3..1ba33957a20e7f 100644 --- a/cloud/src/recycler/recycler.cpp +++ b/cloud/src/recycler/recycler.cpp @@ -1131,7 +1131,6 @@ int InstanceRecycler::recycle_tablets(int64_t table_id, int64_t index_id, int64_ return {std::string_view(), range_move}; } ++num_recycled; - LOG_INFO("k is {}, is empty {}", k, k.empty()); return {k, range_move}; }); } else { @@ -1157,10 +1156,7 @@ int InstanceRecycler::recycle_tablets(int64_t table_id, int64_t index_id, int64_ } return true; }()); - sync_executor.add([k]() mutable -> TabletKeyPair { - LOG_INFO("k is {}, is empty {}", k, k.empty()); - return {k, true}; - }); + sync_executor.add([k]() mutable -> TabletKeyPair { return {k, true}; }); ++num_recycled; } return 0; @@ -1433,7 +1429,7 @@ int InstanceRecycler::recycle_tablet(int64_t tablet_id) { std::unique_ptr> defer_log_statistics((int*)0x01, [&](int*) { auto cost = duration(steady_clock::now() - start_time).count(); - LOG_INFO("recycle rowsets finished, cost={}s", cost) + LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost) .tag("instance_id", instance_id_) .tag("tablet_id", tablet_id); }); @@ -1618,7 +1614,7 @@ int InstanceRecycler::recycle_rowsets() { // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv. 
LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key=" << hex(k) << " value=" << proto_to_json(rowset); - rowset_keys.push_back(std::string(k)); + rowset_keys.emplace_back(k); return -1; } // decode rowset_id @@ -1664,7 +1660,7 @@ int InstanceRecycler::recycle_rowsets() { return -1; } } else { - rowset_keys.push_back(std::string(k)); + rowset_keys.emplace_back(k); if (rowset_meta->num_segments() > 0) { // Skip empty rowset rowsets.push_back(std::move(*rowset_meta)); } diff --git a/cloud/src/recycler/sync_executor.h b/cloud/src/recycler/sync_executor.h index d7009a99ed436c..c84e5e22467a9c 100644 --- a/cloud/src/recycler/sync_executor.h +++ b/cloud/src/recycler/sync_executor.h @@ -23,10 +23,8 @@ #include #include -#include #include -#include "common/logging.h" #include "common/simple_thread_pool.h" namespace doris::cloud { @@ -54,10 +52,10 @@ class SyncExecutor { auto current_time_second = time(nullptr); current_time.tv_sec = current_time_second + 300; current_time.tv_nsec = 0; - auto msg = fmt::format("{} has already taken 5 min", _name_tag); while (0 != _count.timed_wait(current_time)) { current_time.tv_sec += 300; - LOG(WARNING) << msg; + LOG(WARNING) << _name_tag << " has already taken 5 min, cost: " + << time(nullptr) - current_time_second << " seconds"; } *finished = !_stop_token; std::vector res; From 5b7278fea00089af28bcce641c459fffe5df3c2d Mon Sep 17 00:00:00 2001 From: elon-X <34712973+elon-X@users.noreply.github.com> Date: Wed, 14 Aug 2024 23:27:12 +0800 Subject: [PATCH 08/13] [opt](mtmv) support creating views based on Hive tables with the LIMIT (#38812) --- .../exploration/mv/MaterializedViewUtils.java | 2 ++ .../mv/MaterializedViewUtilsTest.java | 20 ++++++++++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/MaterializedViewUtils.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/MaterializedViewUtils.java index 
429afcc0419618..a6baed9d085cee 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/MaterializedViewUtils.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/MaterializedViewUtils.java @@ -59,6 +59,7 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalFileScan; import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; import org.apache.doris.nereids.trees.plans.logical.LogicalJoin; +import org.apache.doris.nereids.trees.plans.logical.LogicalLimit; import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; import org.apache.doris.nereids.trees.plans.logical.LogicalProject; @@ -509,6 +510,7 @@ public Void visitLogicalWindow(LogicalWindow window, IncrementCh @Override public Void visit(Plan plan, IncrementCheckerContext context) { if (plan instanceof LogicalProject + || plan instanceof LogicalLimit || plan instanceof LogicalFilter || plan instanceof LogicalJoin || plan instanceof LogicalAggregate diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/exploration/mv/MaterializedViewUtilsTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/exploration/mv/MaterializedViewUtilsTest.java index 9ac6d7d3a753ed..b44e1cc3ec681a 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/exploration/mv/MaterializedViewUtilsTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/exploration/mv/MaterializedViewUtilsTest.java @@ -286,7 +286,7 @@ public void getRelatedTableInfoTestWithoutGroupTest() { + "JOIN " + "(SELECT abs(sqrt(PS_SUPPLYCOST)) as c2_abs, PS_AVAILQTY, PS_PARTKEY, PS_SUPPKEY " + "FROM partsupp) as ps " - + "ON l.L_PARTKEY = ps.PS_PARTKEY and l.L_SUPPKEY = ps.PS_SUPPKEY", + + "ON l.L_PARTKEY = ps.PS_PARTKEY and l.L_SUPPKEY = ps.PS_SUPPKEY limit 1", nereidsPlanner -> { Plan rewrittenPlan = nereidsPlanner.getRewrittenPlan(); RelatedTableInfo relatedTableInfo = 
@@ -586,6 +586,24 @@ public void getRelatedTableInfoTestWithWindowButNotPartitionTest() { }); } + @Test + public void getRelatedTableInfoTestWithLimitTest() { + PlanChecker.from(connectContext) + .checkExplain("SELECT l.L_SHIPDATE, l.L_ORDERKEY " + + "FROM " + + "lineitem as l limit 1", + nereidsPlanner -> { + Plan rewrittenPlan = nereidsPlanner.getRewrittenPlan(); + RelatedTableInfo relatedTableInfo = + MaterializedViewUtils.getRelatedTableInfo("l_shipdate", null, + rewrittenPlan, nereidsPlanner.getCascadesContext()); + checkRelatedTableInfo(relatedTableInfo, + "lineitem", + "L_SHIPDATE", + true); + }); + } + @Test public void testPartitionDateTrunc() { PlanChecker.from(connectContext) From 15095333af60761dc0eedb59f55d02bc1894f8d7 Mon Sep 17 00:00:00 2001 From: Gavin Chou Date: Thu, 15 Aug 2024 09:31:31 +0800 Subject: [PATCH 09/13] [fix](file cache) Fix data race of rowset meta when download segment data (#39361) The original impl. _rs_metas is not protected correctly when doing traversal. We have to access the rowset meta via tablet level interface to ensure integrity. 
--- be/src/cloud/cloud_warm_up_manager.cpp | 12 +++++++++++- be/src/io/cache/block_file_cache_downloader.cpp | 12 +++++++++++- be/src/olap/tablet_meta.h | 12 ------------ 3 files changed, 22 insertions(+), 14 deletions(-) diff --git a/be/src/cloud/cloud_warm_up_manager.cpp b/be/src/cloud/cloud_warm_up_manager.cpp index 47046de36985ad..07beeaeb078a46 100644 --- a/be/src/cloud/cloud_warm_up_manager.cpp +++ b/be/src/cloud/cloud_warm_up_manager.cpp @@ -49,6 +49,16 @@ CloudWarmUpManager::~CloudWarmUpManager() { } } +std::unordered_map snapshot_rs_metas(BaseTablet* tablet) { + std::unordered_map id_to_rowset_meta_map; + auto visitor = [&id_to_rowset_meta_map](const RowsetSharedPtr& r) { + id_to_rowset_meta_map.emplace(r->rowset_meta()->rowset_id().to_string(), r->rowset_meta()); + }; + constexpr bool include_stale = false; + tablet->traverse_rowsets(visitor, include_stale); + return id_to_rowset_meta_map; +} + void CloudWarmUpManager::handle_jobs() { #ifndef BE_TEST constexpr int WAIT_TIME_SECONDS = 600; @@ -78,7 +88,7 @@ void CloudWarmUpManager::handle_jobs() { std::shared_ptr wait = std::make_shared(0); auto tablet_meta = tablet->tablet_meta(); - auto rs_metas = tablet_meta->snapshot_rs_metas(); + auto rs_metas = snapshot_rs_metas(tablet.get()); for (auto& [_, rs] : rs_metas) { for (int64_t seg_id = 0; seg_id < rs->num_segments(); seg_id++) { auto storage_resource = rs->remote_storage_resource(); diff --git a/be/src/io/cache/block_file_cache_downloader.cpp b/be/src/io/cache/block_file_cache_downloader.cpp index 585c0ff015993b..026f7e2a01741d 100644 --- a/be/src/io/cache/block_file_cache_downloader.cpp +++ b/be/src/io/cache/block_file_cache_downloader.cpp @@ -130,6 +130,16 @@ void FileCacheBlockDownloader::check_download_task(const std::vector& t } } +std::unordered_map snapshot_rs_metas(BaseTablet* tablet) { + std::unordered_map id_to_rowset_meta_map; + auto visitor = [&id_to_rowset_meta_map](const RowsetSharedPtr& r) { + 
id_to_rowset_meta_map.emplace(r->rowset_meta()->rowset_id().to_string(), r->rowset_meta()); + }; + constexpr bool include_stale = false; + tablet->traverse_rowsets(visitor, include_stale); + return id_to_rowset_meta_map; +} + void FileCacheBlockDownloader::download_file_cache_block( const DownloadTask::FileCacheBlockMetaVec& metas) { std::ranges::for_each(metas, [&](const FileCacheBlockMeta& meta) { @@ -141,7 +151,7 @@ void FileCacheBlockDownloader::download_file_cache_block( tablet = std::move(res).value(); } - auto id_to_rowset_meta_map = tablet->tablet_meta()->snapshot_rs_metas(); + auto id_to_rowset_meta_map = snapshot_rs_metas(tablet.get()); auto find_it = id_to_rowset_meta_map.find(meta.rowset_id()); if (find_it == id_to_rowset_meta_map.end()) { return; diff --git a/be/src/olap/tablet_meta.h b/be/src/olap/tablet_meta.h index bb6b5b8cd51725..41455c051c7f44 100644 --- a/be/src/olap/tablet_meta.h +++ b/be/src/olap/tablet_meta.h @@ -192,9 +192,6 @@ class TabletMeta { void revise_delete_bitmap_unlocked(const DeleteBitmap& delete_bitmap); const std::vector& all_stale_rs_metas() const; - // return the snapshot of rowset_meta - // the return value is map - std::unordered_map snapshot_rs_metas() const; RowsetMetaSharedPtr acquire_rs_meta_by_version(const Version& version) const; void delete_stale_rs_meta_by_version(const Version& version); RowsetMetaSharedPtr acquire_stale_rs_meta_by_version(const Version& version) const; @@ -698,15 +695,6 @@ inline bool TabletMeta::all_beta() const { return true; } -inline std::unordered_map TabletMeta::snapshot_rs_metas() const { - std::unordered_map id_to_rowset_meta_map; - std::shared_lock rlock(_meta_lock); - std::for_each(_rs_metas.cbegin(), _rs_metas.cend(), [&](const auto& rowset_meta) { - id_to_rowset_meta_map.emplace(rowset_meta->rowset_id().to_string(), rowset_meta); - }); - return id_to_rowset_meta_map; -} - std::string tablet_state_name(TabletState state); // Only for unit test now. 
From d0d3ad0dd31f4e3c0cdf79dba39814c8cf850e16 Mon Sep 17 00:00:00 2001 From: meiyi Date: Thu, 15 Aug 2024 09:36:27 +0800 Subject: [PATCH 10/13] [fix](group commit) fix group commit core if be inject FragmentMgr.exec_plan_fragment.failed (#39339) ``` *** SIGSEGV address not mapped to object (@0x0) received by PID 1898955 (TID 1900522 OR 0x7f4f94abc640) from PID 0; stack trace: *** 0# doris::signal::(anonymous namespace)::FailureSignalHandler(int, siginfo_t*, void*) at /home/zcp/repo_center/doris_branch-3.0.2-tmp/doris/be/src/common/signal_handler.h:421 1# PosixSignals::chained_handler(int, siginfo*, void*) [clone .part.0] in /usr/lib/jvm/java-17-openjdk-amd64/lib/server/libjvm.so 2# JVM_handle_linux_signal in /usr/lib/jvm/java-17-openjdk-amd64/lib/server/libjvm.so 3# 0x00007F5335001520 in /lib/x86_64-linux-gnu/libc.so.6 4# brpc::Socket::Write(brpc::SocketMessagePtr&, brpc::Socket::WriteOptions const*) in /mnt/disk1/STRESS_ENV/be/lib/doris_be 5# brpc::policy::HttpResponseSender::~HttpResponseSender() in /mnt/disk1/STRESS_ENV/be/lib/doris_be 6# brpc::policy::HttpResponseSenderAsDone::~HttpResponseSenderAsDone() in /mnt/disk1/STRESS_ENV/be/lib/doris_be 7# std::_Function_handler::_M_invoke(std::_Any_data const&) at /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/std_function.h:291 8# doris::WorkThreadPool::work_thread(int) at /home/zcp/repo_center/doris_branch-3.0.2-tmp/doris/be/src/util/work_thread_pool.hpp:159 ``` --- be/src/service/internal_service.cpp | 5 +- .../test_group_commit_error.groovy | 55 +++++++++++++++++++ 2 files changed, 58 insertions(+), 2 deletions(-) create mode 100644 regression-test/suites/insert_p0/group_commit/test_group_commit_error.groovy diff --git a/be/src/service/internal_service.cpp b/be/src/service/internal_service.cpp index a60b279391cd56..d8cd7356ddb4c3 100644 --- a/be/src/service/internal_service.cpp +++ b/be/src/service/internal_service.cpp @@ -2063,10 +2063,11 @@ void 
PInternalService::group_commit_insert(google::protobuf::RpcController* cont st = Status::Error(ErrorCode::INTERNAL_ERROR, "_exec_plan_fragment_impl meet unknown error"); } + closure_guard.release(); if (!st.ok()) { - LOG(WARNING) << "exec plan fragment failed, errmsg=" << st; + LOG(WARNING) << "exec plan fragment failed, load_id=" << print_id(load_id) + << ", errmsg=" << st; } else { - closure_guard.release(); for (int i = 0; i < request->data().size(); ++i) { std::unique_ptr row(new PDataRow()); row->CopyFrom(request->data(i)); diff --git a/regression-test/suites/insert_p0/group_commit/test_group_commit_error.groovy b/regression-test/suites/insert_p0/group_commit/test_group_commit_error.groovy new file mode 100644 index 00000000000000..1416a86e5e9c8b --- /dev/null +++ b/regression-test/suites/insert_p0/group_commit/test_group_commit_error.groovy @@ -0,0 +1,55 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_group_commit_error", "nonConcurrent") { + def tableName = "test_group_commit_error" + + sql """ DROP TABLE IF EXISTS ${tableName} """ + sql """ + CREATE TABLE IF NOT EXISTS ${tableName} ( + `k` int , + `v` int , + ) engine=olap + DISTRIBUTED BY HASH(`k`) + BUCKETS 5 + properties("replication_num" = "1", "group_commit_interval_ms"="2000") + """ + + GetDebugPoint().clearDebugPointsForAllBEs() + GetDebugPoint().clearDebugPointsForAllFEs() + try { + GetDebugPoint().enableDebugPointForAllBEs("FragmentMgr.exec_plan_fragment.failed") + sql """ set group_commit = async_mode """ + sql """ insert into ${tableName} values (1, 1) """ + assertTrue(false) + } catch (Exception e) { + logger.info("failed: " + e.getMessage()) + } finally { + GetDebugPoint().clearDebugPointsForAllBEs() + } + + try { + GetDebugPoint().enableDebugPointForAllBEs("FragmentMgr.exec_plan_fragment.failed") + sql """ set group_commit = async_mode """ + sql """ set enable_nereids_planner = false """ + sql """ insert into ${tableName} values (2, 2) """ + } catch (Exception e) { + logger.info("failed: " + e.getMessage()) + } finally { + GetDebugPoint().clearDebugPointsForAllBEs() + } +} \ No newline at end of file From a67429de1e5772140f1493b461a207cb414dc2b1 Mon Sep 17 00:00:00 2001 From: starocean999 <40539150+starocean999@users.noreply.github.com> Date: Thu, 15 Aug 2024 09:49:19 +0800 Subject: [PATCH 11/13] [fix](nereids)fix unstable test case (#39244) --- .../nereids_syntax_p0/rollup/date.groovy | 53 ++----------------- 1 file changed, 4 insertions(+), 49 deletions(-) diff --git a/regression-test/suites/nereids_syntax_p0/rollup/date.groovy b/regression-test/suites/nereids_syntax_p0/rollup/date.groovy index 4fdcd4f2c83b16..c91c219c5f9669 100644 --- a/regression-test/suites/nereids_syntax_p0/rollup/date.groovy +++ b/regression-test/suites/nereids_syntax_p0/rollup/date.groovy @@ -15,7 +15,6 @@ // specific language governing permissions and limitations // under the License. 
import java.util.concurrent.TimeUnit -import org.awaitility.Awaitility suite("date", "rollup") { @@ -24,10 +23,6 @@ suite("date", "rollup") { def tbName1 = "test_materialized_view_date1" - def getJobState = { tableName -> - def jobStateResult = sql """ SHOW ALTER TABLE MATERIALIZED VIEW WHERE TableName='${tableName}' ORDER BY CreateTime DESC LIMIT 1; """ - return jobStateResult[0][8] - } sql "DROP TABLE IF EXISTS ${tbName1}" sql """ CREATE TABLE IF NOT EXISTS ${tbName1}( @@ -44,51 +39,11 @@ suite("date", "rollup") { DISTRIBUTED BY HASH(record_id) properties("replication_num" = "1"); """ - int max_try_secs = 120 - sql "CREATE materialized VIEW amt_max1 AS SELECT store_id, max(sale_date1) FROM ${tbName1} GROUP BY store_id;" - String res = "NOT_FINISHED" - Awaitility.await().atMost(max_try_secs, TimeUnit.SECONDS).with().pollDelay(100, TimeUnit.MILLISECONDS).await().until(() -> { - res = getJobState(tbName1) - if (res == "FINISHED" || res == "CANCELLED") { - assertEquals("FINISHED", res) - return true; - } - return false; - }); - assertEquals("FINISHED",res) - - sql "CREATE materialized VIEW amt_max2 AS SELECT store_id, max(sale_datetime1) FROM ${tbName1} GROUP BY store_id;" - Awaitility.await().atMost(max_try_secs, TimeUnit.SECONDS).with().pollDelay(100, TimeUnit.MILLISECONDS).await().until(() -> { - res = getJobState(tbName1) - if (res == "FINISHED" || res == "CANCELLED") { - assertEquals("FINISHED", res) - return true; - } - return false; - }); - assertEquals("FINISHED",res) - - sql "CREATE materialized VIEW amt_max3 AS SELECT store_id, max(sale_datetime2) FROM ${tbName1} GROUP BY store_id;" - Awaitility.await().atMost(max_try_secs, TimeUnit.SECONDS).with().pollDelay(100, TimeUnit.MILLISECONDS).await().until(() -> { - res = getJobState(tbName1) - if (res == "FINISHED" || res == "CANCELLED") { - assertEquals("FINISHED", res) - return true; - } - return false; - }); - assertEquals("FINISHED",res) + createMV("CREATE materialized VIEW amt_max1 AS SELECT store_id, 
max(sale_date1) FROM ${tbName1} GROUP BY store_id;") + createMV("CREATE materialized VIEW amt_max2 AS SELECT store_id, max(sale_datetime1) FROM ${tbName1} GROUP BY store_id;") + createMV("CREATE materialized VIEW amt_max3 AS SELECT store_id, max(sale_datetime2) FROM ${tbName1} GROUP BY store_id;") + createMV("CREATE materialized VIEW amt_max4 AS SELECT store_id, max(sale_datetime3) FROM ${tbName1} GROUP BY store_id;") - sql "CREATE materialized VIEW amt_max4 AS SELECT store_id, max(sale_datetime3) FROM ${tbName1} GROUP BY store_id;" - Awaitility.await().atMost(max_try_secs, TimeUnit.SECONDS).with().pollDelay(100, TimeUnit.MILLISECONDS).await().until(() -> { - res = getJobState(tbName1) - if (res == "FINISHED" || res == "CANCELLED") { - assertEquals("FINISHED", res) - return true; - } - return false; - }); - assertEquals("FINISHED",res) sql "SHOW ALTER TABLE MATERIALIZED VIEW WHERE TableName='${tbName1}';" sql "insert into ${tbName1} values(1, 1, 1, '2020-05-30', '2020-05-30', '2020-05-30 11:11:11.111111', '2020-05-30 11:11:11.111111', '2020-05-30 11:11:11.111111',100);" From 520c8538fd2f5ea9034445f7a5456a2349a168d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=8B=8F=E5=B0=8F=E5=88=9A?= Date: Thu, 15 Aug 2024 10:23:52 +0800 Subject: [PATCH 12/13] [fix](regress-test) fix presto dialect regression-test case (#39326) ## Proposed changes disable the unavailable sql of presto dialect --- .../sql/presto/scalar/TestJsonFunctions.out | 37 +++++++++---------- .../sql/presto/scalar/TestJsonFunctions.sql | 2 +- 2 files changed, 18 insertions(+), 21 deletions(-) diff --git a/regression-test/data/dialect_compatible/sql/presto/scalar/TestJsonFunctions.out b/regression-test/data/dialect_compatible/sql/presto/scalar/TestJsonFunctions.out index 02b29e52056be5..b34789e4d73826 100644 --- a/regression-test/data/dialect_compatible/sql/presto/scalar/TestJsonFunctions.out +++ b/regression-test/data/dialect_compatible/sql/presto/scalar/TestJsonFunctions.out @@ -546,46 +546,46 @@ false \N -- 
!TestJsonFunctions_183 -- -\N +3.14 -- !TestJsonFunctions_184 -- -3.14 +3.54 -- !TestJsonFunctions_185 -- -3.54 +5.02 -- !TestJsonFunctions_186 -- -5.02 +3.14 -- !TestJsonFunctions_187 -- -3.14 +3.54 -- !TestJsonFunctions_188 -- -3.54 +5.02 -- !TestJsonFunctions_189 -- -5.02 +\N -- !TestJsonFunctions_190 -- -\N +true -- !TestJsonFunctions_191 -- -true +false -- !TestJsonFunctions_192 -- false -- !TestJsonFunctions_193 -- -false +true -- !TestJsonFunctions_194 -- -true +false -- !TestJsonFunctions_195 -- false -- !TestJsonFunctions_196 -- -false +\N -- !TestJsonFunctions_197 -- \N @@ -618,13 +618,13 @@ false \N -- !TestJsonFunctions_207 -- -\N +{} -- !TestJsonFunctions_208 -- {} -- !TestJsonFunctions_209 -- -{} +\N -- !TestJsonFunctions_210 -- \N @@ -636,13 +636,13 @@ false \N -- !TestJsonFunctions_213 -- -\N +3 -- !TestJsonFunctions_214 -- 3 -- !TestJsonFunctions_215 -- -3 +\N -- !TestJsonFunctions_216 -- \N @@ -651,11 +651,8 @@ false \N -- !TestJsonFunctions_218 -- -\N - --- !TestJsonFunctions_219 -- 3 --- !TestJsonFunctions_220 -- +-- !TestJsonFunctions_219 -- \N diff --git a/regression-test/suites/dialect_compatible/sql/presto/scalar/TestJsonFunctions.sql b/regression-test/suites/dialect_compatible/sql/presto/scalar/TestJsonFunctions.sql index d70fc3b30f90d7..4cfd45753326e1 100644 --- a/regression-test/suites/dialect_compatible/sql/presto/scalar/TestJsonFunctions.sql +++ b/regression-test/suites/dialect_compatible/sql/presto/scalar/TestJsonFunctions.sql @@ -322,7 +322,7 @@ SELECT json_array_get('[]', null); SELECT json_array_get('[1]', null); SELECT json_array_get('', null); SELECT json_array_get('', 1); -SELECT json_array_get('', -1); +-- SELECT json_array_get('', -1); # error: errCode = 2, detailMessage = (127.0.0.1)[INVALID_ARGUMENT]Json path error: Invalid Json Path for value: -1 -- SELECT json_array_get('[1]', -9223372036854775807 - 1); # error: errCode = 2, detailMessage = (127.0.0.1)[INVALID_ARGUMENT]Json path error: Invalid Json Path for value: 
-9223372036854775808 SELECT json_array_get('[\, ]', 0); SELECT json_array_get('[\, , null]', 1); From dedf15c6df57a20b86b2772054449074ae4b1a64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=8B=8F=E5=B0=8F=E5=88=9A?= Date: Thu, 15 Aug 2024 10:24:12 +0800 Subject: [PATCH 13/13] [fix])(function) add function regexp_extract_or_null (#38296) ## Proposed changes Add function regexp_extract_or_null to be compatible with presto. The function is same as regexp_extract, except that it returns null when no match is found. --- be/src/vec/functions/function_regexp.cpp | 18 +++-- be/test/vec/function/function_like_test.cpp | 39 ++++++++++ .../doris/catalog/BuiltinScalarFunctions.java | 2 + .../functions/scalar/RegexpExtractOrNull.java | 73 +++++++++++++++++++ .../visitor/ScalarFunctionVisitor.java | 5 ++ .../test_string_function_regexp.out | 12 +++ .../test_string_function_regexp.groovy | 5 ++ 7 files changed, 148 insertions(+), 6 deletions(-) create mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/RegexpExtractOrNull.java diff --git a/be/src/vec/functions/function_regexp.cpp b/be/src/vec/functions/function_regexp.cpp index 4da133a6f51ede..525d99b6cc7dc2 100644 --- a/be/src/vec/functions/function_regexp.cpp +++ b/be/src/vec/functions/function_regexp.cpp @@ -184,8 +184,9 @@ struct RegexpReplaceOneImpl { } }; +template struct RegexpExtractImpl { - static constexpr auto name = "regexp_extract"; + static constexpr auto name = ReturnNull ? "regexp_extract_or_null" : "regexp_extract"; // 3 args static void execute_impl(FunctionContext* context, ColumnPtr argument_columns[], size_t input_rows_count, ColumnString::Chars& result_data, @@ -201,7 +202,8 @@ struct RegexpExtractImpl { } const auto& index_data = index_col->get_int(i); if (index_data < 0) { - StringOP::push_empty_string(i, result_data, result_offset); + ReturnNull ? 
StringOP::push_null_string(i, result_data, result_offset, null_map) + : StringOP::push_empty_string(i, result_data, result_offset); continue; } _execute_inner_loop(context, str_col, pattern_col, index_data, result_data, @@ -220,7 +222,8 @@ struct RegexpExtractImpl { const auto& index_data = index_col->get_int(0); if (index_data < 0) { for (size_t i = 0; i < input_rows_count; ++i) { - StringOP::push_empty_string(i, result_data, result_offset); + ReturnNull ? StringOP::push_null_string(i, result_data, result_offset, null_map) + : StringOP::push_empty_string(i, result_data, result_offset); } return; } @@ -260,7 +263,8 @@ struct RegexpExtractImpl { int max_matches = 1 + re->NumberOfCapturingGroups(); if (index_data >= max_matches) { - StringOP::push_empty_string(index_now, result_data, result_offset); + ReturnNull ? StringOP::push_null_string(index_now, result_data, result_offset, null_map) + : StringOP::push_empty_string(index_now, result_data, result_offset); return; } @@ -268,7 +272,8 @@ struct RegexpExtractImpl { bool success = re->Match(str_sp, 0, str.size, re2::RE2::UNANCHORED, &matches[0], max_matches); if (!success) { - StringOP::push_empty_string(index_now, result_data, result_offset); + ReturnNull ? 
StringOP::push_null_string(index_now, result_data, result_offset, null_map) + : StringOP::push_empty_string(index_now, result_data, result_offset); return; } const re2::StringPiece& match = matches[index_data]; @@ -486,7 +491,8 @@ class FunctionRegexp : public IFunction { void register_function_regexp_extract(SimpleFunctionFactory& factory) { factory.register_function>(); - factory.register_function>(); + factory.register_function>>(); + factory.register_function>>(); factory.register_function>(); factory.register_function>(); } diff --git a/be/test/vec/function/function_like_test.cpp b/be/test/vec/function/function_like_test.cpp index 794bc8c5eae259..e39b2cf43b0243 100644 --- a/be/test/vec/function/function_like_test.cpp +++ b/be/test/vec/function/function_like_test.cpp @@ -155,6 +155,45 @@ TEST(FunctionLikeTest, regexp_extract) { } } +TEST(FunctionLikeTest, regexp_extract_or_null) { + std::string func_name = "regexp_extract_or_null"; + + DataSet data_set = {{{std::string("x=a3&x=18abc&x=2&y=3&x=4"), + std::string("x=([0-9]+)([a-z]+)"), (int64_t)0}, + std::string("x=18abc")}, + {{std::string("x=a3&x=18abc&x=2&y=3&x=4"), + std::string("^x=([a-z]+)([0-9]+)"), (int64_t)0}, + std::string("x=a3")}, + {{std::string("x=a3&x=18abc&x=2&y=3&x=4"), + std::string("^x=([a-z]+)([0-9]+)"), (int64_t)1}, + std::string("a")}, + {{std::string("http://a.m.baidu.com/i41915173660.htm"), + std::string("i([0-9]+)"), (int64_t)0}, + std::string("i41915173660")}, + {{std::string("http://a.m.baidu.com/i41915173660.htm"), + std::string("i([0-9]+)"), (int64_t)1}, + std::string("41915173660")}, + + {{std::string("hitdecisiondlist"), std::string("(i)(.*?)(e)"), (int64_t)0}, + std::string("itde")}, + {{std::string("hitdecisiondlist"), std::string("(i)(.*?)(e)"), (int64_t)1}, + std::string("i")}, + {{std::string("hitdecisiondlist"), std::string("(i)(.*?)(e)"), (int64_t)2}, + std::string("td")}, + // null + {{std::string("abc"), Null(), (int64_t)0}, Null()}, + {{Null(), std::string("i([0-9]+)"), 
(int64_t)0}, Null()}}; + + // pattern is constant value + InputTypeSet const_pattern_input_types = {TypeIndex::String, Consted {TypeIndex::String}, + TypeIndex::Int64}; + for (const auto& line : data_set) { + DataSet const_pattern_dataset = {line}; + static_cast(check_function(func_name, const_pattern_input_types, + const_pattern_dataset)); + } +} + TEST(FunctionLikeTest, regexp_extract_all) { std::string func_name = "regexp_extract_all"; diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java index 76c6f0663013ce..cddacdefc53490 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java @@ -341,6 +341,7 @@ import org.apache.doris.nereids.trees.expressions.functions.scalar.RandomBytes; import org.apache.doris.nereids.trees.expressions.functions.scalar.RegexpExtract; import org.apache.doris.nereids.trees.expressions.functions.scalar.RegexpExtractAll; +import org.apache.doris.nereids.trees.expressions.functions.scalar.RegexpExtractOrNull; import org.apache.doris.nereids.trees.expressions.functions.scalar.RegexpReplace; import org.apache.doris.nereids.trees.expressions.functions.scalar.RegexpReplaceOne; import org.apache.doris.nereids.trees.expressions.functions.scalar.Repeat; @@ -811,6 +812,7 @@ public class BuiltinScalarFunctions implements FunctionHelper { scalar(Regexp.class, "regexp"), scalar(RegexpExtract.class, "regexp_extract"), scalar(RegexpExtractAll.class, "regexp_extract_all"), + scalar(RegexpExtractOrNull.class, "regexp_extract_or_null"), scalar(RegexpReplace.class, "regexp_replace"), scalar(RegexpReplaceOne.class, "regexp_replace_one"), scalar(Repeat.class, "repeat"), diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/RegexpExtractOrNull.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/RegexpExtractOrNull.java new file mode 100644 index 00000000000000..78f94db338d280 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/RegexpExtractOrNull.java @@ -0,0 +1,73 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.trees.expressions.functions.scalar; + +import org.apache.doris.catalog.FunctionSignature; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable; +import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; +import org.apache.doris.nereids.trees.expressions.shape.TernaryExpression; +import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; +import org.apache.doris.nereids.types.BigIntType; +import org.apache.doris.nereids.types.StringType; +import org.apache.doris.nereids.types.VarcharType; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; + +import java.util.List; + +/** + * ScalarFunction 'regexp_extract_or_null'. 
This class is generated by GenerateFunction. + */ +public class RegexpExtractOrNull extends ScalarFunction + implements TernaryExpression, ExplicitlyCastableSignature, AlwaysNullable { + + public static final List SIGNATURES = ImmutableList.of( + FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT) + .args(VarcharType.SYSTEM_DEFAULT, VarcharType.SYSTEM_DEFAULT, BigIntType.INSTANCE), + FunctionSignature.ret(StringType.INSTANCE) + .args(StringType.INSTANCE, StringType.INSTANCE, BigIntType.INSTANCE) + ); + + /** + * constructor with 3 arguments. + */ + public RegexpExtractOrNull(Expression arg0, Expression arg1, Expression arg2) { + super("regexp_extract_or_null", arg0, arg1, arg2); + } + + /** + * withChildren. + */ + @Override + public RegexpExtractOrNull withChildren(List children) { + Preconditions.checkArgument(children.size() == 3); + return new RegexpExtractOrNull(children.get(0), children.get(1), children.get(2)); + } + + @Override + public List getSignatures() { + return SIGNATURES; + } + + @Override + public R accept(ExpressionVisitor visitor, C context) { + return visitor.visitRegexpExtractOrNull(this, context); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java index 78d8ca0f7019fb..093bf9f1acf39f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java @@ -340,6 +340,7 @@ import org.apache.doris.nereids.trees.expressions.functions.scalar.RandomBytes; import org.apache.doris.nereids.trees.expressions.functions.scalar.RegexpExtract; import org.apache.doris.nereids.trees.expressions.functions.scalar.RegexpExtractAll; +import org.apache.doris.nereids.trees.expressions.functions.scalar.RegexpExtractOrNull; import 
org.apache.doris.nereids.trees.expressions.functions.scalar.RegexpReplace; import org.apache.doris.nereids.trees.expressions.functions.scalar.RegexpReplaceOne; import org.apache.doris.nereids.trees.expressions.functions.scalar.Repeat; @@ -1726,6 +1727,10 @@ default R visitRegexpExtractAll(RegexpExtractAll regexpExtractAll, C context) { return visitScalarFunction(regexpExtractAll, context); } + default R visitRegexpExtractOrNull(RegexpExtractOrNull regexpExtractOrNull, C context) { + return visitScalarFunction(regexpExtractOrNull, context); + } + default R visitRegexpReplace(RegexpReplace regexpReplace, C context) { return visitScalarFunction(regexpReplace, context); } diff --git a/regression-test/data/query_p0/sql_functions/string_functions/test_string_function_regexp.out b/regression-test/data/query_p0/sql_functions/string_functions/test_string_function_regexp.out index cfe2fd3eaf7c85..60719fded1add2 100644 --- a/regression-test/data/query_p0/sql_functions/string_functions/test_string_function_regexp.out +++ b/regression-test/data/query_p0/sql_functions/string_functions/test_string_function_regexp.out @@ -49,6 +49,18 @@ b -- !sql -- d +-- !sql -- + + +-- !sql -- +b + +-- !sql -- +d + +-- !sql -- +\N + -- !sql -- ['18','17'] diff --git a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_regexp.groovy b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_regexp.groovy index 5926492ac4d6aa..2066452b0d6c4e 100644 --- a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_regexp.groovy +++ b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_regexp.groovy @@ -51,6 +51,11 @@ suite("test_string_function_regexp") { qt_sql "SELECT regexp_extract('AbCdE', '([[:lower:]]+)C([[:lower:]]+)', 1);" qt_sql "SELECT regexp_extract('AbCdE', '([[:lower:]]+)C([[:lower:]]+)', 2);" + qt_sql "SELECT regexp_extract('AbCdE', '([[:lower:]]+)C([[:lower:]]+)', 3);" 
// regexp_extract_or_null: same extraction semantics as regexp_extract for
// matching groups (cases 1 and 2 below mirror the regexp_extract cases above).
qt_sql "SELECT regexp_extract_or_null('AbCdE', '([[:lower:]]+)C([[:lower:]]+)', 1);"
qt_sql "SELECT regexp_extract_or_null('AbCdE', '([[:lower:]]+)C([[:lower:]]+)', 2);"
// Group 3 does not exist in the pattern; per the .out expectations this is
// NULL (\N) rather than the empty string regexp_extract returns — NOTE(review):
// confirm the result-to-query ordering against the .out file.
qt_sql "SELECT regexp_extract_or_null('AbCdE', '([[:lower:]]+)C([[:lower:]]+)', 3);"
// Pre-existing regexp_extract_all coverage (unchanged diff context).
qt_sql "SELECT regexp_extract_all('x=a3&x=18abc&x=2&y=3&x=4&x=17bcd', 'x=([0-9]+)([a-z]+)');"
qt_sql "SELECT regexp_extract_all('http://a.m.baidu.com/i41915i73660.htm', 'i([0-9]+)');"