From 43ded30d431372be68d1e2716325e6eb8e4a77cc Mon Sep 17 00:00:00 2001
From: gnehil
Date: Fri, 27 Dec 2024 09:54:42 +0800
Subject: [PATCH 1/2] [feature](load) new ingestion load (#45937)

### What problem does this PR solve?

Problem Summary:

Ingestion Load is used to load pre-processed data into Doris. Preprocessing means that the data is processed according to the partitioning, bucketing, and aggregation rules defined by the Doris table and the result is written to an external storage system. The preprocessing is completed by the external system; the BE then reads the data, converts it into segment files, and stores them. The basic flow is as follows:

![ingestion_load](https://github.com/apache/doris/assets/30104232/aa468cd4-90bf-4d9d-b69b-0425b66b15f4)

### Release note

[feature](load) new ingestion load

(cherry picked from commit 6580f6bfbcea53efaf0f21ea556f781ce65b98b7)
---
 .github/actions/action-pr-title | 2 +-
 .github/actions/ccache-action | 2 +-
 .github/actions/get-workflow-origin | 2 +-
 .github/actions/paths-filter | 2 +-
 be/src/apache-orc | 2 +-
 be/src/clucene | 2 +-
 be/src/olap/push_handler.cpp | 53 +-
 .../java/org/apache/doris/common/Config.java | 4 +
 .../apache/doris/sparkdpp/EtlJobConfig.java | 6 +-
 .../apache/doris/catalog/SparkResource.java | 1 +
 .../apache/doris/httpv2/rest/LoadAction.java | 211 +++
 .../org/apache/doris/load/EtlJobType.java | 1 +
 .../doris/load/loadv2/IngestionLoadJob.java | 1139 +++++++++++++++++
 .../org/apache/doris/load/loadv2/LoadJob.java | 5 +
 .../apache/doris/load/loadv2/LoadManager.java | 50 +-
 .../doris/load/loadv2/SparkEtlJobHandler.java | 1 +
 .../load/loadv2/SparkLauncherMonitor.java | 1 +
 .../doris/load/loadv2/SparkLoadAppHandle.java | 1 +
 .../doris/load/loadv2/SparkLoadJob.java | 1 +
 .../load/loadv2/SparkLoadPendingTask.java | 8 +-
 .../loadv2/SparkPendingTaskAttachment.java | 1 +
 .../doris/load/loadv2/SparkRepository.java | 1 +
 .../load/loadv2/SparkYarnConfigFiles.java | 1 +
 .../org/apache/doris/master/MasterImpl.java | 7 +-
 .../apache/doris/persist/gson/GsonUtils.java | 9 +-
 .../load/loadv2/SparkLoadPendingTaskTest.java | 26 +-
 .../MinimumCoverageRollupTreeBuilderTest.java | 8 +-
 .../load/loadv2/etl/SparkEtlJobTest.java | 4 +-
 .../data/load_p0/ingestion_load/data.parquet | Bin 0 -> 5745 bytes
 .../data/load_p0/ingestion_load/data1.parquet | Bin 0 -> 4057 bytes
 .../load_p0/ingestion_load/data2-0.parquet | Bin 0 -> 851 bytes
 .../load_p0/ingestion_load/data2-1.parquet | Bin 0 -> 781 bytes
 .../load_p0/ingestion_load/data2-2.parquet | Bin 0 -> 781 bytes
 .../load_p0/ingestion_load/data2-3.parquet | Bin 0 -> 839 bytes
 .../ingestion_load/test_ingestion_load.out | 37 +
 .../test_ingestion_load_multi_table.out | 25 +
 ...est_ingestion_load_with_inverted_index.out | 13 +
 .../test_ingestion_load_with_partition.out | 7 +
 .../ingestion_load/test_ingestion_load.groovy | 222 ++++
 .../test_ingestion_load_alter_column.groovy | 208 +++
 ...test_ingestion_load_alter_partition.groovy | 224 ++++
 .../test_ingestion_load_drop_table.groovy | 196 +++
 .../test_ingestion_load_multi_table.groovy | 208 +++
 ..._ingestion_load_with_inverted_index.groovy | 166 +++
 .../test_ingestion_load_with_partition.groovy | 160 +++
 45 files changed, 2972 insertions(+), 45 deletions(-)
 create mode 100644 fe/fe-core/src/main/java/org/apache/doris/load/loadv2/IngestionLoadJob.java
 create mode 100644 regression-test/data/load_p0/ingestion_load/data.parquet
 create mode 100644 regression-test/data/load_p0/ingestion_load/data1.parquet
 create mode 100644
regression-test/data/load_p0/ingestion_load/data2-0.parquet create mode 100644 regression-test/data/load_p0/ingestion_load/data2-1.parquet create mode 100644 regression-test/data/load_p0/ingestion_load/data2-2.parquet create mode 100644 regression-test/data/load_p0/ingestion_load/data2-3.parquet create mode 100644 regression-test/data/load_p0/ingestion_load/test_ingestion_load.out create mode 100644 regression-test/data/load_p0/ingestion_load/test_ingestion_load_multi_table.out create mode 100644 regression-test/data/load_p0/ingestion_load/test_ingestion_load_with_inverted_index.out create mode 100644 regression-test/data/load_p0/ingestion_load/test_ingestion_load_with_partition.out create mode 100644 regression-test/suites/load_p0/ingestion_load/test_ingestion_load.groovy create mode 100644 regression-test/suites/load_p0/ingestion_load/test_ingestion_load_alter_column.groovy create mode 100644 regression-test/suites/load_p0/ingestion_load/test_ingestion_load_alter_partition.groovy create mode 100644 regression-test/suites/load_p0/ingestion_load/test_ingestion_load_drop_table.groovy create mode 100644 regression-test/suites/load_p0/ingestion_load/test_ingestion_load_multi_table.groovy create mode 100644 regression-test/suites/load_p0/ingestion_load/test_ingestion_load_with_inverted_index.groovy create mode 100644 regression-test/suites/load_p0/ingestion_load/test_ingestion_load_with_partition.groovy diff --git a/.github/actions/action-pr-title b/.github/actions/action-pr-title index 10f7ff082a0f12..077bddd7bdabd0 160000 --- a/.github/actions/action-pr-title +++ b/.github/actions/action-pr-title @@ -1 +1 @@ -Subproject commit 10f7ff082a0f1239f8cc39ccba39d11f32ca2407 +Subproject commit 077bddd7bdabd0d2b1b25ed0754c7e62e184d7ee diff --git a/.github/actions/ccache-action b/.github/actions/ccache-action index ca3acd2731eef1..3cfe8f57e1c7bf 160000 --- a/.github/actions/ccache-action +++ b/.github/actions/ccache-action @@ -1 +1 @@ -Subproject commit ca3acd2731eef11f1572ccb126356c2f9298d35e +Subproject commit 3cfe8f57e1c7bffe434f38879f1ebca09e169288 diff --git a/.github/actions/get-workflow-origin b/.github/actions/get-workflow-origin index e2dae063368361..3778755869bc9c 160000 --- a/.github/actions/get-workflow-origin +++ b/.github/actions/get-workflow-origin @@ -1 +1 @@ -Subproject commit e2dae063368361e4cd1f510e8785cd73bca9352e +Subproject commit 3778755869bc9ca829e7b45b5d179fa000f97b44 diff --git a/.github/actions/paths-filter b/.github/actions/paths-filter index 4512585405083f..de90cc6fb38fc0 160000 --- a/.github/actions/paths-filter +++ b/.github/actions/paths-filter @@ -1 +1 @@ -Subproject commit 4512585405083f25c027a35db413c2b3b9006d50 +Subproject commit de90cc6fb38fc0963ad72b210f1f284cd68cea36 diff --git a/be/src/apache-orc b/be/src/apache-orc index ef68c6ff736a84..72787269f5f52a 160000 --- a/be/src/apache-orc +++ b/be/src/apache-orc @@ -1 +1 @@ -Subproject commit ef68c6ff736a84c8c7185d4a08397c67eff53ad6 +Subproject commit 72787269f5f52ab0174bac1dbf54050bb7b60242 diff --git a/be/src/clucene b/be/src/clucene index 4f5449c903778f..30b63dc3406899 160000 --- a/be/src/clucene +++ b/be/src/clucene @@ -1 +1 @@ -Subproject commit 4f5449c903778fae32884586c728587c24a58806 +Subproject commit 30b63dc34068996c15d451a27d0593c519cb97fc diff --git a/be/src/olap/push_handler.cpp b/be/src/olap/push_handler.cpp index 99637eaf764a7b..896c472f79abe2 100644 --- a/be/src/olap/push_handler.cpp +++ b/be/src/olap/push_handler.cpp @@ -34,16 +34,17 @@ #include #include #include +#include #include 
"common/compiler_util.h" // IWYU pragma: keep #include "common/config.h" #include "common/logging.h" #include "common/status.h" #include "olap/cumulative_compaction_time_series_policy.h" +#include "io/hdfs_builder.h" #include "olap/delete_handler.h" #include "olap/olap_define.h" #include "olap/rowset/pending_rowset_helper.h" -#include "olap/rowset/rowset_meta.h" #include "olap/rowset/rowset_writer.h" #include "olap/rowset/rowset_writer_context.h" #include "olap/schema.h" @@ -54,10 +55,11 @@ #include "olap/txn_manager.h" #include "runtime/descriptors.h" #include "runtime/exec_env.h" -#include "util/runtime_profile.h" #include "util/time.h" #include "vec/core/block.h" +#include "vec/core/column_with_type_and_name.h" #include "vec/data_types/data_type_factory.hpp" +#include "vec/data_types/data_type_nullable.h" #include "vec/exec/format/parquet/vparquet_reader.h" #include "vec/exprs/vexpr_context.h" #include "vec/functions/simple_function_factory.h" @@ -355,8 +357,12 @@ PushBrokerReader::PushBrokerReader(const Schema* schema, const TBrokerScanRange& _file_params.expr_of_dest_slot = _params.expr_of_dest_slot; _file_params.dest_sid_to_src_sid_without_trans = _params.dest_sid_to_src_sid_without_trans; _file_params.strict_mode = _params.strict_mode; - _file_params.__isset.broker_addresses = true; - _file_params.broker_addresses = t_scan_range.broker_addresses; + if (_ranges[0].file_type == TFileType::FILE_HDFS) { + _file_params.hdfs_params = parse_properties(_params.properties); + } else { + _file_params.__isset.broker_addresses = true; + _file_params.broker_addresses = t_scan_range.broker_addresses; + } for (const auto& range : _ranges) { TFileRangeDesc file_range; @@ -485,17 +491,36 @@ Status PushBrokerReader::_cast_to_input_block() { auto& arg = _src_block_ptr->get_by_name(slot_desc->col_name()); // remove nullable here, let the get_function decide whether nullable auto return_type = slot_desc->get_data_type_ptr(); - vectorized::ColumnsWithTypeAndName arguments { - arg, - {vectorized::DataTypeString().create_column_const( - arg.column->size(), remove_nullable(return_type)->get_family_name()), - std::make_shared(), ""}}; - auto func_cast = vectorized::SimpleFunctionFactory::instance().get_function( - "CAST", arguments, return_type); idx = _src_block_name_to_idx[slot_desc->col_name()]; - RETURN_IF_ERROR( - func_cast->execute(nullptr, *_src_block_ptr, {idx}, idx, arg.column->size())); - _src_block_ptr->get_by_position(idx).type = std::move(return_type); + // bitmap convert:src -> to_base64 -> bitmap_from_base64 + if (slot_desc->type().is_bitmap_type()) { + auto base64_return_type = vectorized::DataTypeFactory::instance().create_data_type( + vectorized::DataTypeString().get_type_as_type_descriptor(), + slot_desc->is_nullable()); + auto func_to_base64 = vectorized::SimpleFunctionFactory::instance().get_function( + "to_base64", {arg}, base64_return_type); + RETURN_IF_ERROR(func_to_base64->execute(nullptr, *_src_block_ptr, {idx}, idx, + arg.column->size())); + _src_block_ptr->get_by_position(idx).type = std::move(base64_return_type); + auto& arg_base64 = _src_block_ptr->get_by_name(slot_desc->col_name()); + auto func_bitmap_from_base64 = + vectorized::SimpleFunctionFactory::instance().get_function( + "bitmap_from_base64", {arg_base64}, return_type); + RETURN_IF_ERROR(func_bitmap_from_base64->execute(nullptr, *_src_block_ptr, {idx}, idx, + arg_base64.column->size())); + _src_block_ptr->get_by_position(idx).type = std::move(return_type); + } else { + vectorized::ColumnsWithTypeAndName arguments { + 
arg, + {vectorized::DataTypeString().create_column_const( + arg.column->size(), remove_nullable(return_type)->get_family_name()), + std::make_shared(), ""}}; + auto func_cast = vectorized::SimpleFunctionFactory::instance().get_function( + "CAST", arguments, return_type); + RETURN_IF_ERROR( + func_cast->execute(nullptr, *_src_block_ptr, {idx}, idx, arg.column->size())); + _src_block_ptr->get_by_position(idx).type = std::move(return_type); + } } return Status::OK(); } diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java index ab09c68998f24b..e7f1f38b2ecafb 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java @@ -667,6 +667,10 @@ public class Config extends ConfigBase { @ConfField(description = {"Yarn 配置文件的路径", "Yarn config path"}) public static String yarn_config_dir = EnvUtils.getDorisHome() + "/lib/yarn-config"; + @ConfField(mutable = true, masterOnly = true, description = {"Ingestion load 的默认超时时间,单位是秒。", + "Default timeout for ingestion load job, in seconds."}) + public static int ingestion_load_default_timeout_second = 86400; // 1 day + @ConfField(mutable = true, masterOnly = true, description = {"Broker Load 的最大等待 job 数量。" + "这个值是一个期望值。在某些情况下,比如切换 master,当前等待的 job 数量可能会超过这个值。", "Maximal number of waiting jobs for Broker Load. This is a desired number. " diff --git a/fe/fe-common/src/main/java/org/apache/doris/sparkdpp/EtlJobConfig.java b/fe/fe-common/src/main/java/org/apache/doris/sparkdpp/EtlJobConfig.java index c59901d383b648..8d9d5de54b59f1 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/sparkdpp/EtlJobConfig.java +++ b/fe/fe-common/src/main/java/org/apache/doris/sparkdpp/EtlJobConfig.java @@ -371,14 +371,17 @@ public static class EtlIndex implements Serializable { public String indexType; @SerializedName(value = "isBaseIndex") public boolean isBaseIndex; + @SerializedName(value = "schemaVersion") + public int schemaVersion; public EtlIndex(long indexId, List etlColumns, int schemaHash, - String indexType, boolean isBaseIndex) { + String indexType, boolean isBaseIndex, int schemaVersion) { this.indexId = indexId; this.columns = etlColumns; this.schemaHash = schemaHash; this.indexType = indexType; this.isBaseIndex = isBaseIndex; + this.schemaVersion = schemaVersion; } public EtlColumn getColumn(String name) { @@ -398,6 +401,7 @@ public String toString() { + ", schemaHash=" + schemaHash + ", indexType='" + indexType + '\'' + ", isBaseIndex=" + isBaseIndex + + ", schemaVersion=" + schemaVersion + '}'; } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/SparkResource.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/SparkResource.java index 59b6d16801e3f1..4e85b8208a78a5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/SparkResource.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/SparkResource.java @@ -71,6 +71,7 @@ * * DROP RESOURCE "spark0"; */ +@Deprecated public class SparkResource extends Resource { private static final Logger LOG = LogManager.getLogger(SparkResource.class); diff --git a/fe/fe-core/src/main/java/org/apache/doris/httpv2/rest/LoadAction.java b/fe/fe-core/src/main/java/org/apache/doris/httpv2/rest/LoadAction.java index 3511d01d210db7..415e8947467751 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/httpv2/rest/LoadAction.java +++ b/fe/fe-core/src/main/java/org/apache/doris/httpv2/rest/LoadAction.java @@ -27,13 +27,21 @@ import 
org.apache.doris.common.Config; import org.apache.doris.common.DdlException; import org.apache.doris.common.LoadException; +import org.apache.doris.common.MetaNotFoundException; import org.apache.doris.common.Pair; +import org.apache.doris.common.QuotaExceedException; import org.apache.doris.common.UserException; import org.apache.doris.common.util.DebugPointUtil; +import org.apache.doris.datasource.InternalCatalog; import org.apache.doris.httpv2.entity.ResponseEntityBuilder; import org.apache.doris.httpv2.entity.RestBaseResult; import org.apache.doris.httpv2.exception.UnauthorizedException; +import org.apache.doris.httpv2.rest.manager.HttpUtils; +import org.apache.doris.load.FailMsg; import org.apache.doris.load.StreamLoadHandler; +import org.apache.doris.load.loadv2.IngestionLoadJob; +import org.apache.doris.load.loadv2.LoadJob; +import org.apache.doris.load.loadv2.LoadManager; import org.apache.doris.mysql.privilege.Auth; import org.apache.doris.mysql.privilege.PrivPredicate; import org.apache.doris.planner.GroupCommitPlanner; @@ -45,9 +53,14 @@ import org.apache.doris.system.BeSelectionPolicy; import org.apache.doris.system.SystemInfoService; import org.apache.doris.thrift.TNetworkAddress; +import org.apache.doris.transaction.BeginTransactionException; +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.json.JsonMapper; import com.google.common.base.Strings; import io.netty.handler.codec.http.HttpHeaderNames; +import org.apache.commons.lang3.StringUtils; import org.apache.commons.validator.routines.InetAddressValidator; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -59,10 +72,14 @@ import org.springframework.web.bind.annotation.RestController; import org.springframework.web.servlet.view.RedirectView; +import java.io.IOException; import java.net.InetAddress; import java.net.URI; import java.util.Enumeration; +import java.util.HashMap; +import java.util.LinkedList; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.Set; import javax.servlet.http.HttpServletRequest; @@ -716,4 +733,198 @@ private Backend selectBackendForGroupCommit(String clusterName, HttpServletReque } return backend; } + + /** + * Request body example: + * { + * "label": "test", + * "tableToPartition": { + * "tbl_test_spark_load": ["p1","p2"] + * }, + * "properties": { + * "strict_mode": "true", + * "timeout": 3600000 + * } + * } + * + */ + @RequestMapping(path = "/api/ingestion_load/{" + CATALOG_KEY + "}/{" + DB_KEY + + "}/_create", method = RequestMethod.POST) + public Object createIngestionLoad(HttpServletRequest request, HttpServletResponse response, + @PathVariable(value = CATALOG_KEY) String catalog, + @PathVariable(value = DB_KEY) String db) { + if (needRedirect(request.getScheme())) { + return redirectToHttps(request); + } + + executeCheckPassword(request, response); + + if (!InternalCatalog.INTERNAL_CATALOG_NAME.equals(catalog)) { + return ResponseEntityBuilder.okWithCommonError("Only support internal catalog. 
" + + "Current catalog is " + catalog); + } + + Object redirectView = redirectToMaster(request, response); + if (redirectView != null) { + return redirectView; + } + + String fullDbName = getFullDbName(db); + + Map resultMap = new HashMap<>(); + + try { + + String body = HttpUtils.getBody(request); + JsonMapper mapper = JsonMapper.builder().build(); + JsonNode jsonNode = mapper.reader().readTree(body); + + String label = jsonNode.get("label").asText(); + Map> tableToPartition = mapper.reader() + .readValue(jsonNode.get("tableToPartition").traverse(), + new TypeReference>>() { + }); + List tableNames = new LinkedList<>(tableToPartition.keySet()); + for (String tableName : tableNames) { + checkTblAuth(ConnectContext.get().getCurrentUserIdentity(), fullDbName, tableName, PrivPredicate.LOAD); + } + + Map properties = new HashMap<>(); + if (jsonNode.hasNonNull("properties")) { + properties = mapper.readValue(jsonNode.get("properties").traverse(), + new TypeReference>() { + }); + } + + executeCreateAndStartIngestionLoad(fullDbName, label, tableNames, properties, tableToPartition, resultMap, + ConnectContext.get().getCurrentUserIdentity()); + + } catch (Exception e) { + LOG.warn("create ingestion load job failed, db: {}, err: {}", db, e.getMessage()); + return ResponseEntityBuilder.okWithCommonError(e.getMessage()); + } + + return ResponseEntityBuilder.ok(resultMap); + + } + + private void executeCreateAndStartIngestionLoad(String dbName, String label, List tableNames, + Map properties, + Map> tableToPartition, + Map resultMap, UserIdentity userInfo) + throws DdlException, BeginTransactionException, MetaNotFoundException, AnalysisException, + QuotaExceedException, LoadException { + + long loadId = -1; + try { + + LoadManager loadManager = Env.getCurrentEnv().getLoadManager(); + loadId = loadManager.createIngestionLoadJob(dbName, label, tableNames, properties, userInfo); + IngestionLoadJob loadJob = (IngestionLoadJob) loadManager.getLoadJob(loadId); + resultMap.put("loadId", loadId); + + long txnId = loadJob.beginTransaction(); + resultMap.put("txnId", txnId); + + Map loadMeta = loadJob.getLoadMeta(tableToPartition); + resultMap.put("dbId", loadMeta.get("dbId")); + resultMap.put("signature", loadMeta.get("signature")); + resultMap.put("tableMeta", loadMeta.get("tableMeta")); + + loadJob.startEtlJob(); + + } catch (DdlException | BeginTransactionException | MetaNotFoundException | AnalysisException + | QuotaExceedException | LoadException e) { + LOG.warn("create ingestion load job failed, db: {}, load id: {}, err: {}", dbName, loadId, e.getMessage()); + if (loadId != -1L) { + try { + Env.getCurrentEnv().getLoadManager().getLoadJob(loadId).cancelJob( + new FailMsg(FailMsg.CancelType.UNKNOWN, StringUtils.defaultIfBlank(e.getMessage(), ""))); + } catch (DdlException ex) { + LOG.warn("cancel ingestion load failed, db: {}, load id: {}, err: {}", dbName, loadId, + e.getMessage()); + } + } + throw e; + } + + } + + /** + * Request body example: + * { + * "statusInfo": { + * "msg": "", + * "hadoopProperties": "{\"fs.defaultFS\":\"hdfs://hadoop01:8020\",\"hadoop.username\":\"hadoop\"}", + * "appId": "local-1723088141438", + * "filePathToSize": "{\"hdfs://hadoop01:8020/spark-load/jobs/25054/test/36019/dpp_result.json\":179, + * \"hdfs://hadoop01:8020/spark-load/jobs/25054/test/36019/load_meta.json\":3441,\"hdfs://hadoop01:8020 + * /spark-load/jobs/25054/test/36019/V1.test.25056.29373.25057.0.366242211.parquet\":5745}", + * "dppResult": 
"{\"isSuccess\":true,\"failedReason\":\"\",\"scannedRows\":10,\"fileNumber\":1, + * \"fileSize\":2441,\"normalRows\":10,\"abnormalRows\":0,\"unselectRows\":0,\"partialAbnormalRows\":\"[]\", + * \"scannedBytes\":0}", + * "status": "SUCCESS" + * }, + * "loadId": 36018 + * } + * + */ + @RequestMapping(path = "/api/ingestion_load/{" + CATALOG_KEY + "}/{" + DB_KEY + + "}/_update", method = RequestMethod.POST) + public Object updateIngestionLoad(HttpServletRequest request, HttpServletResponse response, + @PathVariable(value = CATALOG_KEY) String catalog, + @PathVariable(value = DB_KEY) String db) { + if (needRedirect(request.getScheme())) { + return redirectToHttps(request); + } + + executeCheckPassword(request, response); + + if (!InternalCatalog.INTERNAL_CATALOG_NAME.equals(catalog)) { + return ResponseEntityBuilder.okWithCommonError("Only support internal catalog. " + + "Current catalog is " + catalog); + } + + Object redirectView = redirectToMaster(request, response); + if (redirectView != null) { + return redirectView; + } + + String fullDbName = getFullDbName(db); + + long loadId = -1; + try { + + String body = HttpUtils.getBody(request); + JsonMapper mapper = JsonMapper.builder().build(); + JsonNode jsonNode = mapper.readTree(body); + LoadJob loadJob = null; + + if (jsonNode.hasNonNull("loadId")) { + loadId = jsonNode.get("loadId").asLong(); + loadJob = Env.getCurrentEnv().getLoadManager().getLoadJob(loadId); + } + + if (loadJob == null) { + return ResponseEntityBuilder.okWithCommonError("load job not exists, load id: " + loadId); + } + + IngestionLoadJob ingestionLoadJob = (IngestionLoadJob) loadJob; + Set tableNames = ingestionLoadJob.getTableNames(); + for (String tableName : tableNames) { + checkTblAuth(ConnectContext.get().getCurrentUserIdentity(), fullDbName, tableName, PrivPredicate.LOAD); + } + Map statusInfo = mapper.readValue(jsonNode.get("statusInfo").traverse(), + new TypeReference>() { + }); + ingestionLoadJob.updateJobStatus(statusInfo); + } catch (IOException | MetaNotFoundException | UnauthorizedException e) { + LOG.warn("cancel ingestion load job failed, db: {}, load id: {}, err: {}", db, loadId, e.getMessage()); + return ResponseEntityBuilder.okWithCommonError(e.getMessage()); + } + + return ResponseEntityBuilder.ok(); + + } + } diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/EtlJobType.java b/fe/fe-core/src/main/java/org/apache/doris/load/EtlJobType.java index 95333d0f0250b9..7eaa89c97d0850 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/EtlJobType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/EtlJobType.java @@ -28,5 +28,6 @@ public enum EtlJobType { LOCAL_FILE, // create by job scheduler,inner use INSERT_JOB, + INGESTION, UNKNOWN } diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/IngestionLoadJob.java b/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/IngestionLoadJob.java new file mode 100644 index 00000000000000..1d6dbfa752a9e5 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/IngestionLoadJob.java @@ -0,0 +1,1139 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.load.loadv2; + +import org.apache.doris.analysis.CastExpr; +import org.apache.doris.analysis.DescriptorTable; +import org.apache.doris.analysis.Expr; +import org.apache.doris.analysis.LiteralExpr; +import org.apache.doris.analysis.SlotDescriptor; +import org.apache.doris.analysis.SlotRef; +import org.apache.doris.analysis.TupleDescriptor; +import org.apache.doris.analysis.UserIdentity; +import org.apache.doris.catalog.AggregateType; +import org.apache.doris.catalog.Column; +import org.apache.doris.catalog.Database; +import org.apache.doris.catalog.DistributionInfo; +import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.HashDistributionInfo; +import org.apache.doris.catalog.KeysType; +import org.apache.doris.catalog.MaterializedIndex; +import org.apache.doris.catalog.MaterializedIndexMeta; +import org.apache.doris.catalog.OlapTable; +import org.apache.doris.catalog.Partition; +import org.apache.doris.catalog.PartitionItem; +import org.apache.doris.catalog.PartitionKey; +import org.apache.doris.catalog.PartitionType; +import org.apache.doris.catalog.PrimitiveType; +import org.apache.doris.catalog.RangePartitionInfo; +import org.apache.doris.catalog.Replica; +import org.apache.doris.catalog.ScalarType; +import org.apache.doris.catalog.Table; +import org.apache.doris.catalog.TableIf; +import org.apache.doris.catalog.Tablet; +import org.apache.doris.catalog.Type; +import org.apache.doris.common.AnalysisException; +import org.apache.doris.common.DataQualityException; +import org.apache.doris.common.DdlException; +import org.apache.doris.common.DuplicatedRequestException; +import org.apache.doris.common.LabelAlreadyUsedException; +import org.apache.doris.common.LoadException; +import org.apache.doris.common.MetaNotFoundException; +import org.apache.doris.common.Pair; +import org.apache.doris.common.QuotaExceedException; +import org.apache.doris.common.UserException; +import org.apache.doris.common.io.Text; +import org.apache.doris.common.util.LogBuilder; +import org.apache.doris.common.util.LogKey; +import org.apache.doris.common.util.MetaLockUtils; +import org.apache.doris.load.EtlJobType; +import org.apache.doris.load.EtlStatus; +import org.apache.doris.load.FailMsg; +import org.apache.doris.service.ExecuteEnv; +import org.apache.doris.service.FrontendOptions; +import org.apache.doris.sparkdpp.DppResult; +import org.apache.doris.sparkdpp.EtlJobConfig; +import org.apache.doris.task.AgentBatchTask; +import org.apache.doris.task.AgentTaskExecutor; +import org.apache.doris.task.AgentTaskQueue; +import org.apache.doris.task.PushTask; +import org.apache.doris.thrift.TBrokerRangeDesc; +import org.apache.doris.thrift.TBrokerScanRange; +import org.apache.doris.thrift.TBrokerScanRangeParams; +import org.apache.doris.thrift.TColumn; +import org.apache.doris.thrift.TDescriptorTable; +import org.apache.doris.thrift.TEtlState; +import org.apache.doris.thrift.TFileFormatType; +import org.apache.doris.thrift.TFileType; +import org.apache.doris.thrift.TPriority; +import org.apache.doris.thrift.TPushType; +import org.apache.doris.thrift.TUniqueId; 
+import org.apache.doris.transaction.BeginTransactionException; +import org.apache.doris.transaction.TabletCommitInfo; +import org.apache.doris.transaction.TabletQuorumFailedException; +import org.apache.doris.transaction.TransactionState; + +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.collect.Range; +import com.google.common.collect.Sets; +import com.google.gson.Gson; +import com.google.gson.annotations.SerializedName; +import com.google.gson.reflect.TypeToken; +import lombok.Setter; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.io.DataInput; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * Ingestion Load + *

+ * Load data files which have been pre-processed by an external system + *
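+ * The job is created and driven through the HTTP API added in LoadAction:
+ * POST /api/ingestion_load/{catalog}/{db}/_create registers the job, begins a transaction and returns
+ * loadId, txnId, dbId, signature and the per-table meta (indexes and partition info) used by the ETL job;
+ * POST /api/ingestion_load/{catalog}/{db}/_update reports the ETL result (status, appId, dppResult,
+ * filePathToSize, hadoopProperties) back to this job.
+ *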

+ * There are 4 steps in IngestionLoadJob: + * Step1: Outside system execute ingestion etl job. + * Step2: LoadEtlChecker will check ingestion etl job status periodically + * and send push tasks to be when ingestion etl job is finished. + * Step3: LoadLoadingChecker will check loading status periodically and commit transaction when push tasks are finished. + * Step4: PublishVersionDaemon will send publish version tasks to be and finish transaction. + */ +public class IngestionLoadJob extends LoadJob { + + public static final Logger LOG = LogManager.getLogger(IngestionLoadJob.class); + + @Setter + @SerializedName("ests") + private EtlStatus etlStatus; + + // members below updated when job state changed to loading + // { tableId.partitionId.indexId.bucket.schemaHash -> (etlFilePath, etlFileSize) } + @SerializedName(value = "tm2fi") + private final Map> tabletMetaToFileInfo = Maps.newHashMap(); + + @SerializedName(value = "hp") + private final Map hadoopProperties = new HashMap<>(); + + @SerializedName(value = "i2sv") + private final Map indexToSchemaVersion = new HashMap<>(); + + private final Map indexToSchemaHash = Maps.newHashMap(); + + private final Map filePathToSize = new HashMap<>(); + + private final Set finishedReplicas = Sets.newHashSet(); + private final Set quorumTablets = Sets.newHashSet(); + private final Set fullTablets = Sets.newHashSet(); + + private final List commitInfos = Lists.newArrayList(); + + private final Map> tableToLoadPartitions = Maps.newHashMap(); + + private final Map> tabletToSentReplicaPushTask = Maps.newHashMap(); + + private long etlStartTimestamp = -1; + + private long quorumFinishTimestamp = -1; + + private List loadTableIds = new ArrayList<>(); + + public IngestionLoadJob() { + super(EtlJobType.INGESTION); + } + + public IngestionLoadJob(long dbId, String label, List tableNames, UserIdentity userInfo) + throws LoadException { + super(EtlJobType.INGESTION, dbId, label); + this.loadTableIds = getLoadTableIds(tableNames); + this.userInfo = userInfo; + } + + @Override + public Set getTableNamesForShow() { + return Collections.emptySet(); + } + + @Override + public Set getTableNames() throws MetaNotFoundException { + Set result = Sets.newHashSet(); + Database database = Env.getCurrentInternalCatalog().getDbOrMetaException(dbId); + for (long tableId : loadTableIds) { + Table table = database.getTableOrMetaException(tableId); + result.add(table.getName()); + } + return result; + } + + @Override + public void afterVisible(TransactionState txnState, boolean txnOperated) { + super.afterVisible(txnState, txnOperated); + clearJob(); + } + + @Override + public void afterAborted(TransactionState txnState, boolean txnOperated, String txnStatusChangeReason) + throws UserException { + super.afterAborted(txnState, txnOperated, txnStatusChangeReason); + clearJob(); + } + + @Override + public void cancelJobWithoutCheck(FailMsg failMsg, boolean abortTxn, boolean needLog) { + super.cancelJobWithoutCheck(failMsg, abortTxn, needLog); + clearJob(); + } + + @Override + public void cancelJob(FailMsg failMsg) throws DdlException { + super.cancelJob(failMsg); + clearJob(); + } + + private List getLoadTableIds(List tableNames) throws LoadException { + Database db = Env.getCurrentInternalCatalog() + .getDbOrException(dbId, s -> new LoadException("db does not exist. id: " + s)); + List list = new ArrayList<>(tableNames.size()); + for (String tableName : tableNames) { + OlapTable olapTable = (OlapTable) db.getTableOrException(tableName, + s -> new LoadException("table does not exist. 
id: " + s)); + list.add(olapTable.getId()); + } + return list; + } + + @Override + protected long getEtlStartTimestamp() { + return etlStartTimestamp; + } + + public long beginTransaction() + throws BeginTransactionException, MetaNotFoundException, AnalysisException, QuotaExceedException, + LabelAlreadyUsedException, DuplicatedRequestException { + this.transactionId = Env.getCurrentGlobalTransactionMgr() + .beginTransaction(dbId, loadTableIds, label, null, + new TransactionState.TxnCoordinator(TransactionState.TxnSourceType.FE, 0, + FrontendOptions.getLocalHostAddress(), ExecuteEnv.getInstance().getStartupTime()), + TransactionState.LoadJobSourceType.FRONTEND, id, getTimeout()); + return transactionId; + } + + public Map getLoadMeta(Map> tableToPartitionMap) + throws LoadException { + + if (tableToPartitionMap == null || tableToPartitionMap.isEmpty()) { + throw new IllegalArgumentException("tableToPartitionMap is empty"); + } + + Database db = Env.getCurrentInternalCatalog() + .getDbOrException(dbId, s -> new LoadException("db does not exist. id: " + s)); + Map loadMeta = new HashMap<>(); + loadMeta.put("dbId", db.getId()); + Long signature = Env.getCurrentEnv().getNextId(); + loadMeta.put("signature", signature); + + List tables; + try { + tables = db.getTablesOnIdOrderOrThrowException(loadTableIds); + } catch (MetaNotFoundException e) { + throw new LoadException(e.getMessage()); + } + + MetaLockUtils.readLockTables(tables); + try { + Map> tableMeta = new HashMap<>(tableToPartitionMap.size()); + for (Map.Entry> entry : tableToPartitionMap.entrySet()) { + String tableName = entry.getKey(); + Map meta = tableMeta.getOrDefault(tableName, new HashMap<>()); + OlapTable olapTable = (OlapTable) db.getTableOrException(tableName, + s -> new LoadException("table does not exist. 
id: " + s)); + meta.put("id", olapTable.getId()); + List indices = createEtlIndexes(olapTable); + meta.put("indexes", indices); + List partitionNames = entry.getValue(); + Set partitionIds; + if (partitionNames != null && !partitionNames.isEmpty()) { + partitionIds = new HashSet<>(partitionNames.size()); + for (String partitionName : partitionNames) { + Partition partition = olapTable.getPartition(partitionName); + if (partition == null) { + throw new LoadException(String.format("partition %s is not exists", partitionName)); + } + partitionIds.add(partition.getId()); + } + } else { + partitionIds = + olapTable.getAllPartitions().stream().map(Partition::getId).collect(Collectors.toSet()); + } + EtlJobConfig.EtlPartitionInfo etlPartitionInfo = createEtlPartitionInfo(olapTable, partitionIds); + meta.put("partitionInfo", etlPartitionInfo); + tableMeta.put(tableName, meta); + + if (tableToLoadPartitions.containsKey(olapTable.getId())) { + tableToLoadPartitions.get(olapTable.getId()).addAll(partitionIds); + } else { + tableToLoadPartitions.put(olapTable.getId(), partitionIds); + } + + } + loadMeta.put("tableMeta", tableMeta); + } finally { + MetaLockUtils.readUnlockTables(tables); + } + return loadMeta; + + } + + private List createEtlIndexes(OlapTable table) throws LoadException { + List etlIndexes = Lists.newArrayList(); + + for (Map.Entry> entry : table.getIndexIdToSchema().entrySet()) { + long indexId = entry.getKey(); + // todo(liheng): get schema hash and version from materialized index meta directly + MaterializedIndexMeta indexMeta = table.getIndexMetaByIndexId(indexId); + int schemaHash = indexMeta.getSchemaHash(); + int schemaVersion = indexMeta.getSchemaVersion(); + + boolean changeAggType = table.getKeysTypeByIndexId(indexId).equals(KeysType.UNIQUE_KEYS) + && table.getTableProperty().getEnableUniqueKeyMergeOnWrite(); + + // columns + List etlColumns = Lists.newArrayList(); + for (Column column : entry.getValue()) { + etlColumns.add(createEtlColumn(column, changeAggType)); + } + + // check distribution type + DistributionInfo distributionInfo = table.getDefaultDistributionInfo(); + if (distributionInfo.getType() != DistributionInfo.DistributionInfoType.HASH) { + // RANDOM not supported + String errMsg = "Unsupported distribution type. type: " + distributionInfo.getType().name(); + LOG.warn(errMsg); + throw new LoadException(errMsg); + } + + // index type + String indexType; + KeysType keysType = table.getKeysTypeByIndexId(indexId); + switch (keysType) { + case DUP_KEYS: + indexType = "DUPLICATE"; + break; + case AGG_KEYS: + indexType = "AGGREGATE"; + break; + case UNIQUE_KEYS: + indexType = "UNIQUE"; + break; + default: + String errMsg = "unknown keys type. 
type: " + keysType.name(); + LOG.warn(errMsg); + throw new LoadException(errMsg); + } + + indexToSchemaVersion.put(indexId, schemaVersion); + + etlIndexes.add(new EtlJobConfig.EtlIndex(indexId, etlColumns, schemaHash, indexType, + indexId == table.getBaseIndexId(), schemaVersion)); + } + + return etlIndexes; + } + + private EtlJobConfig.EtlColumn createEtlColumn(Column column, boolean changeAggType) { + // column name + String name = column.getName().toLowerCase(Locale.ROOT); + // column type + PrimitiveType type = column.getDataType(); + String columnType = column.getDataType().toString(); + // is allow null + boolean isAllowNull = column.isAllowNull(); + // is key + boolean isKey = column.isKey(); + + // aggregation type + String aggregationType = null; + if (column.getAggregationType() != null) { + if (changeAggType && !column.isKey()) { + aggregationType = AggregateType.REPLACE.toSql(); + } else { + aggregationType = column.getAggregationType().toString(); + } + } + + // default value + String defaultValue = null; + if (column.getDefaultValue() != null) { + defaultValue = column.getDefaultValue(); + } + if (column.isAllowNull() && column.getDefaultValue() == null) { + defaultValue = "\\N"; + } + + // string length + int stringLength = 0; + if (type.isStringType()) { + stringLength = column.getStrLen(); + } + + // decimal precision scale + int precision = 0; + int scale = 0; + if (type.isDecimalV2Type() || type.isDecimalV3Type()) { + precision = column.getPrecision(); + scale = column.getScale(); + } + + return new EtlJobConfig.EtlColumn(name, columnType, isAllowNull, isKey, aggregationType, defaultValue, + stringLength, precision, scale); + } + + private EtlJobConfig.EtlPartitionInfo createEtlPartitionInfo(OlapTable table, Set partitionIds) + throws LoadException { + PartitionType type = table.getPartitionInfo().getType(); + + List partitionColumnRefs = Lists.newArrayList(); + List etlPartitions = Lists.newArrayList(); + if (type == PartitionType.RANGE) { + RangePartitionInfo rangePartitionInfo = (RangePartitionInfo) table.getPartitionInfo(); + for (Column column : rangePartitionInfo.getPartitionColumns()) { + partitionColumnRefs.add(column.getName()); + } + + for (Map.Entry entry : rangePartitionInfo.getAllPartitionItemEntryList(true)) { + long partitionId = entry.getKey(); + if (!partitionIds.contains(partitionId)) { + continue; + } + + Partition partition = table.getPartition(partitionId); + if (partition == null) { + throw new LoadException("partition does not exist. 
id: " + partitionId); + } + + // bucket num + int bucketNum = partition.getDistributionInfo().getBucketNum(); + + // is max partition + Range range = entry.getValue().getItems(); + boolean isMaxPartition = range.upperEndpoint().isMaxValue(); + + // start keys + List rangeKeyExprs = range.lowerEndpoint().getKeys(); + List startKeys = Lists.newArrayList(); + for (LiteralExpr literalExpr : rangeKeyExprs) { + Object keyValue = literalExpr.getRealValue(); + startKeys.add(keyValue); + } + + // end keys + // is empty list when max partition + List endKeys = Lists.newArrayList(); + if (!isMaxPartition) { + rangeKeyExprs = range.upperEndpoint().getKeys(); + for (LiteralExpr literalExpr : rangeKeyExprs) { + Object keyValue = literalExpr.getRealValue(); + endKeys.add(keyValue); + } + } + + etlPartitions.add( + new EtlJobConfig.EtlPartition(partitionId, startKeys, endKeys, isMaxPartition, bucketNum)); + } + } else if (type == PartitionType.UNPARTITIONED) { + Preconditions.checkState(partitionIds.size() == 1, "partition size must be eqauls to 1"); + + for (Long partitionId : partitionIds) { + Partition partition = table.getPartition(partitionId); + if (partition == null) { + throw new LoadException("partition does not exist. id: " + partitionId); + } + + // bucket num + int bucketNum = partition.getDistributionInfo().getBucketNum(); + + etlPartitions.add(new EtlJobConfig.EtlPartition(partitionId, Lists.newArrayList(), Lists.newArrayList(), + true, bucketNum)); + } + } else { + throw new LoadException("Spark Load does not support list partition yet"); + } + + // distribution column refs + List distributionColumnRefs = Lists.newArrayList(); + DistributionInfo distributionInfo = table.getDefaultDistributionInfo(); + Preconditions.checkState(distributionInfo.getType() == DistributionInfo.DistributionInfoType.HASH); + for (Column column : ((HashDistributionInfo) distributionInfo).getDistributionColumns()) { + distributionColumnRefs.add(column.getName()); + } + + return new EtlJobConfig.EtlPartitionInfo(type.typeString, partitionColumnRefs, distributionColumnRefs, + etlPartitions); + } + + public void updateEtlStatus() throws Exception { + + if (!checkState(JobState.ETL) || etlStatus == null) { + return; + } + + writeLock(); + try { + switch (etlStatus.getState()) { + case FINISHED: + unprotectedProcessEtlFinish(); + break; + case CANCELLED: + throw new LoadException("spark etl job failed. msg: " + etlStatus.getFailMsg()); + default: + break; + } + } finally { + writeUnlock(); + } + + if (checkState(JobState.LOADING)) { + submitPushTasks(); + } + + } + + private boolean checkState(JobState expectState) { + readLock(); + try { + return state == expectState; + } finally { + readUnlock(); + } + } + + private Set submitPushTasks() throws UserException { + + // check db exist + Database db = null; + try { + db = getDb(); + } catch (MetaNotFoundException e) { + String errMsg = new LogBuilder(LogKey.LOAD_JOB, id).add("database_id", dbId).add("label", label) + .add("error_msg", "db has been deleted when job is loading").build(); + throw new MetaNotFoundException(errMsg); + } + + AgentBatchTask batchTask = new AgentBatchTask(); + boolean hasLoadPartitions = false; + Set totalTablets = Sets.newHashSet(); + List tableList = db.getTablesOnIdOrderOrThrowException( + Lists.newArrayList(tableToLoadPartitions.keySet())); + MetaLockUtils.readLockTables(tableList); + try { + writeLock(); + try { + // check state is still loading. If state is cancelled or finished, return. 
+ // if state is cancelled or finished and not return, + // this would throw all partitions have no load data exception, + // because tableToLoadPartitions was already cleaned up, + if (state != JobState.LOADING) { + LOG.warn("job state is not loading. job id: {}, state: {}", id, state); + return totalTablets; + } + + for (TableIf table : tableList) { + Set partitionIds = tableToLoadPartitions.get(table.getId()); + OlapTable olapTable = (OlapTable) table; + for (long partitionId : partitionIds) { + Partition partition = olapTable.getPartition(partitionId); + if (partition == null) { + throw new LoadException("partition does not exist. id: " + partitionId); + } + + hasLoadPartitions = true; + int quorumReplicaNum = + olapTable.getPartitionInfo().getReplicaAllocation(partitionId).getTotalReplicaNum() / 2 + + 1; + + List indexes = partition.getMaterializedIndices( + MaterializedIndex.IndexExtState.ALL); + for (MaterializedIndex index : indexes) { + long indexId = index.getId(); + MaterializedIndexMeta indexMeta = olapTable.getIndexMetaByIndexId(indexId); + int schemaVersion = indexMeta.getSchemaVersion(); + int schemaHash = indexMeta.getSchemaHash(); + + // check schemaHash and schemaVersion whether is changed + checkIndexSchema(indexId, schemaHash, schemaVersion); + + int bucket = 0; + for (Tablet tablet : index.getTablets()) { + long tabletId = tablet.getId(); + totalTablets.add(tabletId); + Set tabletAllReplicas = Sets.newHashSet(); + Set tabletFinishedReplicas = Sets.newHashSet(); + for (Replica replica : tablet.getReplicas()) { + long replicaId = replica.getId(); + tabletAllReplicas.add(replicaId); + if (!tabletToSentReplicaPushTask.containsKey(tabletId) + || !tabletToSentReplicaPushTask.get(tabletId).containsKey(replicaId)) { + long backendId = replica.getBackendId(); + long taskSignature = Env.getCurrentGlobalTransactionMgr() + .getNextTransactionId(); + + PushTask pushTask = + buildPushTask(backendId, olapTable, taskSignature, partitionId, indexId, + tabletId, replicaId, schemaHash, schemaVersion, bucket++); + if (AgentTaskQueue.addTask(pushTask)) { + batchTask.addTask(pushTask); + if (!tabletToSentReplicaPushTask.containsKey(tabletId)) { + tabletToSentReplicaPushTask.put(tabletId, Maps.newHashMap()); + } + tabletToSentReplicaPushTask.get(tabletId).put(replicaId, pushTask); + } + } + + if (finishedReplicas.contains(replicaId) && replica.getLastFailedVersion() < 0) { + tabletFinishedReplicas.add(replicaId); + } + } + + if (tabletAllReplicas.isEmpty()) { + LOG.error("invalid situation. tablet is empty. 
id: {}", tabletId); + } + + // check tablet push states + if (tabletFinishedReplicas.size() >= quorumReplicaNum) { + quorumTablets.add(tabletId); + if (tabletFinishedReplicas.size() == tabletAllReplicas.size()) { + fullTablets.add(tabletId); + } + } + } + } + } + } + + if (batchTask.getTaskNum() > 0) { + AgentTaskExecutor.submit(batchTask); + } + + if (!hasLoadPartitions) { + String errMsg = new LogBuilder(LogKey.LOAD_JOB, id).add("database_id", dbId).add("label", label) + .add("error_msg", "all partitions have no load data").build(); + throw new LoadException(errMsg); + } + + return totalTablets; + } finally { + writeUnlock(); + } + } finally { + MetaLockUtils.readUnlockTables(tableList); + } + + } + + public void updateJobStatus(Map statusInfo) { + + updateState(statusInfo.get("status"), statusInfo.get("msg")); + + etlStatus.setTrackingUrl(statusInfo.get("appId")); + etlStatus.setProgress(progress); + + if (etlStatus.getState() == TEtlState.FINISHED) { + Gson gson = new Gson(); + DppResult dppResult = gson.fromJson(statusInfo.get("dppResult"), DppResult.class); + loadStatistic.fileNum = (int) dppResult.fileNumber; + loadStatistic.totalFileSizeB = dppResult.fileSize; + TUniqueId dummyId = new TUniqueId(0, 0); + long dummyBackendId = -1L; + loadStatistic.initLoad(dummyId, Sets.newHashSet(dummyId), Lists.newArrayList(dummyBackendId)); + loadStatistic.updateLoadProgress(dummyBackendId, dummyId, dummyId, dppResult.scannedRows, + dppResult.scannedBytes, true); + loadingStatus.setDppResult(dppResult); + Map counters = loadingStatus.getCounters(); + counters.put(DPP_NORMAL_ALL, String.valueOf(dppResult.normalRows)); + counters.put(DPP_ABNORMAL_ALL, String.valueOf(dppResult.abnormalRows)); + counters.put(UNSELECTED_ROWS, String.valueOf(dppResult.unselectRows)); + filePathToSize.putAll( + gson.fromJson(statusInfo.get("filePathToSize"), new TypeToken>() { + })); + hadoopProperties.putAll( + gson.fromJson(statusInfo.get("hadoopProperties"), new TypeToken>() { + })); + } + + } + + private void updateState(String stateStr, String msg) { + + switch (stateStr.toLowerCase()) { + case "running": + etlStatus.setState(TEtlState.RUNNING); + break; + case "success": + etlStatus.setState(TEtlState.FINISHED); + break; + case "failed": + boolean res = etlStatus.setState(TEtlState.CANCELLED); + if (!res) { + etlStatus = new EtlStatus(); + etlStatus.setState(TEtlState.CANCELLED); + } + etlStatus.setFailMsg(msg); + break; + default: + etlStatus.setState(TEtlState.UNKNOWN); + break; + } + + } + + public void startEtlJob() { + etlStartTimestamp = System.currentTimeMillis(); + state = JobState.ETL; + etlStatus = new EtlStatus(); + unprotectedLogUpdateStateInfo(); + } + + private void unprotectedUpdateToLoadingState(EtlStatus etlStatus, Map filePathToSize) + throws LoadException { + try { + for (Map.Entry entry : filePathToSize.entrySet()) { + String filePath = entry.getKey(); + if (!filePath.endsWith(EtlJobConfig.ETL_OUTPUT_FILE_FORMAT)) { + continue; + } + String tabletMetaStr = EtlJobConfig.getTabletMetaStr(filePath); + tabletMetaToFileInfo.put(tabletMetaStr, Pair.of(filePath, entry.getValue())); + } + + loadingStatus = etlStatus; + progress = 0; + Env.getCurrentProgressManager().registerProgressSimple(String.valueOf(id)); + unprotectedUpdateState(JobState.LOADING); + LOG.info("update to {} state success. job id: {}", state, id); + } catch (Exception e) { + LOG.warn("update to {} state failed. 
job id: {}", state, id, e); + throw new LoadException(e.getMessage(), e); + } + } + + private void unprotectedPrepareLoadingInfos() { + for (String tabletMetaStr : tabletMetaToFileInfo.keySet()) { + String[] fileNameArr = tabletMetaStr.split("\\."); + // tableId.partitionId.indexId.bucket.schemaHash + Preconditions.checkState(fileNameArr.length == 5); + long tableId = Long.parseLong(fileNameArr[0]); + long partitionId = Long.parseLong(fileNameArr[1]); + long indexId = Long.parseLong(fileNameArr[2]); + int schemaHash = Integer.parseInt(fileNameArr[4]); + + if (!tableToLoadPartitions.containsKey(tableId)) { + tableToLoadPartitions.put(tableId, Sets.newHashSet()); + } + tableToLoadPartitions.get(tableId).add(partitionId); + + indexToSchemaHash.put(indexId, schemaHash); + } + } + + private void unprotectedProcessEtlFinish() throws Exception { + // checkDataQuality + if (!checkDataQuality()) { + throw new DataQualityException(DataQualityException.QUALITY_FAIL_MSG); + } + + // get etl output files and update loading state + unprotectedUpdateToLoadingState(etlStatus, filePathToSize); + // log loading state + unprotectedLogUpdateStateInfo(); + // prepare loading infos + unprotectedPrepareLoadingInfos(); + } + + private TBrokerScanRange getTBrokerScanRange(DescriptorTable descTable, TupleDescriptor destTupleDesc, + List columns, Map properties) + throws AnalysisException { + + TBrokerScanRange brokerScanRange = new TBrokerScanRange(); + + TBrokerScanRangeParams params = new TBrokerScanRangeParams(); + params.setStrictMode(false); + params.setProperties(properties); + TupleDescriptor srcTupleDesc = descTable.createTupleDescriptor(); + Map srcSlotDescByName = Maps.newHashMap(); + for (Column column : columns) { + SlotDescriptor srcSlotDesc = descTable.addSlotDescriptor(srcTupleDesc); + srcSlotDesc.setIsMaterialized(true); + srcSlotDesc.setIsNullable(true); + + if (column.getDataType() == PrimitiveType.BITMAP) { + // cast to bitmap when the target column type is bitmap + srcSlotDesc.setType(ScalarType.createType(PrimitiveType.BITMAP)); + srcSlotDesc.setColumn(new Column(column.getName(), PrimitiveType.BITMAP)); + } else { + srcSlotDesc.setType(ScalarType.createType(PrimitiveType.VARCHAR)); + srcSlotDesc.setColumn(new Column(column.getName(), PrimitiveType.VARCHAR)); + } + + params.addToSrcSlotIds(srcSlotDesc.getId().asInt()); + srcSlotDescByName.put(column.getName(), srcSlotDesc); + } + + Map destSidToSrcSidWithoutTrans = Maps.newHashMap(); + for (SlotDescriptor destSlotDesc : destTupleDesc.getSlots()) { + if (!destSlotDesc.isMaterialized()) { + continue; + } + + SlotDescriptor srcSlotDesc = srcSlotDescByName.get(destSlotDesc.getColumn().getName()); + destSidToSrcSidWithoutTrans.put(destSlotDesc.getId().asInt(), srcSlotDesc.getId().asInt()); + Expr expr = new SlotRef(srcSlotDesc); + expr = castToSlot(destSlotDesc, expr); + params.putToExprOfDestSlot(destSlotDesc.getId().asInt(), expr.treeToThrift()); + } + params.setDestSidToSrcSidWithoutTrans(destSidToSrcSidWithoutTrans); + params.setSrcTupleId(srcTupleDesc.getId().asInt()); + params.setDestTupleId(destTupleDesc.getId().asInt()); + brokerScanRange.setParams(params); + + // broker address updated for each replica + brokerScanRange.setBrokerAddresses(Lists.newArrayList()); + + // broker range desc + TBrokerRangeDesc tBrokerRangeDesc = new TBrokerRangeDesc(); + tBrokerRangeDesc.setFileType(TFileType.FILE_HDFS); + tBrokerRangeDesc.setFormatType(TFileFormatType.FORMAT_PARQUET); + tBrokerRangeDesc.setSplittable(false); + tBrokerRangeDesc.setStartOffset(0); 
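+        // offset 0 and size -1 are placeholders here; buildPushTask() fills in the real file path and
+        // file size for each tablet from tabletMetaToFileInfo before the push task is sent.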
+ tBrokerRangeDesc.setSize(-1); + // path and file size updated for each replica + brokerScanRange.setRanges(Collections.singletonList(tBrokerRangeDesc)); + + return brokerScanRange; + + } + + private Expr castToSlot(SlotDescriptor slotDesc, Expr expr) throws AnalysisException { + PrimitiveType dstType = slotDesc.getType().getPrimitiveType(); + PrimitiveType srcType = expr.getType().getPrimitiveType(); + if (dstType == PrimitiveType.BOOLEAN && srcType == PrimitiveType.VARCHAR) { + // there is no cast VARCHAR to BOOLEAN function, + // so we cast VARCHAR to TINYINT first, then cast TINYINT to BOOLEAN + return new CastExpr(Type.BOOLEAN, new CastExpr(Type.TINYINT, expr)); + } + if (dstType != srcType) { + return expr.castTo(slotDesc.getType()); + } + return expr; + } + + private TDescriptorTable getTDescriptorTable(DescriptorTable descTable) { + descTable.computeStatAndMemLayout(); + return descTable.toThrift(); + } + + private PushTask buildPushTask(long backendId, OlapTable olapTable, long taskSignature, long partitionId, + long indexId, long tabletId, long replicaId, int schemaHash, int schemaVersion, + long bucket) + throws AnalysisException { + + DescriptorTable descTable = new DescriptorTable(); + TupleDescriptor destTupleDesc = descTable.createTupleDescriptor(); + + List columnsDesc = new ArrayList<>(); + List columns = new ArrayList<>(); + for (Column column : olapTable.getSchemaByIndexId(indexId)) { + Column col = new Column(column); + col.setName(column.getName().toLowerCase(Locale.ROOT)); + columns.add(col); + columnsDesc.add(col.toThrift()); + // use index schema to fill the descriptor table + SlotDescriptor destSlotDesc = descTable.addSlotDescriptor(destTupleDesc); + destSlotDesc.setIsMaterialized(true); + destSlotDesc.setColumn(col); + destSlotDesc.setIsNullable(col.isAllowNull()); + } + + // deep copy TBrokerScanRange because filePath and fileSize will be updated + // in different tablet push task + TBrokerScanRange tBrokerScanRange = + getTBrokerScanRange(descTable, destTupleDesc, columns, hadoopProperties); + // update filePath fileSize + TBrokerRangeDesc tBrokerRangeDesc = tBrokerScanRange.getRanges().get(0); + tBrokerRangeDesc.setFileType(TFileType.FILE_HDFS); + tBrokerRangeDesc.setPath(""); + tBrokerRangeDesc.setFileSize(-1); + String tabletMetaStr = String.format("%d.%d.%d.%d.%d", olapTable.getId(), partitionId, + indexId, bucket, schemaHash); + if (tabletMetaToFileInfo.containsKey(tabletMetaStr)) { + Pair fileInfo = tabletMetaToFileInfo.get(tabletMetaStr); + tBrokerRangeDesc.setPath(fileInfo.first); + tBrokerRangeDesc.setFileSize(fileInfo.second); + } + + TDescriptorTable tDescriptorTable = getTDescriptorTable(descTable); + + return new PushTask(backendId, dbId, olapTable.getId(), + partitionId, indexId, tabletId, replicaId, schemaHash, 0, id, + TPushType.LOAD_V2, TPriority.NORMAL, transactionId, taskSignature, + tBrokerScanRange, tDescriptorTable, columnsDesc, + olapTable.getStorageVaultId(), schemaVersion); + } + + public void updateLoadingStatus() throws UserException { + if (!checkState(JobState.LOADING)) { + return; + } + + if (etlStatus.getState() == TEtlState.CANCELLED) { + throw new LoadException(etlStatus.getFailMsg()); + } + + // submit push tasks + Set totalTablets = submitPushTasks(); + if (totalTablets.isEmpty()) { + LOG.warn("total tablets set is empty. 
job id: {}, state: {}", id, state); + return; + } + + // update status + boolean canCommitJob = false; + writeLock(); + try { + // loading progress + // 100: txn status is visible and load has been finished + progress = fullTablets.size() * 100 / totalTablets.size(); + if (progress == 100) { + progress = 99; + } + + // quorum finish ts + if (quorumFinishTimestamp < 0 && quorumTablets.containsAll(totalTablets)) { + quorumFinishTimestamp = System.currentTimeMillis(); + } + + // if all replicas are finished or stay in quorum finished for long time, try to commit it. + long stragglerTimeout = 300 * 1000; + if ((quorumFinishTimestamp > 0 && System.currentTimeMillis() - quorumFinishTimestamp > stragglerTimeout) + || fullTablets.containsAll(totalTablets)) { + canCommitJob = true; + } + } finally { + writeUnlock(); + } + + // try commit transaction + if (canCommitJob) { + tryCommitJob(); + } + } + + private void tryCommitJob() throws UserException { + LOG.info(new LogBuilder(LogKey.LOAD_JOB, id).add("txn_id", transactionId) + .add("msg", "Load job try to commit txn").build()); + Database db = getDb(); + List
tableList = db.getTablesOnIdOrderOrThrowException( + Lists.newArrayList(tableToLoadPartitions.keySet())); + MetaLockUtils.writeLockTablesOrMetaException(tableList); + try { + Env.getCurrentGlobalTransactionMgr().commitTransactionWithoutLock( + dbId, tableList, transactionId, commitInfos, + new LoadJobFinalOperation(id, loadingStatus, progress, loadStartTimestamp, + finishTimestamp, state, failMsg)); + } catch (TabletQuorumFailedException e) { + // retry in next loop + } finally { + MetaLockUtils.writeUnlockTables(tableList); + } + } + + public void addFinishedReplica(long replicaId, long tabletId, long backendId) { + writeLock(); + try { + if (finishedReplicas.add(replicaId)) { + commitInfos.add(new TabletCommitInfo(tabletId, backendId)); + // set replica push task null + Map sentReplicaPushTask = tabletToSentReplicaPushTask.get(tabletId); + if (sentReplicaPushTask != null) { + if (sentReplicaPushTask.containsKey(replicaId)) { + sentReplicaPushTask.put(replicaId, null); + } + } + } + } finally { + writeUnlock(); + } + } + + private void clearJob() { + Preconditions.checkState(state == JobState.FINISHED || state == JobState.CANCELLED); + + if (LOG.isDebugEnabled()) { + LOG.debug("clear push tasks and infos that not persist. id: {}, state: {}", id, state); + } + writeLock(); + try { + // clear push task first + for (Map sentReplicaPushTask : tabletToSentReplicaPushTask.values()) { + for (PushTask pushTask : sentReplicaPushTask.values()) { + if (pushTask == null) { + continue; + } + AgentTaskQueue.removeTask(pushTask.getBackendId(), pushTask.getTaskType(), pushTask.getSignature()); + } + } + tableToLoadPartitions.clear(); + indexToSchemaHash.clear(); + tabletToSentReplicaPushTask.clear(); + finishedReplicas.clear(); + quorumTablets.clear(); + fullTablets.clear(); + + Env.getCurrentProgressManager().removeProgress(String.valueOf(progress)); + } finally { + writeUnlock(); + } + } + + private void unprotectedLogUpdateStateInfo() { + IngestionLoadJobStateUpdateInfo info = + new IngestionLoadJobStateUpdateInfo(id, state, transactionId, etlStartTimestamp, loadStartTimestamp, + etlStatus, tabletMetaToFileInfo, hadoopProperties, indexToSchemaVersion); + Env.getCurrentEnv().getEditLog().logUpdateLoadJob(info); + } + + public static class IngestionLoadJobStateUpdateInfo extends LoadJobStateUpdateInfo { + + @SerializedName(value = "etlStartTimestamp") + private long etlStartTimestamp; + @SerializedName(value = "etlStatus") + private EtlStatus etlStatus; + @SerializedName(value = "tabletMetaToFileInfo") + private Map> tabletMetaToFileInfo; + @SerializedName(value = "hadoopProperties") + private Map hadoopProperties; + @SerializedName(value = "indexToSchemaVersion") + private Map indexToSchemaVersion; + + public IngestionLoadJobStateUpdateInfo(long jobId, JobState state, long transactionId, + long etlStartTimestamp, long loadStartTimestamp, EtlStatus etlStatus, + Map> tabletMetaToFileInfo, + Map hadoopProperties, + Map indexToSchemaVersion) { + super(jobId, state, transactionId, loadStartTimestamp); + this.etlStartTimestamp = etlStartTimestamp; + this.etlStatus = etlStatus; + this.tabletMetaToFileInfo = tabletMetaToFileInfo; + this.hadoopProperties = hadoopProperties; + this.indexToSchemaVersion = indexToSchemaVersion; + } + + public long getEtlStartTimestamp() { + return etlStartTimestamp; + } + + public EtlStatus getEtlStatus() { + return etlStatus; + } + + public Map> getTabletMetaToFileInfo() { + return tabletMetaToFileInfo; + } + + public Map getHadoopProperties() { + return hadoopProperties; + } + + 
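+        // Note (illustrative, hypothetical values): this state-update record is persisted to the edit log by
+        // unprotectedLogUpdateStateInfo() and replayed via replayUpdateStateInfo() below. tabletMetaToFileInfo
+        // keys use the "<tableId>.<partitionId>.<indexId>.<bucket>.<schemaHash>" format built in buildPushTask(),
+        // and each value is the (path, size) of the pre-processed file produced by the external ETL job, whose
+        // name follows V1.<label>.<tableId>.<partitionId>.<indexId>.<bucket>.<schemaHash>.parquet, for example
+        // "10032.10031.10030.0.1234567890" -> Pair.of("hdfs://ns1/tmp/V1.demo.10032.10031.10030.0.1234567890.parquet", 81758L)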
public Map getIndexToSchemaVersion() { + return indexToSchemaVersion; + } + } + + @Override + public void replayUpdateStateInfo(LoadJobStateUpdateInfo info) { + super.replayUpdateStateInfo(info); + IngestionLoadJobStateUpdateInfo stateUpdateInfo = (IngestionLoadJobStateUpdateInfo) info; + this.etlStartTimestamp = stateUpdateInfo.getEtlStartTimestamp(); + this.etlStatus = stateUpdateInfo.getEtlStatus(); + if (stateUpdateInfo.getTabletMetaToFileInfo() != null) { + this.tabletMetaToFileInfo.putAll(stateUpdateInfo.getTabletMetaToFileInfo()); + } + if (stateUpdateInfo.getHadoopProperties() != null) { + this.hadoopProperties.putAll(stateUpdateInfo.getHadoopProperties()); + } + if (stateUpdateInfo.getIndexToSchemaVersion() != null) { + this.indexToSchemaVersion.putAll(stateUpdateInfo.getIndexToSchemaVersion()); + } + switch (state) { + case ETL: + break; + case LOADING: + unprotectedPrepareLoadingInfos(); + break; + default: + LOG.warn("replay update load job state info failed. error: wrong state. job id: {}, state: {}", id, + state); + break; + } + } + + @Override + protected void readFields(DataInput in) throws IOException { + super.readFields(in); + this.etlStartTimestamp = in.readLong(); + this.etlStatus = new EtlStatus(); + this.etlStatus.readFields(in); + int size = in.readInt(); + for (int i = 0; i < size; i++) { + String tabletMetaStr = Text.readString(in); + Pair fileInfo = Pair.of(Text.readString(in), in.readLong()); + tabletMetaToFileInfo.put(tabletMetaStr, fileInfo); + } + size = in.readInt(); + for (int i = 0; i < size; i++) { + String propKey = Text.readString(in); + String propValue = Text.readString(in); + hadoopProperties.put(propKey, propValue); + } + size = in.readInt(); + for (int i = 0; i < size; i++) { + indexToSchemaVersion.put(in.readLong(), in.readInt()); + } + } + + private void checkIndexSchema(long indexId, int schemaHash, int schemaVersion) throws LoadException { + if (indexToSchemaHash.containsKey(indexId) && indexToSchemaHash.get(indexId) == schemaHash + && indexToSchemaVersion.containsKey(indexId) && indexToSchemaVersion.get(indexId) == schemaVersion) { + return; + } + throw new LoadException( + "schema of index [" + indexId + "] has changed, old schemaHash: " + indexToSchemaHash.get(indexId) + + ", current schemaHash: " + schemaHash + ", old schemaVersion: " + + indexToSchemaVersion.get(indexId) + ", current schemaVersion: " + schemaVersion); + } + +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/LoadJob.java b/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/LoadJob.java index a9c48469ac50e6..4213b0f3a3a772 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/LoadJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/LoadJob.java @@ -340,6 +340,9 @@ private void initDefaultJobProperties() { case MINI: timeout = Config.stream_load_default_timeout_second; break; + case INGESTION: + timeout = Config.ingestion_load_default_timeout_second; + break; default: break; } @@ -869,6 +872,8 @@ public static LoadJob read(DataInput in) throws IOException { job = new MiniLoadJob(); } else if (type == EtlJobType.COPY) { job = new CopyJob(); + } else if (type == EtlJobType.INGESTION) { + job = new IngestionLoadJob(); } else { throw new IOException("Unknown load type: " + type.name()); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/LoadManager.java b/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/LoadManager.java index f661513cd66558..da47bf81c6b07f 100644 --- 
a/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/LoadManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/LoadManager.java @@ -32,6 +32,7 @@ import org.apache.doris.common.DataQualityException; import org.apache.doris.common.DdlException; import org.apache.doris.common.LabelAlreadyUsedException; +import org.apache.doris.common.LoadException; import org.apache.doris.common.MetaNotFoundException; import org.apache.doris.common.Pair; import org.apache.doris.common.PatternMatcher; @@ -494,10 +495,16 @@ private void removeLoadJobIf(Predicate pred) { * Only for those jobs which have etl state, like SparkLoadJob. **/ public void processEtlStateJobs() { - idToLoadJob.values().stream().filter(job -> (job.jobType == EtlJobType.SPARK && job.state == JobState.ETL)) + idToLoadJob.values().stream() + .filter(job -> ((job.jobType == EtlJobType.SPARK || job.jobType == EtlJobType.INGESTION) + && job.state == JobState.ETL)) .forEach(job -> { try { - ((SparkLoadJob) job).updateEtlStatus(); + if (job instanceof SparkLoadJob) { + ((SparkLoadJob) job).updateEtlStatus(); + } else if (job instanceof IngestionLoadJob) { + ((IngestionLoadJob) job).updateEtlStatus(); + } } catch (DataQualityException e) { LOG.info("update load job etl status failed. job id: {}", job.getId(), e); job.cancelJobWithoutCheck(new FailMsg(FailMsg.CancelType.ETL_QUALITY_UNSATISFIED, @@ -515,10 +522,16 @@ public void processEtlStateJobs() { * Only for those jobs which load by PushTask. **/ public void processLoadingStateJobs() { - idToLoadJob.values().stream().filter(job -> (job.jobType == EtlJobType.SPARK && job.state == JobState.LOADING)) + idToLoadJob.values().stream() + .filter(job -> ((job.jobType == EtlJobType.SPARK || job.jobType == EtlJobType.INGESTION) + && job.state == JobState.LOADING)) .forEach(job -> { try { - ((SparkLoadJob) job).updateLoadingStatus(); + if (job instanceof SparkLoadJob) { + ((SparkLoadJob) job).updateLoadingStatus(); + } else if (job instanceof IngestionLoadJob) { + ((IngestionLoadJob) job).updateLoadingStatus(); + } } catch (UserException e) { LOG.warn("update load job loading status failed. job id: {}", job.getId(), e); job.cancelJobWithoutCheck(new FailMsg(CancelType.LOAD_RUN_FAIL, e.getMessage()), true, true); @@ -569,8 +582,8 @@ public List> getCreateLoadStmt(long dbId, String label) throw * @param accurateMatch true: filter jobs which's label is labelValue. false: filter jobs which's label like itself. * @param statesValue used to filter jobs which's state within the statesValue set. * @return The result is the list of jobInfo. - * JobInfo is a list which includes the comparable object: jobId, label, state etc. - * The result is unordered. + * JobInfo is a list which includes the comparable object: jobId, label, state etc. + * The result is unordered. 
*/ public List> getLoadJobInfosByDb(long dbId, String labelValue, boolean accurateMatch, Set statesValue) throws AnalysisException { @@ -988,4 +1001,29 @@ public long createLoadJobFromStmt(InsertStmt insertStmt) throws DdlException { loadJobScheduler.submitJob(loadJob); return loadJob.getId(); } + + public long createIngestionLoadJob(String dbName, String label, List tableNames, + Map properties, + UserIdentity userInfo) + throws DdlException, LoadException { + Database db = checkDb(dbName); + long dbId = db.getId(); + LoadJob loadJob; + writeLock(); + try { + checkLabelUsed(dbId, label); + if (unprotectedGetUnfinishedJobNum() >= Config.desired_max_waiting_jobs) { + throw new DdlException("There are more than " + Config.desired_max_waiting_jobs + + " unfinished load jobs, please retry later. You can use `SHOW LOAD` to view submitted jobs"); + } + loadJob = new IngestionLoadJob(dbId, label, tableNames, userInfo); + loadJob.setJobProperties(properties); + createLoadJob(loadJob); + } finally { + writeUnlock(); + } + Env.getCurrentEnv().getEditLog().logCreateLoadJob(loadJob); + return loadJob.getId(); + } + } diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/SparkEtlJobHandler.java b/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/SparkEtlJobHandler.java index 69a41bd12836d0..f0533bb80cd121 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/SparkEtlJobHandler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/SparkEtlJobHandler.java @@ -60,6 +60,7 @@ * 4. get spark etl file paths * 5. delete etl output path */ +@Deprecated public class SparkEtlJobHandler { private static final Logger LOG = LogManager.getLogger(SparkEtlJobHandler.class); diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/SparkLauncherMonitor.java b/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/SparkLauncherMonitor.java index 4b919cd993821c..68d6c571536c9e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/SparkLauncherMonitor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/SparkLauncherMonitor.java @@ -38,6 +38,7 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; +@Deprecated public class SparkLauncherMonitor { private static final Logger LOG = LogManager.getLogger(SparkLauncherMonitor.class); diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/SparkLoadAppHandle.java b/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/SparkLoadAppHandle.java index a6327ff02a934d..60e82d76557d6a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/SparkLoadAppHandle.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/SparkLoadAppHandle.java @@ -33,6 +33,7 @@ import java.util.Iterator; import java.util.List; +@Deprecated public class SparkLoadAppHandle implements Writable { private static final Logger LOG = LogManager.getLogger(SparkLoadAppHandle.class); // 5min diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/SparkLoadJob.java b/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/SparkLoadJob.java index ccc067ec1c8a70..a89435d357c00b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/SparkLoadJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/SparkLoadJob.java @@ -118,6 +118,7 @@ * Step3: LoadLoadingChecker will check loading status periodically and commit transaction when push tasks are finished. * Step4: PublishVersionDaemon will send publish version tasks to be and finish transaction. 
*/ +@Deprecated public class SparkLoadJob extends BulkLoadJob { private static final Logger LOG = LogManager.getLogger(SparkLoadJob.class); diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/SparkLoadPendingTask.java b/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/SparkLoadPendingTask.java index b26603248d66d2..4b57ac6db1653e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/SparkLoadPendingTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/SparkLoadPendingTask.java @@ -31,6 +31,7 @@ import org.apache.doris.catalog.HashDistributionInfo; import org.apache.doris.catalog.HiveTable; import org.apache.doris.catalog.KeysType; +import org.apache.doris.catalog.MaterializedIndexMeta; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Partition; import org.apache.doris.catalog.PartitionItem; @@ -79,6 +80,7 @@ // 1. create etl job config and write it into jobconfig.json file // 2. submit spark etl job +@Deprecated public class SparkLoadPendingTask extends LoadTask { private static final Logger LOG = LogManager.getLogger(SparkLoadPendingTask.class); @@ -247,7 +249,9 @@ private List createEtlIndexes(OlapTable table) throws LoadException { for (Map.Entry> entry : table.getIndexIdToSchema().entrySet()) { long indexId = entry.getKey(); - int schemaHash = table.getSchemaHashByIndexId(indexId); + MaterializedIndexMeta indexMeta = table.getIndexMetaByIndexId(indexId); + int schemaHash = indexMeta.getSchemaHash(); + int schemaVersion = indexMeta.getSchemaVersion(); boolean changeAggType = table.getKeysTypeByIndexId(indexId).equals(KeysType.UNIQUE_KEYS) && table.getTableProperty().getEnableUniqueKeyMergeOnWrite(); @@ -289,7 +293,7 @@ private List createEtlIndexes(OlapTable table) throws LoadException { // is base index boolean isBaseIndex = indexId == table.getBaseIndexId() ? true : false; - etlIndexes.add(new EtlIndex(indexId, etlColumns, schemaHash, indexType, isBaseIndex)); + etlIndexes.add(new EtlIndex(indexId, etlColumns, schemaHash, indexType, isBaseIndex, schemaVersion)); } return etlIndexes; diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/SparkPendingTaskAttachment.java b/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/SparkPendingTaskAttachment.java index a2bbb058e934c6..315f1ae0cd80a8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/SparkPendingTaskAttachment.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/SparkPendingTaskAttachment.java @@ -17,6 +17,7 @@ package org.apache.doris.load.loadv2; +@Deprecated public class SparkPendingTaskAttachment extends TaskAttachment { private SparkLoadAppHandle handle; private String appId; diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/SparkRepository.java b/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/SparkRepository.java index 19b21ff11fe25b..54279250bf5944 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/SparkRepository.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/SparkRepository.java @@ -54,6 +54,7 @@ * * __archive_3_2_0/ * * ... 
*/ +@Deprecated public class SparkRepository { private static final Logger LOG = LogManager.getLogger(SparkRepository.class); diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/SparkYarnConfigFiles.java b/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/SparkYarnConfigFiles.java index 88038d081b2ccf..7f5894804dcc20 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/SparkYarnConfigFiles.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/SparkYarnConfigFiles.java @@ -42,6 +42,7 @@ import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; +@Deprecated public class SparkYarnConfigFiles { private static final Logger LOG = LogManager.getLogger(SparkYarnConfigFiles.class); diff --git a/fe/fe-core/src/main/java/org/apache/doris/master/MasterImpl.java b/fe/fe-core/src/main/java/org/apache/doris/master/MasterImpl.java index ad4f1a1bacdb09..5dd263891d6fdc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/master/MasterImpl.java +++ b/fe/fe-core/src/main/java/org/apache/doris/master/MasterImpl.java @@ -34,6 +34,7 @@ import org.apache.doris.common.Config; import org.apache.doris.common.MetaNotFoundException; import org.apache.doris.load.DeleteJob; +import org.apache.doris.load.loadv2.IngestionLoadJob; import org.apache.doris.load.loadv2.SparkLoadJob; import org.apache.doris.system.Backend; import org.apache.doris.task.AgentTask; @@ -430,7 +431,11 @@ private void finishRealtimePush(AgentTask task, TFinishTaskRequest request) thro olapTable, partition, backendId, tabletId, tabletMeta.getIndexId()); // if the replica is under schema change, could not find the replica with aim schema hash if (replica != null) { - ((SparkLoadJob) job).addFinishedReplica(replica.getId(), pushTabletId, backendId); + if (job instanceof SparkLoadJob) { + ((SparkLoadJob) job).addFinishedReplica(replica.getId(), pushTabletId, backendId); + } else if (job instanceof IngestionLoadJob) { + ((IngestionLoadJob) job).addFinishedReplica(replica.getId(), pushTabletId, backendId); + } } } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/persist/gson/GsonUtils.java b/fe/fe-core/src/main/java/org/apache/doris/persist/gson/GsonUtils.java index 1a7d3e4eff3d7c..c699ddaca3fd85 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/persist/gson/GsonUtils.java +++ b/fe/fe-core/src/main/java/org/apache/doris/persist/gson/GsonUtils.java @@ -184,6 +184,8 @@ import org.apache.doris.job.extensions.mtmv.MTMVJob; import org.apache.doris.load.loadv2.BrokerLoadJob; import org.apache.doris.load.loadv2.BulkLoadJob; +import org.apache.doris.load.loadv2.IngestionLoadJob; +import org.apache.doris.load.loadv2.IngestionLoadJob.IngestionLoadJobStateUpdateInfo; import org.apache.doris.load.loadv2.InsertLoadJob; import org.apache.doris.load.loadv2.LoadJob; import org.apache.doris.load.loadv2.LoadJob.LoadJobStateUpdateInfo; @@ -388,7 +390,9 @@ public class GsonUtils { // runtime adapter for class "LoadJobStateUpdateInfo" private static RuntimeTypeAdapterFactory loadJobStateUpdateInfoTypeAdapterFactory = RuntimeTypeAdapterFactory.of(LoadJobStateUpdateInfo.class, "clazz") - .registerSubtype(SparkLoadJobStateUpdateInfo.class, SparkLoadJobStateUpdateInfo.class.getSimpleName()); + .registerSubtype(SparkLoadJobStateUpdateInfo.class, SparkLoadJobStateUpdateInfo.class.getSimpleName()) + .registerSubtype(IngestionLoadJobStateUpdateInfo.class, + IngestionLoadJobStateUpdateInfo.class.getSimpleName()); // runtime adapter for class "Policy" private static RuntimeTypeAdapterFactory 
policyTypeAdapterFactory = RuntimeTypeAdapterFactory.of( @@ -589,7 +593,8 @@ public class GsonUtils { .registerSubtype(CopyJob.class, CopyJob.class.getSimpleName()) .registerSubtype(InsertLoadJob.class, InsertLoadJob.class.getSimpleName()) .registerSubtype(MiniLoadJob.class, MiniLoadJob.class.getSimpleName()) - .registerSubtype(SparkLoadJob.class, SparkLoadJob.class.getSimpleName()); + .registerSubtype(SparkLoadJob.class, SparkLoadJob.class.getSimpleName()) + .registerSubtype(IngestionLoadJob.class, IngestionLoadJob.class.getSimpleName()); private static RuntimeTypeAdapterFactory partitionItemTypeAdapterFactory = RuntimeTypeAdapterFactory.of(PartitionItem.class, "clazz") diff --git a/fe/fe-core/src/test/java/org/apache/doris/load/loadv2/SparkLoadPendingTaskTest.java b/fe/fe-core/src/test/java/org/apache/doris/load/loadv2/SparkLoadPendingTaskTest.java index 332088b080f690..dcb8e6c878a060 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/load/loadv2/SparkLoadPendingTaskTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/load/loadv2/SparkLoadPendingTaskTest.java @@ -29,6 +29,7 @@ import org.apache.doris.catalog.Env; import org.apache.doris.catalog.HashDistributionInfo; import org.apache.doris.catalog.KeysType; +import org.apache.doris.catalog.MaterializedIndexMeta; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Partition; import org.apache.doris.catalog.PartitionInfo; @@ -50,6 +51,7 @@ import org.apache.doris.sparkdpp.EtlJobConfig.EtlPartition; import org.apache.doris.sparkdpp.EtlJobConfig.EtlPartitionInfo; import org.apache.doris.sparkdpp.EtlJobConfig.EtlTable; +import org.apache.doris.thrift.TStorageType; import com.google.common.collect.Lists; import com.google.common.collect.Maps; @@ -87,6 +89,9 @@ public void testExecuteTask(@Injectable SparkLoadJob sparkLoadJob, // partition and distribution infos long partitionId = 2L; DistributionInfo distributionInfo = new HashDistributionInfo(2, Lists.newArrayList(columns.get(0))); + MaterializedIndexMeta indexMeta = + new MaterializedIndexMeta(indexId, columns, 0, 123, (short) 1, TStorageType.COLUMN, KeysType.DUP_KEYS, + null, null, null); PartitionInfo partitionInfo = new SinglePartitionInfo(); Partition partition = new Partition(partitionId, "p1", null, distributionInfo); List partitions = Lists.newArrayList(partition); @@ -113,8 +118,8 @@ public void testExecuteTask(@Injectable SparkLoadJob sparkLoadJob, result = indexIdToSchema; table.getDefaultDistributionInfo(); result = distributionInfo; - table.getSchemaHashByIndexId(indexId); - result = 123; + table.getIndexMetaByIndexId(indexId); + result = indexMeta; table.getPartitionInfo(); result = partitionInfo; table.getPartition(partitionId); @@ -171,6 +176,12 @@ public void testRangePartitionHashDistribution(@Injectable SparkLoadJob sparkLoa long partition2Id = 5L; int distributionColumnIndex = 1; DistributionInfo distributionInfo = new HashDistributionInfo(3, Lists.newArrayList(columns.get(distributionColumnIndex))); + MaterializedIndexMeta indexMeta1 = + new MaterializedIndexMeta(index1Id, columns, 0, 123, (short) 1, TStorageType.COLUMN, KeysType.DUP_KEYS, + null, null, null); + MaterializedIndexMeta indexMeta2 = + new MaterializedIndexMeta(index2Id, columns, 0, 234, (short) 1, TStorageType.COLUMN, KeysType.DUP_KEYS, + null, null, null); Partition partition1 = new Partition(partition1Id, "p1", null, distributionInfo); Partition partition2 = new Partition(partition2Id, "p2", null, @@ -207,10 +218,10 @@ public void 
testRangePartitionHashDistribution(@Injectable SparkLoadJob sparkLoa result = indexIdToSchema; table.getDefaultDistributionInfo(); result = distributionInfo; - table.getSchemaHashByIndexId(index1Id); - result = 123; - table.getSchemaHashByIndexId(index2Id); - result = 234; + table.getIndexMetaByIndexId(index1Id); + result = indexMeta1; + table.getIndexMetaByIndexId(index2Id); + result = indexMeta2; table.getPartitionInfo(); result = partitionInfo; table.getPartition(partition1Id); @@ -226,7 +237,8 @@ public void testRangePartitionHashDistribution(@Injectable SparkLoadJob sparkLoa } }; - SparkLoadPendingTask task = new SparkLoadPendingTask(sparkLoadJob, aggKeyToFileGroups, resource, brokerDesc, LoadTask.Priority.NORMAL); + SparkLoadPendingTask task = new SparkLoadPendingTask(sparkLoadJob, aggKeyToFileGroups, resource, brokerDesc, + LoadTask.Priority.NORMAL); EtlJobConfig etlJobConfig = Deencapsulation.getField(task, "etlJobConfig"); Assert.assertEquals(null, etlJobConfig); task.init(); diff --git a/fe/spark-dpp/src/test/java/org/apache/doris/load/loadv2/dpp/MinimumCoverageRollupTreeBuilderTest.java b/fe/spark-dpp/src/test/java/org/apache/doris/load/loadv2/dpp/MinimumCoverageRollupTreeBuilderTest.java index 90c95cf04fabb9..7c0cd5a542b5ae 100644 --- a/fe/spark-dpp/src/test/java/org/apache/doris/load/loadv2/dpp/MinimumCoverageRollupTreeBuilderTest.java +++ b/fe/spark-dpp/src/test/java/org/apache/doris/load/loadv2/dpp/MinimumCoverageRollupTreeBuilderTest.java @@ -55,24 +55,24 @@ public void testBuild() { baseColumns.add(column3); baseColumns.add(column4); EtlJobConfig.EtlIndex baseIndex = new EtlJobConfig.EtlIndex(10000, - baseColumns, 12345, "DUPLICATE", true); + baseColumns, 12345, "DUPLICATE", true, 1); List roll1Columns = new ArrayList<>(); roll1Columns.add(column1); roll1Columns.add(column2); roll1Columns.add(column4); EtlJobConfig.EtlIndex roll1Index = new EtlJobConfig.EtlIndex(10001, - roll1Columns, 12346, "AGGREGATE", false); + roll1Columns, 12346, "AGGREGATE", false, 1); List roll2Columns = new ArrayList<>(); roll2Columns.add(column1); roll2Columns.add(column4); EtlJobConfig.EtlIndex roll2Index = new EtlJobConfig.EtlIndex(10002, - roll2Columns, 12347, "AGGREGATE", false); + roll2Columns, 12347, "AGGREGATE", false, 1); List roll3Columns = new ArrayList<>(); roll3Columns.add(column3); roll3Columns.add(column4); EtlJobConfig.EtlIndex roll3Index = new EtlJobConfig.EtlIndex(10003, - roll3Columns, 12348, "AGGREGATE", false); + roll3Columns, 12348, "AGGREGATE", false, 1); List indexes = new ArrayList<>(); indexes.add(baseIndex); diff --git a/fe/spark-dpp/src/test/java/org/apache/doris/load/loadv2/etl/SparkEtlJobTest.java b/fe/spark-dpp/src/test/java/org/apache/doris/load/loadv2/etl/SparkEtlJobTest.java index 0ea7f660923834..bb9de716438d71 100644 --- a/fe/spark-dpp/src/test/java/org/apache/doris/load/loadv2/etl/SparkEtlJobTest.java +++ b/fe/spark-dpp/src/test/java/org/apache/doris/load/loadv2/etl/SparkEtlJobTest.java @@ -68,9 +68,9 @@ public void setUp() { EtlColumn k1 = new EtlColumn("k1", "INT", false, true, "NONE", "0", 0, 0, 0); EtlColumn k2 = new EtlColumn("k2", "VARCHAR", false, true, "NONE", "0", 10, 0, 0); EtlColumn v1 = new EtlColumn("v1", "BIGINT", false, false, "NONE", "0", 0, 0, 0); - EtlIndex index1 = new EtlIndex(index1Id, Lists.newArrayList(k1, k2, v1), 666666, "DUPLICATE", true); + EtlIndex index1 = new EtlIndex(index1Id, Lists.newArrayList(k1, k2, v1), 666666, "DUPLICATE", true, 1); v1 = new EtlColumn("v1", "BIGINT", false, false, "SUM", "0", 0, 0, 0); - EtlIndex index2 = new 
EtlIndex(index2Id, Lists.newArrayList(k1, v1), 888888, "AGGREGATE", true); + EtlIndex index2 = new EtlIndex(index2Id, Lists.newArrayList(k1, v1), 888888, "AGGREGATE", true, 1); List indexes = Lists.newArrayList(index1, index2); // partition info List partitions = Lists.newArrayList(); diff --git a/regression-test/data/load_p0/ingestion_load/data.parquet b/regression-test/data/load_p0/ingestion_load/data.parquet new file mode 100644 index 0000000000000000000000000000000000000000..81d0d01460cc5bc858503f2801db043fa0d30c5d GIT binary patch literal 5745 zcmdT|du&_P89z>()J;uOXzboV+66a_T1bQM)Av>#ERN$>oWyxMZXsOT*Rf+ej-Ppy zhE9YI7@)EtmB(Nbn>vO{hqjJKk*a2bu3KSLjBPC%>&8?-9pllKtt+j(@7PZ6OGi=A7?4-}n0-=bZ05sk^C*)FItw-R%v!Dwve(G&O|!@Mlv)ggXA~L(_V+RzV?z z8uymK?x zWq1y)tsL)n_FDRz`uO%}G;XvKm1K{jTePLkM$#Nk^rgYXE2`TImMVc@$UY`G)ZFTz z?U8U?QZ46$elHp4ydE3h+hGl{z_|zlILWS{nC^9kJ381vLKpy!+utX)HZyc0=9tWk zV@|8v)ju2x`D~sg^?&u@P6u9WjRk49@EQ#kxa_=LK#{P9{1AI_<(lx7|I~9AjG5U5>0jn2ct8oest{)+_Rz4p%7CP4jK$$gqp>nR}dFlYPn2HWNSTY)aYN zT5O?+l{JSgaX&rb%*6YQDat6Mto}qx)SC)P=An4hY>Qf>f`@4diWv*XSp8Nj)fQ&@ zjDc2faMUs+x?ElEppEQqa`l-Yq~q!{x0MujVKa~xuT4x)%s?V!>K=>v+&!GbL(#** zX12XGVKWW22+khH<{Ar&o#}W7ALUwxL$>}-x8O0g+hfClM4Qji&PLiLOC%WVNJhK4 zWZW?}OpSR)osEDW9%H=Wi4;TGy1II-?U6pW)Y;M`IM|lXwAo^0#)DGOl5#iqw;Qub zQVklB1d|b9Z0Q+JBzxO?M;vUVI~Eax!~Mao9vg3Wj0*gy%jOY>+DAQJd!obP^;)_j zahoKDI>sWx)u7xPYeV*{Is(<kPzOtsRor!TM}MS8I%%bd2-_EN%T{yV2e1U&9qt2Hls2eJhP#IyZqONc#Rb{WKKsL82ndH6u@N)6lTGDKFZ~=fcIvE zRAA*N69`Sw2nAOlgcXkL%H>{rE)Tw2rTeK$t`LOgl;@Wl%*S+7goZh>cxl~z$96&t zm*IlBzE64!Ss2-JLA&Ob@C#zgMbzYktXIZlVQ}H<~z_G15HE zaTLQ7RqIK{#G6=wp#`Hr@vMm_@&24v9Vxh01;l!eN77)GA_e%z@eHjRv;)2X9vKCm=V=pd5=ct5 z!cMX@%dvt0aPoGRd#IpwkGucn~T9-DSd3Dq2W-AsBzq|g` zLsQ?eTzle4>kkfJb?~N_+7ANwB?sh&nrGjAK>I=yq3K&^cp|&#Pc!&{T*?J=a|gtJ z;djF?1z(&Q=vcp+i5;FXpS&NrfBVYJox8M~|Gwdknb*ft(_gQDwrLjS+|u7ngvt?hD^DSm?LC`&{DCFta)8fYr~tr2ou4TIG5K@-x!j)B<*31ZKG%HwZ0?QE z&gEQ@3WOCbiN)yX+yA`Gz5PQQzPI~b2;ps!DTTMUfCv`Ai*fi~hxyN7JOSg|@a=(l zC43iqOQ>G~E}gtrG7b01?6P_`1;&Y(u z*RkuMgks?BpRSy2KRp35Y^fmeTG-}9u@u=m48`p1B)lXl8V=KIXsvGcg3DOL? 
z#sg$~dbzx3^0`#(+w^kSIQcPz3zbYEr5pRJWGKafR$%R~0xbFMgRx|49@bf1jZR%W z1amhbkMvB5{<)I+v$~B7c=RZ}e43CCgW^)$vX^M`| zUQk5_wylH=$ju#RK&IwpVBe}5U4@*1!rfM6VBadpz-;IJ!mh4#@no&MPq5|1f{=%P zQl)=NRe!P;CqUVnmFo8!WOZddQ2^X;$O9-^u@YdOmTV-Ajb!awUnU$4)QY?i2=YNepeY|G1v&Ur nH(=z9oFK7Azu+?oCZkXC1;};qM65w*K|koOB?!6UKb!vzS&upK literal 0 HcmV?d00001 diff --git a/regression-test/data/load_p0/ingestion_load/data1.parquet b/regression-test/data/load_p0/ingestion_load/data1.parquet new file mode 100644 index 0000000000000000000000000000000000000000..623456ace7430de2cdc8ec56113f7f69c503713e GIT binary patch literal 4057 zcmcgveQaCR6~E7U`9e-f6TjDI#B)PQTN9tp=tHbeYWG& zbvI3Gcia2!J?EZ#&iS2l?!9rp5#}k9I$cLC)deUUMYbVyf%vu!A>`6IK@fCaQ+ZJr zBt;W>zMkV0_yez6Zr|_~aX*$tNih2ukH_vGrs1Q|E=Xp(# zn*A5=g^4UFssu`5rV1jjv}0p}C}}(=aGauuiX!m}huM;(@QT6-x-5yDD1$Q`vB?A5 z1x@25Mb~9j7r=J?Cci3xQ<|)3nksNAsNe)*lV1~5jZ;;L=VeWnIho^h3IR^$P7Ic6 zpc)(s+Sp!0jIp_D_zFJV&`H*}3fpIY0(15f>7cB3C)r?U8fS9J>~K0kS!f%*r=I>h z$)2xcoBm7v_B+8XlCmIh6AuUv{nYkyayBk}6hP?RAVO*ApFlqa^eZqv0R3U;+n|32 zylR{biKQ75@2AnmRa7;omzx}vr3i{l#PUfK(eDWQEWy6h$R25M{2Ns@{a>_DZ(GU; zo+B>qZ3dGQTL=1m{)u$74hSDVz6OS1I2iWw+Q^PENe_Fh!0GRv>zP!u0w0WK*8-hR zCKGZnH>V9MBT32c+lHAr)wemjMdRdHM2zPIZ;$|HUkiNVy}jAlaPR1J_l$3>9vGU= zcMG_*Qto6f5SkpC9`94ro=7CVISmo(pgJ$BjI~#&X6j89prQ3{oE{sosvH(tR?2~l zr}Mka$Q`SrPuH>gtn86yTntxNSu=K*<-X$%I1^I{{bM&mZw?{!>b(d(c(zd39!KcA z;|TrwOrh|_j|+urBn*jnFeF~Xxvx;QaV)m3R6t!URV;w>1iehKuiwZnt$_llXsFUx z6NH}%1gJLicIkZLp+cc>6#59*ehbDQLT`XMU_me~oPd5M04&XrcrP|sp@6+Q5#7m0 zfWyAtUICCx&YOAojG)gG?3-=uuhs&9iiRqEH33{Cs4t0f1=a+t6NEPL;JmPcp|OMK_u7ry!$LeGK(^QZKw zTY*hj9p`?z^CXn#gGA@xbjsMxBGiLbzkg=9heGHFFt3hC74Fh~^x`@P)lfXTH)dkF z{P1i#m(CBboZ{0K`lN+DNwddnx7WI9eo9aWS8(=U+%!n*xWzWwp< zM=pFWEZ591q<3^N*f-II?T-p5HFrw!iZ= z=WVH@FE@D)T!E6rQVfaLaJ5ya+NgfEVWoQ4OvL7g6LZN-W;maoSbY)~*RzKmP{O%#u zv#AV9uehfwUuEWHpb>4eJM0Vpk3SyRYx|b7*ZP5Dujxy3ZLfXyf4WTB>w7h^*Xxd8 zuh(IvmucF@Hqp=Y^@ES(X94s3VOv^Aa0{G+B@V96Le?6hD*{&|U zV2t4vFa`~8EVZq892v;W3<_Z}IxwFb6vNXa0}&ANfYYWrx9Nm2UZTC-VBK^QzUCB@ zh9NFC% zWLX}E3tGHPho(${qJKqMyzaraeDh;*F+7@>$lMAJskE0Jn5QR*jkU>7ju{28S95a4nAWS!!O>2m}oRoWl9{ z<2ryAswnaHm_da_XshCjx4Opxi=|kGa2$9aMrpL)dP)f!$Ea zzc}{S1XD2@G5rN6p;hAJHsiD7{^(%uXl%fA9k!V3_aH`3Ol8wK7q=(@Pr*xw+^}VRz?P8*D;1{%bA(uq_(p$LSXE$ z)7WXBpr5FpXP52C%jUI#4lMcUulN5+?-^fTzVrzqW8%7`4}n1phw8@&A*yRITWI{x z3WSrnk}-@U7&;7PCON02>hXmh}njdAzY+yJN9g3I0V~6 z1KBE-V5@I^)~qku$e2CS3)_JbfeV)yebW@-%+PxZ;kSsdn)O|S9bj~|KB(Mwp4i zeS|zpkDRp5R>;5{j*kBC9Q_i${c^`K!a6owtTl;llWq?ow2vmbI$A-wYp6|7USP6E zdRQUoMjW-FU-;@ zfNyR6WAd2ab!=?BHyAYyZp=Gp*O`&l?Ga%icWuQXdxDA zEB2s#kYzE}h+KU__pGww0ozlzpbp7g~eOwvgjhW;oDWEup%pLl^6gmUb~VRRi{dy$M1 SKD1^wLi-Qp@b0(pBfJ8g%A9Ke literal 0 HcmV?d00001 diff --git a/regression-test/data/load_p0/ingestion_load/data2-2.parquet b/regression-test/data/load_p0/ingestion_load/data2-2.parquet new file mode 100644 index 0000000000000000000000000000000000000000..720ea77afd23d4f125e36becf6577ad3713e1c83 GIT binary patch literal 781 zcmaiz(Msb$6ow~RtzpTf6DE+0GT5LHhfIUfl-}%}$RgtHvdAQv)L@#{BvqvJReUBN z!AJ2-S~pzX1?=Ks!_e}-KBy5<;R1DhV!+r)6l&K^Q&9W8XYwSw}FsWw5mkI5S8 zVTEXy3QG*$crBBB6@TSAp%-Tr-U)^kza-PFmgoC@57;s6mn@l~Qj z!Urb8W}6NNFj==9d+Ztxzykx>N{V$$w_+=Ejqw-0YzGboU16|WD*VLgwLEY6(yH7N z_|Db`liU2O=3w(*gJIj?(zJk2E?(YmP?NkcuF<+4a?I{LGurI@O% z#E0@mmY0p3$O;POq9{aMfFKh^Ej{|)m5!FHmDka{qhYB04~3y$ki(9IW6+9$rC?`;?wBVk7b5?|ub8!W+qnoP7WQ literal 0 HcmV?d00001 diff --git a/regression-test/data/load_p0/ingestion_load/data2-3.parquet b/regression-test/data/load_p0/ingestion_load/data2-3.parquet new file mode 100644 index 
0000000000000000000000000000000000000000..cc2b37a7c03fea0f374d5cb4040469794c8f4fd6 GIT binary patch literal 839 zcmah|(TjbTuS)J0SQiE$$kztUiW3Cys6iA%{uEm-z!R0<*he;xAAJZ(66!p;eAhVMsM8=z(zE0&NL8ALCw%4t+}`jNKi?(( GLcakq-Jlcz literal 0 HcmV?d00001 diff --git a/regression-test/data/load_p0/ingestion_load/test_ingestion_load.out b/regression-test/data/load_p0/ingestion_load/test_ingestion_load.out new file mode 100644 index 00000000000000..f8ce916c7de5fd --- /dev/null +++ b/regression-test/data/load_p0/ingestion_load/test_ingestion_load.out @@ -0,0 +1,37 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !select -- +-2067215761 41V4fgCKI3Hkioe DZGaq24Yqh7SwmbPT6IX23jfC5NKqG7gE9JT4GLwiaQtoO8l6EjhGWQP9X7NHmjdqMbIN5kNeDkffOrlS6roIwj2wXpJ true 123 -4524 1014223769452772206 -6589390612399245616 29103.387 -4.09385322835896E8 886.394 -699.960 2024-02-22 2024-03-04 2024-01-01T22:13:54 2023-10-02T19:35:10 +-1982022740 fcZ1o6ZXG8UOFh5 iw4Ziys42GRRTFNkVPeQEA9I5EQtBD04xfefDsPCWN0vr1 true -3 -25143 7122614823242399351 7391807634038366248 23160.604 -9.20283206353984E8 829.880 -387.403 2023-10-16 2024-05-13 2024-07-16T18:27:45 2023-11-03T05:30:21 +-1228926576 1TISaGtB01BiqVt kq4u false -123 15962 3590007830423934951 -1478759439092857810 -7813.757 -6.98785793100899E8 930.743 402.350 2024-07-23 2023-07-30 2023-11-27T17:48:50 2024-03-11T21:09:58 +-575060895 rRfatUJiOO5dq9Y ETjqrUNUnI5kSmkafjWfRTG8HIp98pLGXagNpXZHqOIZZDRkoGeahOwk9 false 16 -767 6623730208927396375 -3055706367894284822 12540.839 -1.047911096098831E9 -752.454 -241.620 2024-04-10 2024-05-16 2023-12-07T23:38:05 2023-12-11T05:48:36 +-76042627 PcVaKC43qmIzuxY U3aGxaZumFpqcUsLI true 44 31151 9085406701767055602 -5846138572199996843 -16845.29 2.44522690225531E8 -784.720 -467.133 2023-10-31 2023-08-29 2023-09-12T10:12:46 2023-10-19T17:02:51 +121048200 KPLWjhhbGXqflJi rzqOYQH9ySHPwCm5K4GdeuI28G8LLmnpqLmsLMLfyRIvcfrlubQI47wUa8QILhuS38MBkjL true 42 13182 -6601530758880565531 5619594098883737912 -2782.1506 3.86698722676211E8 478.420 -330.289 2024-06-17 2023-12-26 2024-04-28T03:29:04 2023-08-18T21:05:32 +262860291 m3XgmlbIHYNH1qS BTJRzVrpM78zJAsHMEGhkF5BiDoc3yJuoV0s209sFcqElZsheBgolBGlFl9X4EfauD64FcFF2Mi4V0dKZfpDgaLLRPfG1SALV7 false -42 5990 -7504815416577235660 1535659792778122944 1171.9619 1.28834143701229E8 626.721 682.828 2023-11-24 2023-11-18 2024-03-21T11:50:17 2024-03-31T12:59:27 +579428879 KsOC6WGrieGlo7B SzeA6tRbsiGWJTBDvBQdBjCqjSE6Y false -111 32758 4029182463831656671 -3546198025044093789 20338.55 -2.015222388533773E9 61.981 720.310 2023-11-13 2024-07-04 2024-07-19T12:42:28 2024-01-04T10:32:53 +1145092610 xWJUDWAV8Nllo0F dnZ9RMVdoqxh4kGBvy55zQdChNTVYdlvRZP4aWIkXyErUbM1XmFGQ9vuCD113JKKCyx4crDoY false 115 -22832 -7242855305248390982 -4240353246453053617 -9074.909 -2.51212400295869E8 -502.410 618.820 2024-06-12 2024-04-18 2023-11-04T09:55:17 2023-11-13T16:30:23 +1736373707 UU14wnLhPkBid41 pmuNqYfOc3JCscf9meT5dYB2i28Pt9iaeXK4QqjVZJdoKFOeZI5bG9RKm1zInTdDMW1N0PKI5Y true -105 -20276 360048259532857165 -4602633478165721463 -13230.296 -1.708246954394742E9 757.147 -533.800 2024-01-05 2023-09-08 2023-11-27T05:21:33 2024-02-11T21:35:03 + +-- !select -- +-2067215761 41V4fgCKI3Hkioe DZGaq24Yqh7SwmbPT6IX23jfC5NKqG7gE9JT4GLwiaQtoO8l6EjhGWQP9X7NHmjdqMbIN5kNeDkffOrlS6roIwj2wXpJ true 123 -4524 1014223769452772206 -6589390612399245616 29103.387 -4.09385322835896E8 886.394 -699.960 2024-02-22 2024-03-04 2024-01-01T22:13:54 2023-10-02T19:35:10 +-1982022740 fcZ1o6ZXG8UOFh5 iw4Ziys42GRRTFNkVPeQEA9I5EQtBD04xfefDsPCWN0vr1 
true -3 -25143 7122614823242399351 7391807634038366248 23160.604 -9.20283206353984E8 829.880 -387.403 2023-10-16 2024-05-13 2024-07-16T18:27:45 2023-11-03T05:30:21 +-1228926576 1TISaGtB01BiqVt kq4u false -123 15962 3590007830423934951 -1478759439092857810 -7813.757 -6.98785793100899E8 930.743 402.350 2024-07-23 2023-07-30 2023-11-27T17:48:50 2024-03-11T21:09:58 +-575060895 rRfatUJiOO5dq9Y ETjqrUNUnI5kSmkafjWfRTG8HIp98pLGXagNpXZHqOIZZDRkoGeahOwk9 false 16 -767 6623730208927396375 -3055706367894284822 12540.839 -1.047911096098831E9 -752.454 -241.620 2024-04-10 2024-05-16 2023-12-07T23:38:05 2023-12-11T05:48:36 +-76042627 PcVaKC43qmIzuxY U3aGxaZumFpqcUsLI true 44 31151 9085406701767055602 -5846138572199996843 -16845.29 2.44522690225531E8 -784.720 -467.133 2023-10-31 2023-08-29 2023-09-12T10:12:46 2023-10-19T17:02:51 +121048200 KPLWjhhbGXqflJi rzqOYQH9ySHPwCm5K4GdeuI28G8LLmnpqLmsLMLfyRIvcfrlubQI47wUa8QILhuS38MBkjL true 42 13182 -6601530758880565531 5619594098883737912 -2782.1506 3.86698722676211E8 478.420 -330.289 2024-06-17 2023-12-26 2024-04-28T03:29:04 2023-08-18T21:05:32 +262860291 m3XgmlbIHYNH1qS BTJRzVrpM78zJAsHMEGhkF5BiDoc3yJuoV0s209sFcqElZsheBgolBGlFl9X4EfauD64FcFF2Mi4V0dKZfpDgaLLRPfG1SALV7 false -42 5990 -7504815416577235660 1535659792778122944 1171.9619 1.28834143701229E8 626.721 682.828 2023-11-24 2023-11-18 2024-03-21T11:50:17 2024-03-31T12:59:27 +579428879 KsOC6WGrieGlo7B SzeA6tRbsiGWJTBDvBQdBjCqjSE6Y false -111 32758 4029182463831656671 -3546198025044093789 20338.55 -2.015222388533773E9 61.981 720.310 2023-11-13 2024-07-04 2024-07-19T12:42:28 2024-01-04T10:32:53 +1145092610 xWJUDWAV8Nllo0F dnZ9RMVdoqxh4kGBvy55zQdChNTVYdlvRZP4aWIkXyErUbM1XmFGQ9vuCD113JKKCyx4crDoY false 115 -22832 -7242855305248390982 -4240353246453053617 -9074.909 -2.51212400295869E8 -502.410 618.820 2024-06-12 2024-04-18 2023-11-04T09:55:17 2023-11-13T16:30:23 +1736373707 UU14wnLhPkBid41 pmuNqYfOc3JCscf9meT5dYB2i28Pt9iaeXK4QqjVZJdoKFOeZI5bG9RKm1zInTdDMW1N0PKI5Y true -105 -20276 360048259532857165 -4602633478165721463 -13230.296 -1.708246954394742E9 757.147 -533.800 2024-01-05 2023-09-08 2023-11-27T05:21:33 2024-02-11T21:35:03 + +-- !select -- +-9022291871392468311 2023-11-02 mOWPGHmiZ 10784 -128 2023-11-12T18:06:21 4218616419351308798 1993977685 -1857678846 +-6045452612961149194 2024-06-23 G1j 28468 -55 2024-06-09T00:12:11 -6456263257174124469 -727277974 144696403 +-1537906159489906139 2024-04-04 MRMRE18bVh49RD 32763 98 2024-01-20T00:54:03 -1289145371043997006 128181215 -1295829474 +-1510882223779118241 2024-07-24 PCwFn7r21MZr 22960 -79 2024-02-07T18:15:07 -8437284610883885859 472729036 -39626304 +-1185467471318572316 2023-11-08 ieed5Msw8X6be4HGS 16555 -79 2024-07-28T23:08:29 3263664376405334754 -809360772 -1229995615 +-234810200663664160 2024-06-07 s7GIrN805aU3cs2EM -7555 -124 2023-12-28T18:59:15 -3600712745035417587 2035647886 126756427 +4461660295430359180 2024-04-23 K 25428 6 2023-11-15T18:38:20 -4503242152141666001 -1093190312 1511443278 +6742880469957921530 2024-05-02 cJJrvRJfpCuGh 27232 64 2024-08-18T09:46:50 -2607385663861429432 -1390108377 1758263623 +7252685688720766402 2024-03-13 891C2 -9774 -1 2023-10-12T19:45:28 -3210623791036109982 -915986651 -1794344594 +8278077411585505009 2023-11-17 gBesLQnYpjK7iDUUcIi -26656 -50 2023-12-11T14:29:52 -8301529943262026214 -1555756888 -1318983102 + diff --git a/regression-test/data/load_p0/ingestion_load/test_ingestion_load_multi_table.out b/regression-test/data/load_p0/ingestion_load/test_ingestion_load_multi_table.out new file mode 100644 index 
00000000000000..7a3ec8e86ddd05 --- /dev/null +++ b/regression-test/data/load_p0/ingestion_load/test_ingestion_load_multi_table.out @@ -0,0 +1,25 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !select -- +-2067215761 41V4fgCKI3Hkioe DZGaq24Yqh7SwmbPT6IX23jfC5NKqG7gE9JT4GLwiaQtoO8l6EjhGWQP9X7NHmjdqMbIN5kNeDkffOrlS6roIwj2wXpJ true 123 -4524 1014223769452772206 -6589390612399245616 29103.387 -4.09385322835896E8 886.394 -699.960 2024-02-22 2024-03-04 2024-01-01T22:13:54 2023-10-02T19:35:10 +-1982022740 fcZ1o6ZXG8UOFh5 iw4Ziys42GRRTFNkVPeQEA9I5EQtBD04xfefDsPCWN0vr1 true -3 -25143 7122614823242399351 7391807634038366248 23160.604 -9.20283206353984E8 829.880 -387.403 2023-10-16 2024-05-13 2024-07-16T18:27:45 2023-11-03T05:30:21 +-1228926576 1TISaGtB01BiqVt kq4u false -123 15962 3590007830423934951 -1478759439092857810 -7813.757 -6.98785793100899E8 930.743 402.350 2024-07-23 2023-07-30 2023-11-27T17:48:50 2024-03-11T21:09:58 +-575060895 rRfatUJiOO5dq9Y ETjqrUNUnI5kSmkafjWfRTG8HIp98pLGXagNpXZHqOIZZDRkoGeahOwk9 false 16 -767 6623730208927396375 -3055706367894284822 12540.839 -1.047911096098831E9 -752.454 -241.620 2024-04-10 2024-05-16 2023-12-07T23:38:05 2023-12-11T05:48:36 +-76042627 PcVaKC43qmIzuxY U3aGxaZumFpqcUsLI true 44 31151 9085406701767055602 -5846138572199996843 -16845.29 2.44522690225531E8 -784.720 -467.133 2023-10-31 2023-08-29 2023-09-12T10:12:46 2023-10-19T17:02:51 +121048200 KPLWjhhbGXqflJi rzqOYQH9ySHPwCm5K4GdeuI28G8LLmnpqLmsLMLfyRIvcfrlubQI47wUa8QILhuS38MBkjL true 42 13182 -6601530758880565531 5619594098883737912 -2782.1506 3.86698722676211E8 478.420 -330.289 2024-06-17 2023-12-26 2024-04-28T03:29:04 2023-08-18T21:05:32 +262860291 m3XgmlbIHYNH1qS BTJRzVrpM78zJAsHMEGhkF5BiDoc3yJuoV0s209sFcqElZsheBgolBGlFl9X4EfauD64FcFF2Mi4V0dKZfpDgaLLRPfG1SALV7 false -42 5990 -7504815416577235660 1535659792778122944 1171.9619 1.28834143701229E8 626.721 682.828 2023-11-24 2023-11-18 2024-03-21T11:50:17 2024-03-31T12:59:27 +579428879 KsOC6WGrieGlo7B SzeA6tRbsiGWJTBDvBQdBjCqjSE6Y false -111 32758 4029182463831656671 -3546198025044093789 20338.55 -2.015222388533773E9 61.981 720.310 2023-11-13 2024-07-04 2024-07-19T12:42:28 2024-01-04T10:32:53 +1145092610 xWJUDWAV8Nllo0F dnZ9RMVdoqxh4kGBvy55zQdChNTVYdlvRZP4aWIkXyErUbM1XmFGQ9vuCD113JKKCyx4crDoY false 115 -22832 -7242855305248390982 -4240353246453053617 -9074.909 -2.51212400295869E8 -502.410 618.820 2024-06-12 2024-04-18 2023-11-04T09:55:17 2023-11-13T16:30:23 +1736373707 UU14wnLhPkBid41 pmuNqYfOc3JCscf9meT5dYB2i28Pt9iaeXK4QqjVZJdoKFOeZI5bG9RKm1zInTdDMW1N0PKI5Y true -105 -20276 360048259532857165 -4602633478165721463 -13230.296 -1.708246954394742E9 757.147 -533.800 2024-01-05 2023-09-08 2023-11-27T05:21:33 2024-02-11T21:35:03 + +-- !select -- +-2067215761 41V4fgCKI3Hkioe DZGaq24Yqh7SwmbPT6IX23jfC5NKqG7gE9JT4GLwiaQtoO8l6EjhGWQP9X7NHmjdqMbIN5kNeDkffOrlS6roIwj2wXpJ true 123 -4524 1014223769452772206 -6589390612399245616 29103.387 -4.09385322835896E8 886.394 -699.960 2024-02-22 2024-03-04 2024-01-01T22:13:54 2023-10-02T19:35:10 +-1982022740 fcZ1o6ZXG8UOFh5 iw4Ziys42GRRTFNkVPeQEA9I5EQtBD04xfefDsPCWN0vr1 true -3 -25143 7122614823242399351 7391807634038366248 23160.604 -9.20283206353984E8 829.880 -387.403 2023-10-16 2024-05-13 2024-07-16T18:27:45 2023-11-03T05:30:21 +-1228926576 1TISaGtB01BiqVt kq4u false -123 15962 3590007830423934951 -1478759439092857810 -7813.757 -6.98785793100899E8 930.743 402.350 2024-07-23 2023-07-30 2023-11-27T17:48:50 2024-03-11T21:09:58 +-575060895 rRfatUJiOO5dq9Y 
ETjqrUNUnI5kSmkafjWfRTG8HIp98pLGXagNpXZHqOIZZDRkoGeahOwk9 false 16 -767 6623730208927396375 -3055706367894284822 12540.839 -1.047911096098831E9 -752.454 -241.620 2024-04-10 2024-05-16 2023-12-07T23:38:05 2023-12-11T05:48:36 +-76042627 PcVaKC43qmIzuxY U3aGxaZumFpqcUsLI true 44 31151 9085406701767055602 -5846138572199996843 -16845.29 2.44522690225531E8 -784.720 -467.133 2023-10-31 2023-08-29 2023-09-12T10:12:46 2023-10-19T17:02:51 +121048200 KPLWjhhbGXqflJi rzqOYQH9ySHPwCm5K4GdeuI28G8LLmnpqLmsLMLfyRIvcfrlubQI47wUa8QILhuS38MBkjL true 42 13182 -6601530758880565531 5619594098883737912 -2782.1506 3.86698722676211E8 478.420 -330.289 2024-06-17 2023-12-26 2024-04-28T03:29:04 2023-08-18T21:05:32 +262860291 m3XgmlbIHYNH1qS BTJRzVrpM78zJAsHMEGhkF5BiDoc3yJuoV0s209sFcqElZsheBgolBGlFl9X4EfauD64FcFF2Mi4V0dKZfpDgaLLRPfG1SALV7 false -42 5990 -7504815416577235660 1535659792778122944 1171.9619 1.28834143701229E8 626.721 682.828 2023-11-24 2023-11-18 2024-03-21T11:50:17 2024-03-31T12:59:27 +579428879 KsOC6WGrieGlo7B SzeA6tRbsiGWJTBDvBQdBjCqjSE6Y false -111 32758 4029182463831656671 -3546198025044093789 20338.55 -2.015222388533773E9 61.981 720.310 2023-11-13 2024-07-04 2024-07-19T12:42:28 2024-01-04T10:32:53 +1145092610 xWJUDWAV8Nllo0F dnZ9RMVdoqxh4kGBvy55zQdChNTVYdlvRZP4aWIkXyErUbM1XmFGQ9vuCD113JKKCyx4crDoY false 115 -22832 -7242855305248390982 -4240353246453053617 -9074.909 -2.51212400295869E8 -502.410 618.820 2024-06-12 2024-04-18 2023-11-04T09:55:17 2023-11-13T16:30:23 +1736373707 UU14wnLhPkBid41 pmuNqYfOc3JCscf9meT5dYB2i28Pt9iaeXK4QqjVZJdoKFOeZI5bG9RKm1zInTdDMW1N0PKI5Y true -105 -20276 360048259532857165 -4602633478165721463 -13230.296 -1.708246954394742E9 757.147 -533.800 2024-01-05 2023-09-08 2023-11-27T05:21:33 2024-02-11T21:35:03 + diff --git a/regression-test/data/load_p0/ingestion_load/test_ingestion_load_with_inverted_index.out b/regression-test/data/load_p0/ingestion_load/test_ingestion_load_with_inverted_index.out new file mode 100644 index 00000000000000..f39b6b66d29108 --- /dev/null +++ b/regression-test/data/load_p0/ingestion_load/test_ingestion_load_with_inverted_index.out @@ -0,0 +1,13 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !select -- +-2067215761 41V4fgCKI3Hkioe DZGaq24Yqh7SwmbPT6IX23jfC5NKqG7gE9JT4GLwiaQtoO8l6EjhGWQP9X7NHmjdqMbIN5kNeDkffOrlS6roIwj2wXpJ true 123 -4524 1014223769452772206 -6589390612399245616 29103.387 -4.09385322835896E8 886.394 -699.960 2024-02-22 2024-03-04 2024-01-01T22:13:54 2023-10-02T19:35:10 +-1982022740 fcZ1o6ZXG8UOFh5 iw4Ziys42GRRTFNkVPeQEA9I5EQtBD04xfefDsPCWN0vr1 true -3 -25143 7122614823242399351 7391807634038366248 23160.604 -9.20283206353984E8 829.880 -387.403 2023-10-16 2024-05-13 2024-07-16T18:27:45 2023-11-03T05:30:21 +-1228926576 1TISaGtB01BiqVt kq4u false -123 15962 3590007830423934951 -1478759439092857810 -7813.757 -6.98785793100899E8 930.743 402.350 2024-07-23 2023-07-30 2023-11-27T17:48:50 2024-03-11T21:09:58 +-575060895 rRfatUJiOO5dq9Y ETjqrUNUnI5kSmkafjWfRTG8HIp98pLGXagNpXZHqOIZZDRkoGeahOwk9 false 16 -767 6623730208927396375 -3055706367894284822 12540.839 -1.047911096098831E9 -752.454 -241.620 2024-04-10 2024-05-16 2023-12-07T23:38:05 2023-12-11T05:48:36 +-76042627 PcVaKC43qmIzuxY U3aGxaZumFpqcUsLI true 44 31151 9085406701767055602 -5846138572199996843 -16845.29 2.44522690225531E8 -784.720 -467.133 2023-10-31 2023-08-29 2023-09-12T10:12:46 2023-10-19T17:02:51 +121048200 KPLWjhhbGXqflJi rzqOYQH9ySHPwCm5K4GdeuI28G8LLmnpqLmsLMLfyRIvcfrlubQI47wUa8QILhuS38MBkjL true 42 13182 -6601530758880565531 5619594098883737912 -2782.1506 3.86698722676211E8 478.420 -330.289 2024-06-17 2023-12-26 2024-04-28T03:29:04 2023-08-18T21:05:32 +262860291 m3XgmlbIHYNH1qS BTJRzVrpM78zJAsHMEGhkF5BiDoc3yJuoV0s209sFcqElZsheBgolBGlFl9X4EfauD64FcFF2Mi4V0dKZfpDgaLLRPfG1SALV7 false -42 5990 -7504815416577235660 1535659792778122944 1171.9619 1.28834143701229E8 626.721 682.828 2023-11-24 2023-11-18 2024-03-21T11:50:17 2024-03-31T12:59:27 +579428879 KsOC6WGrieGlo7B SzeA6tRbsiGWJTBDvBQdBjCqjSE6Y false -111 32758 4029182463831656671 -3546198025044093789 20338.55 -2.015222388533773E9 61.981 720.310 2023-11-13 2024-07-04 2024-07-19T12:42:28 2024-01-04T10:32:53 +1145092610 xWJUDWAV8Nllo0F dnZ9RMVdoqxh4kGBvy55zQdChNTVYdlvRZP4aWIkXyErUbM1XmFGQ9vuCD113JKKCyx4crDoY false 115 -22832 -7242855305248390982 -4240353246453053617 -9074.909 -2.51212400295869E8 -502.410 618.820 2024-06-12 2024-04-18 2023-11-04T09:55:17 2023-11-13T16:30:23 +1736373707 UU14wnLhPkBid41 pmuNqYfOc3JCscf9meT5dYB2i28Pt9iaeXK4QqjVZJdoKFOeZI5bG9RKm1zInTdDMW1N0PKI5Y true -105 -20276 360048259532857165 -4602633478165721463 -13230.296 -1.708246954394742E9 757.147 -533.800 2024-01-05 2023-09-08 2023-11-27T05:21:33 2024-02-11T21:35:03 + diff --git a/regression-test/data/load_p0/ingestion_load/test_ingestion_load_with_partition.out b/regression-test/data/load_p0/ingestion_load/test_ingestion_load_with_partition.out new file mode 100644 index 00000000000000..37d0553e58c3c5 --- /dev/null +++ b/regression-test/data/load_p0/ingestion_load/test_ingestion_load_with_partition.out @@ -0,0 +1,7 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !select -- +2024-09-01 5 +2024-09-02 1 +2024-09-03 1 +2024-09-04 3 + diff --git a/regression-test/suites/load_p0/ingestion_load/test_ingestion_load.groovy b/regression-test/suites/load_p0/ingestion_load/test_ingestion_load.groovy new file mode 100644 index 00000000000000..a6e213bba89f2a --- /dev/null +++ b/regression-test/suites/load_p0/ingestion_load/test_ingestion_load.groovy @@ -0,0 +1,222 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import java.nio.file.Files +import java.nio.file.Paths +import java.nio.file.StandardCopyOption + +suite('test_ingestion_load', 'p0') { + + def testIngestLoadJob = { testTable, loadLabel, String dataFile -> + + sql "TRUNCATE TABLE ${testTable}" + + sql "CLEAN LABEL FROM ${context.dbName}" + + Integer loadId = -1 + Integer tableId = -1 + Integer partitionId = -1 + Integer indexId = -1 + Integer bucketId = 0 + Integer schemaHash = -1 + + String reqBody = + """{ + "label": "${loadLabel}", + "tableToPartition": { + "${testTable}": [] + }, + "properties": {} + }""" + + httpTest { + endpoint context.config.feHttpAddress + uri "/api/ingestion_load/internal/${context.dbName}/_create" + op "post" + basicAuthorization context.config.feHttpUser, context.config.feHttpPassword + body reqBody + check { code, resBody -> + assert code == 200 + def resBodyJson = parseJson(resBody) + assert resBodyJson instanceof Map + assert resBodyJson.code == 0 + def data = resBodyJson.data + loadId = data.loadId + def tableMeta = data.tableMeta + tableId = tableMeta["${testTable}"].id + def index = tableMeta["${testTable}"].indexes[0] + indexId = index.indexId + schemaHash = index.schemaHash + partitionId = tableMeta["${testTable}"].partitionInfo.partitions[0].partitionId + } + } + + String resultFileName = "V1.${loadLabel}.${tableId}.${partitionId}.${indexId}.${bucketId}.${schemaHash}.parquet" + logger.info("resultFileName: " + resultFileName) + + Files.copy(Paths.get(dataFile), + Paths.get(context.config.dataPath + "/load_p0/ingestion_load/${resultFileName}"), StandardCopyOption.REPLACE_EXISTING) + + String etlResultFilePath = uploadToHdfs "/load_p0/ingestion_load/${resultFileName}" + + String dppResult = '{\\"isSuccess\\":true,\\"failedReason\\":\\"\\",\\"scannedRows\\":10,\\"fileNumber\\":1,' + + '\\"fileSize\\":2441,\\"normalRows\\":10,\\"abnormalRows\\":0,\\"unselectRows\\":0,' + + '\\"partialAbnormalRows\\":\\"[]\\",\\"scannedBytes\\":0}' + + String updateStatusReqBody = + """{ + "loadId": ${loadId}, + "statusInfo": { + "status": "SUCCESS", + "msg": "", + "appId": "", + "dppResult": "${dppResult}", + "filePathToSize": "{\\"${etlResultFilePath}\\": 81758}", + "hadoopProperties": "{\\"fs.defaultFS\\":\\"${getHdfsFs()}\\",\\"hadoop.username\\":\\"${getHdfsUser()}\\",\\"hadoop.password\\":\\"${getHdfsPasswd()}\\"}" + } + }""" + + httpTest { + endpoint context.config.feHttpAddress + uri "/api/ingestion_load/internal/${context.dbName}/_update" + op "post" + basicAuthorization context.config.feHttpUser, context.config.feHttpPassword + body updateStatusReqBody + check { code, resBody -> + { + assert code == 200 + def resBodyJson = parseJson(resBody) + assert resBodyJson instanceof Map + assert resBodyJson.code == 0 + } + } + } + + max_try_milli_secs = 120000 + while (max_try_milli_secs) { + result = sql "show load where label = 
'${loadLabel}'" + if (result[0][2] == "FINISHED") { + sql "sync" + qt_select "select * from ${testTable} order by 1" + break + } else { + sleep(5000) // wait 1 second every time + max_try_milli_secs -= 5000 + if (max_try_milli_secs <= 0) { + assertEquals(1, 2) + } + } + } + + } + + if (enableHdfs()) { + + tableName = 'tbl_test_spark_load' + + sql """ + CREATE TABLE IF NOT EXISTS ${tableName} ( + c_int int(11) NULL, + c_char char(15) NULL, + c_varchar varchar(100) NULL, + c_bool boolean NULL, + c_tinyint tinyint(4) NULL, + c_smallint smallint(6) NULL, + c_bigint bigint(20) NULL, + c_largeint largeint(40) NULL, + c_float float NULL, + c_double double NULL, + c_decimal decimal(6, 3) NULL, + c_decimalv3 decimal(6, 3) NULL, + c_date date NULL, + c_datev2 date NULL, + c_datetime datetime NULL, + c_datetimev2 datetime NULL + ) + DUPLICATE KEY(c_int) + DISTRIBUTED BY HASH(c_int) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ) + """ + + def label = "test_ingestion_load" + + testIngestLoadJob.call(tableName, label, context.config.dataPath + '/load_p0/ingestion_load/data.parquet') + + tableName = 'tbl_test_spark_load_unique_mor' + + sql """ + CREATE TABLE IF NOT EXISTS ${tableName} ( + c_int int(11) NULL, + c_char char(15) NULL, + c_varchar varchar(100) NULL, + c_bool boolean NULL, + c_tinyint tinyint(4) NULL, + c_smallint smallint(6) NULL, + c_bigint bigint(20) NULL, + c_largeint largeint(40) NULL, + c_float float NULL, + c_double double NULL, + c_decimal decimal(6, 3) NULL, + c_decimalv3 decimal(6, 3) NULL, + c_date date NULL, + c_datev2 date NULL, + c_datetime datetime NULL, + c_datetimev2 datetime NULL + ) + UNIQUE KEY(c_int) + DISTRIBUTED BY HASH(c_int) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1", + "enable_unique_key_merge_on_write" = "false" + ) + """ + + label = "test_ingestion_load_unique_mor" + + testIngestLoadJob.call(tableName, label, context.config.dataPath + '/load_p0/ingestion_load/data.parquet') + + tableName = 'tbl_test_spark_load_agg' + + sql """ + CREATE TABLE IF NOT EXISTS ${tableName} + ( + `user_id` LARGEINT NOT NULL COMMENT "user id", + `date` DATE NOT NULL COMMENT "data import time", + `city` VARCHAR(20) COMMENT "city", + `age` SMALLINT COMMENT "age", + `sex` TINYINT COMMENT "gender", + `last_visit_date` DATETIME REPLACE DEFAULT "1970-01-01 00:00:00" COMMENT "last visit date time", + `cost` BIGINT SUM DEFAULT "0" COMMENT "user total cost", + `max_dwell_time` INT MAX DEFAULT "0" COMMENT "user max dwell time", + `min_dwell_time` INT MIN DEFAULT "99999" COMMENT "user min dwell time" + ) + AGGREGATE KEY(`user_id`, `date`, `city`, `age`, `sex`) + DISTRIBUTED BY HASH(`user_id`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + + label = "test_ingestion_load_agg" + + testIngestLoadJob.call(tableName, label, context.config.dataPath + '/load_p0/ingestion_load/data1.parquet') + + } + +} diff --git a/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_alter_column.groovy b/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_alter_column.groovy new file mode 100644 index 00000000000000..4a56663d6291ed --- /dev/null +++ b/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_alter_column.groovy @@ -0,0 +1,208 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import java.nio.file.Files +import java.nio.file.Paths +import java.nio.file.StandardCopyOption + +suite('test_ingestion_load_alter_column', 'p0') { + + def testIngestLoadJob = { testTable, loadLabel, dataFile, alterAction -> + + sql "TRUNCATE TABLE ${testTable}" + + sql "CLEAN LABEL FROM ${context.dbName}" + + Integer loadId = -1 + Integer tableId = -1 + Integer partitionId = -1 + Integer indexId = -1 + Integer bucketId = 0 + Integer schemaHash = -1 + + String reqBody = + """{ + "label": "${loadLabel}", + "tableToPartition": { + "${testTable}": [] + }, + "properties": {} + }""" + + httpTest { + endpoint context.config.feHttpAddress + uri "/api/ingestion_load/internal/${context.dbName}/_create" + op "post" + basicAuthorization context.config.feHttpUser, context.config.feHttpPassword + body reqBody + check { code, resBody -> + assert code == 200 + def resBodyJson = parseJson(resBody) + assert resBodyJson instanceof Map + assert resBodyJson.code == 0 + def data = resBodyJson.data + loadId = data.loadId + def tableMeta = data.tableMeta + tableId = tableMeta["${testTable}"].id + def index = tableMeta["${testTable}"].indexes[0] + indexId = index.indexId + schemaHash = index.schemaHash + partitionId = tableMeta["${testTable}"].partitionInfo.partitions[0].partitionId + } + } + + String resultFileName = "V1.${loadLabel}.${tableId}.${partitionId}.${indexId}.${bucketId}.${schemaHash}.parquet" + logger.info("resultFileName: " + resultFileName) + + Files.copy(Paths.get(dataFile), + Paths.get(context.config.dataPath + "/load_p0/ingestion_load/${resultFileName}"), StandardCopyOption.REPLACE_EXISTING) + + String etlResultFilePath = uploadToHdfs "/load_p0/ingestion_load/${resultFileName}" + + String dppResult = '{\\"isSuccess\\":true,\\"failedReason\\":\\"\\",\\"scannedRows\\":10,\\"fileNumber\\":1,' + + '\\"fileSize\\":2441,\\"normalRows\\":10,\\"abnormalRows\\":0,\\"unselectRows\\":0,' + + '\\"partialAbnormalRows\\":\\"[]\\",\\"scannedBytes\\":0}' + + String updateStatusReqBody = + """{ + "loadId": ${loadId}, + "statusInfo": { + "status": "SUCCESS", + "msg": "", + "appId": "", + "dppResult": "${dppResult}", + "filePathToSize": "{\\"${etlResultFilePath}\\": 81758}", + "hadoopProperties": "{\\"fs.defaultFS\\":\\"${getHdfsFs()}\\",\\"hadoop.username\\":\\"${getHdfsUser()}\\",\\"hadoop.password\\":\\"${getHdfsPasswd()}\\"}" + } + }""" + + httpTest { + endpoint context.config.feHttpAddress + uri "/api/ingestion_load/internal/${context.dbName}/_update" + op "post" + basicAuthorization context.config.feHttpUser, context.config.feHttpPassword + body updateStatusReqBody + check { code, resBody -> + { + assert code == 200 + def resBodyJson = parseJson(resBody) + assert resBodyJson instanceof Map + assert resBodyJson.code == 0 + } + } + } + + alterAction.call() + + max_try_milli_secs = 120000 + while (max_try_milli_secs) { + result = sql "show load where label = '${loadLabel}'" + if (result[0][2] == "CANCELLED") { + msg = 
result[0][7] + logger.info("err msg: " + msg) + assertTrue((result[0][7] =~ /schema of index \[\d+\] has changed/).find()) + break + } else { + sleep(5000) // wait 1 second every time + max_try_milli_secs -= 5000 + if (max_try_milli_secs <= 0) { + assertEquals(1, 2) + } + } + } + + } + + if (enableHdfs()) { + + tableName1 = 'tbl_test_spark_load_alter_column_1' + tableName2 = 'tbl_test_spark_load_alter_column_2' + + try { + + sql """ + CREATE TABLE IF NOT EXISTS ${tableName1} ( + c_int int(11) NULL, + c_char char(15) NULL, + c_varchar varchar(100) NULL, + c_bool boolean NULL, + c_tinyint tinyint(4) NULL, + c_smallint smallint(6) NULL, + c_bigint bigint(20) NULL, + c_largeint largeint(40) NULL, + c_float float NULL, + c_double double NULL, + c_decimal decimal(6, 3) NULL, + c_decimalv3 decimal(6, 3) NULL, + c_date date NULL, + c_datev2 date NULL, + c_datetime datetime NULL, + c_datetimev2 datetime NULL + ) + DUPLICATE KEY(c_int) + DISTRIBUTED BY HASH(c_int) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ) + """ + + label = "test_ingestion_load_alter_column_1" + + testIngestLoadJob.call(tableName1, label, context.config.dataPath + '/load_p0/ingestion_load/data.parquet', { + sql "alter table ${tableName1} drop column c_datetimev2" + }) + + sql """ + CREATE TABLE IF NOT EXISTS ${tableName2} ( + c_int int(11) NULL, + c_char char(15) NULL, + c_varchar varchar(100) NULL, + c_bool boolean NULL, + c_tinyint tinyint(4) NULL, + c_smallint smallint(6) NULL, + c_bigint bigint(20) NULL, + c_largeint largeint(40) NULL, + c_float float NULL, + c_double double NULL, + c_decimal decimal(6, 3) NULL, + c_decimalv3 decimal(6, 3) NULL, + c_date date NULL, + c_datev2 date NULL, + c_datetime datetime NULL, + c_datetimev2 datetime NULL + ) + DUPLICATE KEY(c_int) + DISTRIBUTED BY HASH(c_int) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ) + """ + + label = "test_ingestion_load_alter_column_2" + + testIngestLoadJob.call(tableName2, label, context.config.dataPath + '/load_p0/ingestion_load/data.parquet', { + sql "alter table ${tableName2} add column c_string string null" + }) + + } finally { + sql "DROP TABLE ${tableName1}" + sql "DROP TABLE ${tableName2}" + } + + } + +} \ No newline at end of file diff --git a/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_alter_partition.groovy b/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_alter_partition.groovy new file mode 100644 index 00000000000000..de91935710294b --- /dev/null +++ b/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_alter_partition.groovy @@ -0,0 +1,224 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +import java.nio.file.Files +import java.nio.file.Paths +import java.nio.file.StandardCopyOption + +suite('test_ingestion_load_alter_partition', 'p0') { + + def testIngestLoadJob = { testTable, loadLabel, dataFiles, alterAction -> + + sql "TRUNCATE TABLE ${testTable}" + + sql "CLEAN LABEL ${loadLabel} FROM ${context.dbName}" + + Integer loadId = -1 + Integer tableId = -1 + Integer partitionId = -1 + Integer indexId = -1 + Integer bucketId = 0 + Integer schemaHash = -1 + + String reqBody = + """{ + "label": "${loadLabel}", + "tableToPartition": { + "${testTable}": [] + }, + "properties": {} + }""" + + resultFileNames = [] + + httpTest { + endpoint context.config.feHttpAddress + uri "/api/ingestion_load/internal/${context.dbName}/_create" + op "post" + basicAuthorization context.config.feHttpUser, context.config.feHttpPassword + body reqBody + check { code, resBody -> + assert code == 200 + def resBodyJson = parseJson(resBody) + assert resBodyJson instanceof Map + assert resBodyJson.code == 0 + def data = resBodyJson.data + loadId = data.loadId + def tableMeta = data.tableMeta + tableId = tableMeta["${testTable}"].id + def index = tableMeta["${testTable}"].indexes[0] + indexId = index.indexId + schemaHash = index.schemaHash + partitions = tableMeta["${testTable}"].partitionInfo.partitions + for(partition in partitions) { + logger.info("partitionId: " + partition.partitionId) + resultFileNames.add("V1.${loadLabel}.${tableId}.${partition.partitionId}.${indexId}.${bucketId}.${schemaHash}.parquet") + } + } + } + + etlResultFilePaths = [] + for(int i=0; i < dataFiles.size(); i++) { + Files.copy(Paths.get(dataFiles[i]), + Paths.get(context.config.dataPath + "/load_p0/ingestion_load/${resultFileNames[i]}"), StandardCopyOption.REPLACE_EXISTING) + String etlResultFilePath = uploadToHdfs "/load_p0/ingestion_load/${resultFileNames[i]}" + logger.info("etlResultFilePath: " + etlResultFilePath) + etlResultFilePaths.add(etlResultFilePath) + } + + String dppResult = '{\\"isSuccess\\":true,\\"failedReason\\":\\"\\",\\"scannedRows\\":10,\\"fileNumber\\":1,' + + '\\"fileSize\\":2441,\\"normalRows\\":10,\\"abnormalRows\\":0,\\"unselectRows\\":0,' + + '\\"partialAbnormalRows\\":\\"[]\\",\\"scannedBytes\\":0}' + + String updateStatusReqBody = + """{ + "loadId": ${loadId}, + "statusInfo": { + "status": "SUCCESS", + "msg": "", + "appId": "", + "dppResult": "${dppResult}", + "filePathToSize": "{\\"${etlResultFilePaths.get(0)}\\":851,\\"${etlResultFilePaths.get(1)}\\":781,\\"${etlResultFilePaths.get(2)}\\":781,\\"${etlResultFilePaths.get(3)}\\":839}", + "hadoopProperties": "{\\"fs.defaultFS\\":\\"${getHdfsFs()}\\",\\"hadoop.username\\":\\"${getHdfsUser()}\\",\\"hadoop.password\\":\\"${getHdfsPasswd()}\\"}" + } + }""" + + httpTest { + endpoint context.config.feHttpAddress + uri "/api/ingestion_load/internal/${context.dbName}/_update" + op "post" + basicAuthorization context.config.feHttpUser, context.config.feHttpPassword + body updateStatusReqBody + check { code, resBody -> + { + assert code == 200 + def resBodyJson = parseJson(resBody) + assert resBodyJson instanceof Map + assert resBodyJson.code == 0 + } + } + } + + alterAction.call() + + max_try_milli_secs = 120000 + while (max_try_milli_secs) { + result = sql "show load where label = '${loadLabel}'" + if (result[0][2] == "FINISHED") { + sql "sync" + qt_select "select c1, count(*) from ${testTable} group by c1 order by c1" + break + } else if (result[0][2] == "CANCELLED") { + msg = result[0][7] + logger.info("err msg: " + msg) + assertTrue((result[0][7] =~ 
/partition does not exist/).find()) + break + } else { + sleep(5000) // wait 1 second every time + max_try_milli_secs -= 5000 + if (max_try_milli_secs <= 0) { + assertEquals(1, 2) + } + } + } + + } + + if (enableHdfs()) { + + tableName1 = 'tbl_test_spark_load_alter_partition_1' + tableName2 = 'tbl_test_spark_load_alter_partition_2' + tableName3 = 'tbl_test_spark_load_alter_partition_3' + + try { + + sql """ + CREATE TABLE IF NOT EXISTS ${tableName1} ( + c0 int not null, + c1 date, + c2 varchar(64) + ) + DUPLICATE KEY(c0) + PARTITION BY RANGE(c1) ( + FROM ("2024-09-01") TO ("2024-09-05") INTERVAL 1 DAY + ) + DISTRIBUTED BY HASH(c0) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ) + """ + + label = "test_ingestion_load_alter_partition_1" + + testIngestLoadJob.call(tableName1, label, [context.config.dataPath + '/load_p0/ingestion_load/data2-0.parquet', context.config.dataPath + '/load_p0/ingestion_load/data2-1.parquet',context.config.dataPath + '/load_p0/ingestion_load/data2-2.parquet',context.config.dataPath + '/load_p0/ingestion_load/data2-3.parquet'], { + sql "alter table ${tableName1} drop partition p_20240901" + }) + + sql """ + CREATE TABLE IF NOT EXISTS ${tableName2} ( + c0 int not null, + c1 date, + c2 varchar(64) + ) + DUPLICATE KEY(c0) + PARTITION BY RANGE(c1) ( + FROM ("2024-09-01") TO ("2024-09-05") INTERVAL 1 DAY + ) + DISTRIBUTED BY HASH(c0) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ) + """ + + label = "test_ingestion_load_alter_partition_2" + + testIngestLoadJob.call(tableName2, label, [context.config.dataPath + '/load_p0/ingestion_load/data2-0.parquet', context.config.dataPath + '/load_p0/ingestion_load/data2-1.parquet',context.config.dataPath + '/load_p0/ingestion_load/data2-2.parquet',context.config.dataPath + '/load_p0/ingestion_load/data2-3.parquet'], { + sql "alter table ${tableName2} add partition p_20240905 VALUES [('2024-09-05'), ('2024-09-06'))" + }) + + sql """ + CREATE TABLE IF NOT EXISTS ${tableName3} ( + c0 int not null, + c1 date, + c2 varchar(64) + ) + DUPLICATE KEY(c0) + PARTITION BY RANGE(c1) ( + FROM ("2024-09-01") TO ("2024-09-05") INTERVAL 1 DAY + ) + DISTRIBUTED BY HASH(c0) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ) + """ + + label = "test_ingestion_load_alter_partition_3" + + testIngestLoadJob.call(tableName3, label, [context.config.dataPath + '/load_p0/ingestion_load/data2-0.parquet', context.config.dataPath + '/load_p0/ingestion_load/data2-1.parquet',context.config.dataPath + '/load_p0/ingestion_load/data2-2.parquet',context.config.dataPath + '/load_p0/ingestion_load/data2-3.parquet'], { + sql "alter table ${tableName3} add temporary partition tp_20240901 VALUES [('2024-09-01'), ('2024-09-02'))" + sql "alter table ${tableName3} replace partition(p_20240901) with temporary partition(tp_20240901)" + }) + + } finally { + sql "DROP TABLE ${tableName1}" + sql "DROP TABLE ${tableName2}" + sql "DROP TABLE ${tableName3}" + } + + } + +} \ No newline at end of file diff --git a/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_drop_table.groovy b/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_drop_table.groovy new file mode 100644 index 00000000000000..4f245c3d535b15 --- /dev/null +++ b/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_drop_table.groovy @@ -0,0 +1,196 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import java.nio.file.Files +import java.nio.file.Paths +import java.nio.file.StandardCopyOption + +suite('test_ingestion_load_drop_table', 'p0') { + + def testIngestLoadJob = { testTable, loadLabel, dataFile, alterAction -> + + sql "TRUNCATE TABLE ${testTable}" + + sql "CLEAN LABEL FROM ${context.dbName}" + + Integer loadId = -1 + Integer tableId = -1 + Integer partitionId = -1 + Integer indexId = -1 + Integer bucketId = 0 + Integer schemaHash = -1 + + String reqBody = + """{ + "label": "${loadLabel}", + "tableToPartition": { + "${testTable}": [] + }, + "properties": {} + }""" + + httpTest { + endpoint context.config.feHttpAddress + uri "/api/ingestion_load/internal/${context.dbName}/_create" + op "post" + basicAuthorization context.config.feHttpUser, context.config.feHttpPassword + body reqBody + check { code, resBody -> + assert code == 200 + def resBodyJson = parseJson(resBody) + assert resBodyJson instanceof Map + assert resBodyJson.code == 0 + def data = resBodyJson.data + loadId = data.loadId + def tableMeta = data.tableMeta + tableId = tableMeta["${testTable}"].id + def index = tableMeta["${testTable}"].indexes[0] + indexId = index.indexId + schemaHash = index.schemaHash + partitionId = tableMeta["${testTable}"].partitionInfo.partitions[0].partitionId + } + } + + String resultFileName = "V1.${loadLabel}.${tableId}.${partitionId}.${indexId}.${bucketId}.${schemaHash}.parquet" + logger.info("resultFileName: " + resultFileName) + + Files.copy(Paths.get(dataFile), + Paths.get(context.config.dataPath + "/load_p0/ingestion_load/${resultFileName}"), StandardCopyOption.REPLACE_EXISTING) + + String etlResultFilePath = uploadToHdfs "/load_p0/ingestion_load/${resultFileName}" + + String dppResult = '{\\"isSuccess\\":true,\\"failedReason\\":\\"\\",\\"scannedRows\\":10,\\"fileNumber\\":1,' + + '\\"fileSize\\":2441,\\"normalRows\\":10,\\"abnormalRows\\":0,\\"unselectRows\\":0,' + + '\\"partialAbnormalRows\\":\\"[]\\",\\"scannedBytes\\":0}' + + String updateStatusReqBody = + """{ + "loadId": ${loadId}, + "statusInfo": { + "status": "SUCCESS", + "msg": "", + "appId": "", + "dppResult": "${dppResult}", + "filePathToSize": "{\\"${etlResultFilePath}\\": 81758}", + "hadoopProperties": "{\\"fs.defaultFS\\":\\"${getHdfsFs()}\\",\\"hadoop.username\\":\\"${getHdfsUser()}\\",\\"hadoop.password\\":\\"${getHdfsPasswd()}\\"}" + } + }""" + + httpTest { + endpoint context.config.feHttpAddress + uri "/api/ingestion_load/internal/${context.dbName}/_update" + op "post" + basicAuthorization context.config.feHttpUser, context.config.feHttpPassword + body updateStatusReqBody + check { code, resBody -> + { + assert code == 200 + def resBodyJson = parseJson(resBody) + assert resBodyJson instanceof Map + assert resBodyJson.code == 0 + } + } + } + + alterAction.call() + + max_try_milli_secs = 120000 + while (max_try_milli_secs) { 
+ result = sql "show load where label = '${loadLabel}'" + if (result.size() == 0) { + break + } else { + sleep(5000) // wait 1 second every time + max_try_milli_secs -= 5000 + if (max_try_milli_secs <= 0) { + assertEquals(1, 2) + } + } + } + + } + + if (enableHdfs()) { + + tableName = 'tbl_test_spark_load_drop_table' + + try { + + sql """ + CREATE TABLE IF NOT EXISTS ${tableName} ( + c_int int(11) NULL, + c_char char(15) NULL, + c_varchar varchar(100) NULL, + c_bool boolean NULL, + c_tinyint tinyint(4) NULL, + c_smallint smallint(6) NULL, + c_bigint bigint(20) NULL, + c_largeint largeint(40) NULL, + c_float float NULL, + c_double double NULL, + c_decimal decimal(6, 3) NULL, + c_decimalv3 decimal(6, 3) NULL, + c_date date NULL, + c_datev2 date NULL, + c_datetime datetime NULL, + c_datetimev2 datetime NULL + ) + DUPLICATE KEY(c_int) + DISTRIBUTED BY HASH(c_int) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ) + """ + + label = "test_ingestion_load_drop_table" + + testIngestLoadJob.call(tableName, label, context.config.dataPath + '/load_p0/ingestion_load/data.parquet', { + sql "DROP TABLE ${tableName}" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName} ( + c_int int(11) NULL, + c_char char(15) NULL, + c_varchar varchar(100) NULL, + c_bool boolean NULL, + c_tinyint tinyint(4) NULL, + c_smallint smallint(6) NULL, + c_bigint bigint(20) NULL, + c_largeint largeint(40) NULL, + c_float float NULL, + c_double double NULL, + c_decimal decimal(6, 3) NULL, + c_decimalv3 decimal(6, 3) NULL, + c_date date NULL, + c_datev2 date NULL, + c_datetime datetime NULL, + c_datetimev2 datetime NULL + ) + DUPLICATE KEY(c_int) + DISTRIBUTED BY HASH(c_int) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ) + """ + }) + + } finally { + sql "DROP TABLE ${tableName}" + } + + } + +} \ No newline at end of file diff --git a/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_multi_table.groovy b/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_multi_table.groovy new file mode 100644 index 00000000000000..67455d8c692cd3 --- /dev/null +++ b/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_multi_table.groovy @@ -0,0 +1,208 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +import java.nio.file.Files +import java.nio.file.Paths +import java.nio.file.StandardCopyOption + +suite('test_ingestion_load_multi_table', 'p0') { + + def testIngestLoadJob = { loadLabel, testTable1, testTable2, dataFile1, dataFile2 -> + + sql "TRUNCATE TABLE ${testTable1}" + sql "TRUNCATE TABLE ${testTable2}" + + sql "CLEAN LABEL FROM ${context.dbName}" + + Integer loadId = -1 + Integer tableId = -1 + Integer partitionId = -1 + Integer indexId = -1 + Integer bucketId = 0 + Integer schemaHash = -1 + + String resultFileName1 = "" + String resultFileName2 = "" + + String reqBody = + """{ + "label": "${loadLabel}", + "tableToPartition": { + "${testTable1}": [], + "${testTable2}": [] + }, + "properties": {} + }""" + + httpTest { + endpoint context.config.feHttpAddress + uri "/api/ingestion_load/internal/${context.dbName}/_create" + op "post" + basicAuthorization context.config.feHttpUser, context.config.feHttpPassword + body reqBody + check { code, resBody -> + assert code == 200 + def resBodyJson = parseJson(resBody) + assert resBodyJson instanceof Map + assert resBodyJson.code == 0 + def data = resBodyJson.data + loadId = data.loadId + def tableMeta = data.tableMeta + // table1 + tableId = tableMeta["${testTable1}"].id + def index1 = tableMeta["${testTable1}"].indexes[0] + indexId = index1.indexId + schemaHash = index1.schemaHash + partitionId = tableMeta["${testTable1}"].partitionInfo.partitions[0].partitionId + resultFileName1 = "V1.${loadLabel}.${tableId}.${partitionId}.${indexId}.${bucketId}.${schemaHash}.parquet" + // table2 + tableId = tableMeta["${testTable2}"].id + def index2 = tableMeta["${testTable2}"].indexes[0] + indexId = index2.indexId + schemaHash = index2.schemaHash + partitionId = tableMeta["${testTable2}"].partitionInfo.partitions[0].partitionId + resultFileName2 = "V1.${loadLabel}.${tableId}.${partitionId}.${indexId}.${bucketId}.${schemaHash}.parquet" + } + } + + logger.info("resultFileName1: " + resultFileName1) + logger.info("resultFileName2: " + resultFileName2) + + Files.copy(Paths.get(dataFile1), + Paths.get(context.config.dataPath + "/load_p0/ingestion_load/${resultFileName1}"), StandardCopyOption.REPLACE_EXISTING) + Files.copy(Paths.get(dataFile2), + Paths.get(context.config.dataPath + "/load_p0/ingestion_load/${resultFileName2}"), StandardCopyOption.REPLACE_EXISTING) + + String etlResultFilePath1 = uploadToHdfs "/load_p0/ingestion_load/${resultFileName1}" + String etlResultFilePath2 = uploadToHdfs "/load_p0/ingestion_load/${resultFileName2}" + + String dppResult = '{\\"isSuccess\\":true,\\"failedReason\\":\\"\\",\\"scannedRows\\":10,\\"fileNumber\\":2,' + + '\\"fileSize\\":163516,\\"normalRows\\":10,\\"abnormalRows\\":0,\\"unselectRows\\":0,' + + '\\"partialAbnormalRows\\":\\"[]\\",\\"scannedBytes\\":0}' + + + String updateStatusReqBody = + """{ + "loadId": ${loadId}, + "statusInfo": { + "status": "SUCCESS", + "msg": "", + "appId": "", + "dppResult": "${dppResult}", + "filePathToSize": "{\\"${etlResultFilePath1}\\": 81758, \\"${etlResultFilePath2}\\": 81758}", + "hadoopProperties": "{\\"fs.defaultFS\\":\\"${getHdfsFs()}\\",\\"hadoop.username\\":\\"${getHdfsUser()}\\",\\"hadoop.password\\":\\"${getHdfsPasswd()}\\"}" + } + }""" + + httpTest { + endpoint context.config.feHttpAddress + uri "/api/ingestion_load/internal/${context.dbName}/_update" + op "post" + basicAuthorization context.config.feHttpUser, context.config.feHttpPassword + body updateStatusReqBody + check { code, resBody -> + { + assert code == 200 + def resBodyJson = parseJson(resBody) + assert 
resBodyJson instanceof Map + assert resBodyJson.code == 0 + } + } + } + + max_try_milli_secs = 60000 + while (max_try_milli_secs) { + result = sql "show load where label = '${loadLabel}'" + if (result[0][2] == "FINISHED") { + sql "sync" + qt_select "select * from ${testTable1} order by c_int" + qt_select "select * from ${testTable2} order by c_int" + break + } else { + sleep(5000) // wait 1 second every time + max_try_milli_secs -= 5000 + if (max_try_milli_secs <= 0) { + assertEquals(1, 2) + } + } + } + + } + + if (enableHdfs()) { + + tableName1 = 'tbl_test_spark_load_multi_1' + + sql """ + CREATE TABLE IF NOT EXISTS ${tableName1} ( + c_int int(11) NULL, + c_char char(15) NULL, + c_varchar varchar(100) NULL, + c_bool boolean NULL, + c_tinyint tinyint(4) NULL, + c_smallint smallint(6) NULL, + c_bigint bigint(20) NULL, + c_largeint largeint(40) NULL, + c_float float NULL, + c_double double NULL, + c_decimal decimal(6, 3) NULL, + c_decimalv3 decimal(6, 3) NULL, + c_date date NULL, + c_datev2 date NULL, + c_datetime datetime NULL, + c_datetimev2 datetime NULL + ) + DISTRIBUTED BY HASH(c_int) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ) + """ + + tableName2 = 'tbl_test_spark_load_multi_2' + + sql """ + CREATE TABLE IF NOT EXISTS ${tableName2} ( + c_int int(11) NULL, + c_char char(15) NULL, + c_varchar varchar(100) NULL, + c_bool boolean NULL, + c_tinyint tinyint(4) NULL, + c_smallint smallint(6) NULL, + c_bigint bigint(20) NULL, + c_largeint largeint(40) NULL, + c_float float NULL, + c_double double NULL, + c_decimal decimal(6, 3) NULL, + c_decimalv3 decimal(6, 3) NULL, + c_date date NULL, + c_datev2 date NULL, + c_datetime datetime NULL, + c_datetimev2 datetime NULL + ) + DISTRIBUTED BY HASH(c_int) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ) + """ + + def label = "test_ingestion_load_multi_table" + + testIngestLoadJob.call(label, tableName1, tableName2, context.config.dataPath + '/load_p0/ingestion_load/data.parquet', context.config.dataPath + '/load_p0/ingestion_load/data.parquet') + + } + +} \ No newline at end of file diff --git a/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_with_inverted_index.groovy b/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_with_inverted_index.groovy new file mode 100644 index 00000000000000..7eed4bfdc58342 --- /dev/null +++ b/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_with_inverted_index.groovy @@ -0,0 +1,166 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +import java.nio.file.Files +import java.nio.file.Paths +import java.nio.file.StandardCopyOption + +suite('test_ingestion_load_with_inverted_index', 'p0') { + + def testIngestLoadJob = { testTable, loadLabel, String dataFile -> + + sql "TRUNCATE TABLE ${testTable}" + + sql "CLEAN LABEL FROM ${context.dbName}" + + Integer loadId = -1 + Integer tableId = -1 + Integer partitionId = -1 + Integer indexId = -1 + Integer bucketId = 0 + Integer schemaHash = -1 + + String reqBody = + """{ + "label": "${loadLabel}", + "tableToPartition": { + "${testTable}": [] + }, + "properties": {} + }""" + + httpTest { + endpoint context.config.feHttpAddress + uri "/api/ingestion_load/internal/${context.dbName}/_create" + op "post" + basicAuthorization context.config.feHttpUser, context.config.feHttpPassword + body reqBody + check { code, resBody -> + assert code == 200 + def resBodyJson = parseJson(resBody) + assert resBodyJson instanceof Map + assert resBodyJson.code == 0 + def data = resBodyJson.data + loadId = data.loadId + def tableMeta = data.tableMeta + tableId = tableMeta["${testTable}"].id + def index = tableMeta["${testTable}"].indexes[0] + indexId = index.indexId + schemaHash = index.schemaHash + partitionId = tableMeta["${testTable}"].partitionInfo.partitions[0].partitionId + } + } + + String resultFileName = "V1.${loadLabel}.${tableId}.${partitionId}.${indexId}.${bucketId}.${schemaHash}.parquet" + logger.info("resultFileName: " + resultFileName) + + Files.copy(Paths.get(dataFile), + Paths.get(context.config.dataPath + "/load_p0/ingestion_load/${resultFileName}"), StandardCopyOption.REPLACE_EXISTING) + + String etlResultFilePath = uploadToHdfs "/load_p0/ingestion_load/${resultFileName}" + + String dppResult = '{\\"isSuccess\\":true,\\"failedReason\\":\\"\\",\\"scannedRows\\":10,\\"fileNumber\\":1,' + + '\\"fileSize\\":2441,\\"normalRows\\":10,\\"abnormalRows\\":0,\\"unselectRows\\":0,' + + '\\"partialAbnormalRows\\":\\"[]\\",\\"scannedBytes\\":0}' + + String updateStatusReqBody = + """{ + "loadId": ${loadId}, + "statusInfo": { + "status": "SUCCESS", + "msg": "", + "appId": "", + "dppResult": "${dppResult}", + "filePathToSize": "{\\"${etlResultFilePath}\\": 81758}", + "hadoopProperties": "{\\"fs.defaultFS\\":\\"${getHdfsFs()}\\",\\"hadoop.username\\":\\"${getHdfsUser()}\\",\\"hadoop.password\\":\\"${getHdfsPasswd()}\\"}" + } + }""" + + httpTest { + endpoint context.config.feHttpAddress + uri "/api/ingestion_load/internal/${context.dbName}/_update" + op "post" + basicAuthorization context.config.feHttpUser, context.config.feHttpPassword + body updateStatusReqBody + check { code, resBody -> + { + assert code == 200 + def resBodyJson = parseJson(resBody) + assert resBodyJson instanceof Map + assert resBodyJson.code == 0 + } + } + } + + def max_try_milli_secs = 120000 + while (max_try_milli_secs) { + def result = sql "show load where label = '${loadLabel}'" + if (result[0][2] == "FINISHED") { + sql "sync" + qt_select "select * from ${testTable} order by 1" + break + } else { + sleep(5000) // wait 1 second every time + max_try_milli_secs -= 5000 + if (max_try_milli_secs <= 0) { + assertEquals(1, 2) + } + } + } + + } + + if (enableHdfs()) { + + def tableName = 'test_ingestion_load_with_inverted_index' + + sql """ + CREATE TABLE IF NOT EXISTS ${tableName} ( + c_int int(11) NULL, + c_char char(15) NULL, + c_varchar varchar(100) NULL, + c_bool boolean NULL, + c_tinyint tinyint(4) NULL, + c_smallint smallint(6) NULL, + c_bigint bigint(20) NULL, + c_largeint largeint(40) NULL, + c_float float NULL, + c_double 
double NULL, + c_decimal decimal(6, 3) NULL, + c_decimalv3 decimal(6, 3) NULL, + c_date date NULL, + c_datev2 date NULL, + c_datetime datetime NULL, + c_datetimev2 datetime NULL, + INDEX idx_c_varchar(c_varchar) USING INVERTED, + INDEX idx_c_bigint(c_bigint) USING INVERTED, + INDEX idx_c_datetimev2(c_datetimev2) USING INVERTED + ) + DUPLICATE KEY(c_int) + DISTRIBUTED BY HASH(c_int) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ) + """ + + def label = "test_ingestion_load_with_inverted_index" + + testIngestLoadJob.call(tableName, label, context.config.dataPath + '/load_p0/ingestion_load/data.parquet') + + } + +} diff --git a/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_with_partition.groovy b/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_with_partition.groovy new file mode 100644 index 00000000000000..97ebb7a0761067 --- /dev/null +++ b/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_with_partition.groovy @@ -0,0 +1,160 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +import java.nio.file.Files +import java.nio.file.Paths +import java.nio.file.StandardCopyOption + +suite('test_ingestion_load_with_partition', 'p0') { + + def testIngestLoadJob = { testTable, loadLabel, dataFiles -> + + sql "TRUNCATE TABLE ${testTable}" + + sql "CLEAN LABEL FROM ${context.dbName}" + + Integer loadId = -1 + Integer tableId = -1 + Integer partitionId = -1 + Integer indexId = -1 + Integer bucketId = 0 + Integer schemaHash = -1 + + String reqBody = + """{ + "label": "${loadLabel}", + "tableToPartition": { + "${testTable}": [] + }, + "properties": {} + }""" + + resultFileNames = [] + + httpTest { + endpoint context.config.feHttpAddress + uri "/api/ingestion_load/internal/${context.dbName}/_create" + op "post" + basicAuthorization context.config.feHttpUser, context.config.feHttpPassword + body reqBody + check { code, resBody -> + assert code == 200 + def resBodyJson = parseJson(resBody) + assert resBodyJson instanceof Map + assert resBodyJson.code == 0 + def data = resBodyJson.data + loadId = data.loadId + def tableMeta = data.tableMeta + tableId = tableMeta["${testTable}"].id + def index = tableMeta["${testTable}"].indexes[0] + indexId = index.indexId + schemaHash = index.schemaHash + partitions = tableMeta["${testTable}"].partitionInfo.partitions + for(partition in partitions) { + logger.info("partitionId: " + partition.partitionId) + resultFileNames.add("V1.${loadLabel}.${tableId}.${partition.partitionId}.${indexId}.${bucketId}.${schemaHash}.parquet") + } + } + } + + etlResultFilePaths = [] + for(int i=0; i < dataFiles.size(); i++) { + Files.copy(Paths.get(dataFiles[i]), + Paths.get(context.config.dataPath + "/load_p0/ingestion_load/${resultFileNames[i]}"), StandardCopyOption.REPLACE_EXISTING) + String etlResultFilePath = uploadToHdfs "/load_p0/ingestion_load/${resultFileNames[i]}" + logger.info("etlResultFilePath: " + etlResultFilePath) + etlResultFilePaths.add(etlResultFilePath) + } + + String dppResult = '{\\"isSuccess\\":true,\\"failedReason\\":\\"\\",\\"scannedRows\\":10,\\"fileNumber\\":1,' + + '\\"fileSize\\":2441,\\"normalRows\\":10,\\"abnormalRows\\":0,\\"unselectRows\\":0,' + + '\\"partialAbnormalRows\\":\\"[]\\",\\"scannedBytes\\":0}' + + String updateStatusReqBody = + """{ + "loadId": ${loadId}, + "statusInfo": { + "status": "SUCCESS", + "msg": "", + "appId": "", + "dppResult": "${dppResult}", + "filePathToSize": "{\\"${etlResultFilePaths.get(0)}\\":851,\\"${etlResultFilePaths.get(1)}\\":781,\\"${etlResultFilePaths.get(2)}\\":781,\\"${etlResultFilePaths.get(3)}\\":839}", + "hadoopProperties": "{\\"fs.defaultFS\\":\\"${getHdfsFs()}\\",\\"hadoop.username\\":\\"${getHdfsUser()}\\",\\"hadoop.password\\":\\"${getHdfsPasswd()}\\"}" + } + }""" + + httpTest { + endpoint context.config.feHttpAddress + uri "/api/ingestion_load/internal/${context.dbName}/_update" + op "post" + basicAuthorization context.config.feHttpUser, context.config.feHttpPassword + body updateStatusReqBody + check { code, resBody -> + { + assert code == 200 + def resBodyJson = parseJson(resBody) + assert resBodyJson instanceof Map + assert resBodyJson.code == 0 + } + } + } + + max_try_milli_secs = 120000 + while (max_try_milli_secs) { + result = sql "show load where label = '${loadLabel}'" + if (result[0][2] == "FINISHED") { + sql "sync" + qt_select "select c1, count(*) from ${testTable} group by c1 order by c1" + break + } else { + sleep(5000) // wait 1 second every time + max_try_milli_secs -= 5000 + if (max_try_milli_secs <= 0) { + assertEquals(1, 2) + } + } + } + + } + + if (enableHdfs()) { + + 
tableName = 'tbl_test_spark_load_partition' + + sql """ + CREATE TABLE IF NOT EXISTS ${tableName} ( + c0 int not null, + c1 date, + c2 varchar(64) + ) + DUPLICATE KEY(c0) + PARTITION BY RANGE(c1) ( + FROM ("2024-09-01") TO ("2024-09-05") INTERVAL 1 DAY + ) + DISTRIBUTED BY HASH(c0) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ) + """ + + def label = "test_ingestion_load_partition" + + testIngestLoadJob.call(tableName, label, [context.config.dataPath + '/load_p0/ingestion_load/data2-0.parquet', context.config.dataPath + '/load_p0/ingestion_load/data2-1.parquet',context.config.dataPath + '/load_p0/ingestion_load/data2-2.parquet',context.config.dataPath + '/load_p0/ingestion_load/data2-3.parquet']) + + } + +} \ No newline at end of file From 4123798e29eda56903f0bfdead3ede490f693910 Mon Sep 17 00:00:00 2001 From: gnehil Date: Mon, 14 Jul 2025 17:19:38 +0800 Subject: [PATCH 2/2] format code --- be/src/olap/push_handler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/be/src/olap/push_handler.cpp b/be/src/olap/push_handler.cpp index 896c472f79abe2..9f1a9628f2ceba 100644 --- a/be/src/olap/push_handler.cpp +++ b/be/src/olap/push_handler.cpp @@ -40,8 +40,8 @@ #include "common/config.h" #include "common/logging.h" #include "common/status.h" -#include "olap/cumulative_compaction_time_series_policy.h" #include "io/hdfs_builder.h" +#include "olap/cumulative_compaction_time_series_policy.h" #include "olap/delete_handler.h" #include "olap/olap_define.h" #include "olap/rowset/pending_rowset_helper.h"
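
For readers following the new suites, the ingestion load flow they exercise reduces to four steps: create the job through the FE HTTP API, rename the pre-processed parquet file after the target tablet and upload it to HDFS, report the ETL result, and poll the load state. The sketch below only condenses that flow as shown in the suites above; it is illustrative, assumes it runs inside a regression-test suite with the same imports (java.nio.file.*) and framework helpers (httpTest, uploadToHdfs, parseJson, getHdfsFs, getHdfsUser, getHdfsPasswd, sql), and the table name, label, file size, and dppResult payload are placeholders rather than values required by the API.

    // Illustrative sketch: create -> upload -> update -> poll, condensed from the suites above.
    def tableName = 'tbl_ingestion_load_example'   // hypothetical table, created beforehand
    def loadLabel = 'ingestion_load_example'       // hypothetical label
    def dataFile = context.config.dataPath + '/load_p0/ingestion_load/data.parquet'

    Integer loadId = -1
    Integer tableId = -1
    Integer partitionId = -1
    Integer indexId = -1
    Integer bucketId = 0
    Integer schemaHash = -1

    // 1. Create the load job and read back the table/index/partition meta.
    httpTest {
        endpoint context.config.feHttpAddress
        uri "/api/ingestion_load/internal/${context.dbName}/_create"
        op "post"
        basicAuthorization context.config.feHttpUser, context.config.feHttpPassword
        body """{"label": "${loadLabel}", "tableToPartition": {"${tableName}": []}, "properties": {}}"""
        check { code, resBody ->
            assert code == 200
            def data = parseJson(resBody).data
            loadId = data.loadId
            def meta = data.tableMeta["${tableName}"]
            tableId = meta.id
            indexId = meta.indexes[0].indexId
            schemaHash = meta.indexes[0].schemaHash
            partitionId = meta.partitionInfo.partitions[0].partitionId
        }
    }

    // 2. The pre-processed file is named after the tablet it targets, then pushed to HDFS.
    String resultFileName = "V1.${loadLabel}.${tableId}.${partitionId}.${indexId}.${bucketId}.${schemaHash}.parquet"
    Files.copy(Paths.get(dataFile),
            Paths.get(context.config.dataPath + "/load_p0/ingestion_load/${resultFileName}"),
            StandardCopyOption.REPLACE_EXISTING)
    String etlResultFilePath = uploadToHdfs "/load_p0/ingestion_load/${resultFileName}"

    // 3. Report the ETL result; the FE then schedules the BEs to convert the file into segments.
    httpTest {
        endpoint context.config.feHttpAddress
        uri "/api/ingestion_load/internal/${context.dbName}/_update"
        op "post"
        basicAuthorization context.config.feHttpUser, context.config.feHttpPassword
        body """{
            "loadId": ${loadId},
            "statusInfo": {
                "status": "SUCCESS", "msg": "", "appId": "",
                "dppResult": "{\\"isSuccess\\":true,\\"failedReason\\":\\"\\",\\"scannedRows\\":10,\\"fileNumber\\":1,\\"fileSize\\":2441,\\"normalRows\\":10,\\"abnormalRows\\":0,\\"unselectRows\\":0,\\"partialAbnormalRows\\":\\"[]\\",\\"scannedBytes\\":0}",
                "filePathToSize": "{\\"${etlResultFilePath}\\": 2441}",
                "hadoopProperties": "{\\"fs.defaultFS\\":\\"${getHdfsFs()}\\",\\"hadoop.username\\":\\"${getHdfsUser()}\\",\\"hadoop.password\\":\\"${getHdfsPasswd()}\\"}"
            }
        }"""
        check { code, resBody ->
            assert code == 200
            assert parseJson(resBody).code == 0
        }
    }

    // 4. Poll SHOW LOAD until the job reaches FINISHED; the suites retry every 5 seconds.
    def maxTryMs = 120000
    while (maxTryMs > 0) {
        def result = sql "show load where label = '${loadLabel}'"
        if (result[0][2] == "FINISHED") {
            break
        }
        sleep(5000)       // retry every 5 seconds
        maxTryMs -= 5000
    }
    assert maxTryMs > 0   // fail if the load never finished within the timeout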