From ad80db5e62402b64021de81baff0fe40a2f155f8 Mon Sep 17 00:00:00 2001 From: deardeng <565620795@qq.com> Date: Tue, 3 May 2022 23:37:33 +0800 Subject: [PATCH 1/4] fix #9351 can't load parquet file with column name case sensitive with Doris column --- .../src/main/java/org/apache/doris/load/Load.java | 15 +++++++++++++-- .../org/apache/doris/planner/BrokerScanNode.java | 4 ++-- .../apache/doris/planner/StreamLoadScanNode.java | 4 ++-- 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/Load.java b/fe/fe-core/src/main/java/org/apache/doris/load/Load.java index 905f9507378a81..079f41f4649d3d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/Load.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/Load.java @@ -1045,12 +1045,23 @@ private static void initColumns(Table tbl, List columnExprs, return; } + Set tmpSet = Sets.newHashSet(); + for (ImportColumnDesc importColumnDesc : copiedColumnExprs) { + if (importColumnDesc.getExpr() == null) { + tmpSet.add(importColumnDesc.getColumnName()); + } + } + // init slot desc add expr map, also transform hadoop functions for (ImportColumnDesc importColumnDesc : copiedColumnExprs) { // make column name case match with real column name String columnName = importColumnDesc.getColumnName(); - String realColName = tbl.getColumn(columnName) == null ? 
columnName - : tbl.getColumn(columnName).getName(); + String realColName; + if (tbl.getColumn(columnName) == null || tmpSet.contains(columnName) ){ + realColName = columnName; + } else { + realColName = tbl.getColumn(columnName).getName(); + } if (importColumnDesc.getExpr() != null) { Expr expr = transformHadoopFunctionExpr(tbl, realColName, importColumnDesc.getExpr()); exprsByName.put(realColName, expr); diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/BrokerScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/BrokerScanNode.java index 73aa1fb04d3cf9..7faacde1dbf82f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/BrokerScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/BrokerScanNode.java @@ -247,8 +247,8 @@ private void initParams(ParamCreateContext context) */ private void initColumns(ParamCreateContext context) throws UserException { context.srcTupleDescriptor = analyzer.getDescTbl().createTupleDescriptor(); - context.slotDescByName = Maps.newHashMap(); - context.exprMap = Maps.newHashMap(); + context.slotDescByName = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER); + context.exprMap = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER); // for load job, column exprs is got from file group // for query, there is no column exprs, they will be got from table's schema in "Load.initColumns" diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/StreamLoadScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/StreamLoadScanNode.java index 0930bad5f0036f..f03b7d2f4c5ac8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/StreamLoadScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/StreamLoadScanNode.java @@ -66,8 +66,8 @@ public class StreamLoadScanNode extends LoadScanNode { private TupleDescriptor srcTupleDesc; private TBrokerScanRange brokerScanRange; - private Map slotDescByName = Maps.newHashMap(); - private Map exprsByName = Maps.newHashMap(); + private Map 
slotDescByName = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER); + private Map exprsByName = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER); // used to construct for streaming loading public StreamLoadScanNode( From 9fb63e7ba4706d9802a04e88837c43e2d3810d3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=82=93=E9=91=AB?= Date: Sat, 7 May 2022 20:39:21 +0800 Subject: [PATCH 2/4] #9351 add some comments --- fe/fe-core/src/main/java/org/apache/doris/load/Load.java | 5 +++-- .../java/org/apache/doris/planner/StreamLoadScanNode.java | 3 +++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/Load.java b/fe/fe-core/src/main/java/org/apache/doris/load/Load.java index 079f41f4649d3d..0c595e355300b2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/Load.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/Load.java @@ -1045,6 +1045,7 @@ private static void initColumns(Table tbl, List columnExprs, return; } + // load sql '(a, b)set(c=a, d=b)', tmpSet save {a,b}. 
Set tmpSet = Sets.newHashSet(); for (ImportColumnDesc importColumnDesc : copiedColumnExprs) { if (importColumnDesc.getExpr() == null) { @@ -1057,10 +1058,10 @@ private static void initColumns(Table tbl, List columnExprs, // make column name case match with real column name String columnName = importColumnDesc.getColumnName(); String realColName; - if (tbl.getColumn(columnName) == null || tmpSet.contains(columnName) ){ + if (tbl.getColumn(columnName) == null || tmpSet.contains(columnName)) { realColName = columnName; } else { - realColName = tbl.getColumn(columnName).getName(); + realColName = tbl.getColumn(columnName).getName(); } if (importColumnDesc.getExpr() != null) { Expr expr = transformHadoopFunctionExpr(tbl, realColName, importColumnDesc.getExpr()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/StreamLoadScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/StreamLoadScanNode.java index f03b7d2f4c5ac8..0702383b394566 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/StreamLoadScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/StreamLoadScanNode.java @@ -66,6 +66,9 @@ public class StreamLoadScanNode extends LoadScanNode { private TupleDescriptor srcTupleDesc; private TBrokerScanRange brokerScanRange; + // If a case-sensitive map were used here, for example, + // with column 「A」 in the table and the mapping '(a) set (A = a)' in the load SQL, + // slotDescByName would store 「a」, and the later lookup of the table's 「A」 column info via 「a」 would throw. 
private Map slotDescByName = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER); private Map exprsByName = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER); From ddad03a5df5a1eaede039fe969057d49686ef03f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=82=93=E9=91=AB?= Date: Sat, 7 May 2022 20:47:43 +0800 Subject: [PATCH 3/4] #9351 add some comments --- .../java/org/apache/doris/planner/StreamLoadScanNode.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/StreamLoadScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/StreamLoadScanNode.java index 0702383b394566..65a265d9c56009 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/StreamLoadScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/StreamLoadScanNode.java @@ -69,8 +69,8 @@ public class StreamLoadScanNode extends LoadScanNode { // If a case-sensitive map were used here, for example, // with column 「A」 in the table and the mapping '(a) set (A = a)' in the load SQL, // slotDescByName would store 「a」, and the later lookup of the table's 「A」 column info via 「a」 would throw. 
- private Map slotDescByName = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER); - private Map exprsByName = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER); + private final Map slotDescByName = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER); + private final Map exprsByName = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER); // used to construct for streaming loading public StreamLoadScanNode( From fd525209290db86e8e9d85e730efd272b968dfb4 Mon Sep 17 00:00:00 2001 From: deardeng <565620795@qq.com> Date: Mon, 9 May 2022 11:27:40 +0800 Subject: [PATCH 4/4] #9351 fix reviewed --- .../src/main/java/org/apache/doris/load/Load.java | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/Load.java b/fe/fe-core/src/main/java/org/apache/doris/load/Load.java index 0c595e355300b2..ea381977e24f78 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/Load.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/Load.java @@ -1045,20 +1045,12 @@ private static void initColumns(Table tbl, List columnExprs, return; } - // load sql '(a, b)set(c=a, d=b)', tmpSet save {a,b}. - Set tmpSet = Sets.newHashSet(); - for (ImportColumnDesc importColumnDesc : copiedColumnExprs) { - if (importColumnDesc.getExpr() == null) { - tmpSet.add(importColumnDesc.getColumnName()); - } - } - // init slot desc add expr map, also transform hadoop functions for (ImportColumnDesc importColumnDesc : copiedColumnExprs) { // make column name case match with real column name String columnName = importColumnDesc.getColumnName(); String realColName; - if (tbl.getColumn(columnName) == null || tmpSet.contains(columnName)) { + if (tbl.getColumn(columnName) == null || importColumnDesc.getExpr() == null) { realColName = columnName; } else { realColName = tbl.getColumn(columnName).getName();